self.print_and_log(f"The text of the exit loop element set: {node['parameters']['exitElement']} cannot be obtained, this loop will no longer check whether the text of the element has changed, and will continue to execute. To solve this problem, you can modify the element whose text does not change to other elements, or set the number of loops to a fixed number greater than 0.")
self.print_and_log(e)
exitElements=[]
# newBodyText为随机文本,保证一直执行
newBodyText=str(random.random())
ifnode["parameters"]["iframe"]:# 如果标记了iframe
iframes=self.browser.find_elements(
By.CSS_SELECTOR,"iframe",iframe=False)
@ -1200,9 +1249,15 @@ class BrowserThread(Thread):
iflen(elements)==0:
self.print_and_log("Loop element not found: ",
xpath)
self.print_and_log("找不到循环元素: ",xpath)
self.print_and_log("找不到循环元素:",xpath)
index=0
skipCount=node["parameters"]["skipCount"]
whileindex<len(elements):
ifindex<skipCount:
index+=1
self.print_and_log("跳过第"+str(index)+"个元素")
self.print_and_log("Skip the "+str(index)+"th element")
continue
try:
element=elements[index]
element_text=element.text
@ -1250,7 +1305,7 @@ class BrowserThread(Thread):
index=index+1
exceptNoSuchElementException:
self.print_and_log("Loop element not found: ",xpath)
# print("The number of rows in the Excel file exceeds 5000, too many rows will cause the speed of writing data in append mode to slow down, it is recommended to replace it with CSV file or MySQL database to store data. Reading data, please wait...")
# # existing_data = [[sheet.cell(row=i, column=j).value for j in range(1, sheet.max_column + 1)] for i in range(1, sheet.max_row + 1)]
# for i in range(1, sheet.max_row + 1):
# row_data = []
# if num_rows > 5000 and i % 500 == 0:
# print(f"正在读取第{i}/{num_rows}行的数据...")
# print(f"Reading data of row {i}/{num_rows}...")
# for j in range(1, sheet.max_column + 1):
# cell = sheet.cell(row=i, column=j).value
# if cell is None:
# cell = ""
# row_data.append(cell)
# existing_data.append(row_data)
# first = False # 如果文件存在,首行不再是标题行
# # 使用 xlsxwriter 创建新文件
# workbook = xlsxwriter.Workbook(file_name)
# worksheet = workbook.add_worksheet()
# # 写入现有数据
# for row_num, row_data in enumerate(existing_data):
print("The number of rows in the Excel file already exceeds 1000, too many rows will cause the speed of writing data in append mode to slow down, it is recommended to increase the value of the 'Save every how many data' option in the task save dialog to improve the collection speed, or replace it with CSV file or MySQL database to store data. Reading data, please wait...")
ws=wb.active
ifnum_rows>1000:
print("读取数据完成,正在追加数据...")
print("Reading data completed, appending data...")
{"id":149,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/7/2023, 6:36:49 AM","update_time":"12/20/2023, 4:03:13 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"mysql","saveName":"京东","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":1,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"params":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"arguments[0].innerText = \"'\" + arguments[0].innerText + '\"'","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
@ -651,7 +651,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
</div>
<divv-else-if='TClass == 7'>
<label>Code/Script Content (<ahref="https://github.com/NaiboWang/EasySpider/wiki/Example-of-JavaScript-instruction-for-the-current-iteration-in-a-conditional-statement"target="_blank">Click here</a> for more examples): </label>
<textareaspellcheck=falseonkeydown="inputDelete(event)"class="form-control"rows="3"v-model='nowNode["parameters"]["code"]'placeholder="Enter the JS command for the current loop item. The loop item is represented by arguments[0]. If the return value is greater than 0 or true, the operations within this branch will be executed; otherwise, they will not be executed. For example: return arguments[0].innerText.length >= 5, which checks if the text length of the current loop item is greater than 5. Note that this is used in combination with element-related loop types (e.g., non-fixed element lists)."></textarea>
<textareaspellcheck=falseonkeydown="inputDelete(event)"class="form-control"rows="3"v-model='nowNode["parameters"]["code"]'placeholder="Enter the JS command for the current loop item. The loop item is represented by arguments[0]. If the return value is greater than 0 or true, the operations within this branch will be executed; otherwise, they will not be executed. For example: return arguments[0].innerText.length > 5, which checks if the text length of the current loop item is greater than 5. Note that this is used in combination with element-related loop types (e.g., non-fixed element lists)."></textarea>
<label>Maximum wait time for script execution (0 represents unlimited wait time): </label>