Browse Source

Remove Duplicate Pre

pull/254/head
naibo 9 months ago
parent
commit
0ec831dbf2
6 changed files with 24 additions and 2 deletions
  1. +6
    -0
      ElectronJS/src/taskGrid/FlowChart.html
  2. +5
    -0
      ElectronJS/src/taskGrid/FlowChart_CN.html
  3. +1
    -0
      ElectronJS/src/taskGrid/logic.js
  4. +1
    -1
      ElectronJS/tasks/297.json
  5. +1
    -1
      ExecuteStage/.vscode/launch.json
  6. +10
    -0
      ExecuteStage/easyspider_executestage.py

+ 6
- 0
ElectronJS/src/taskGrid/FlowChart.html View File

@ -592,6 +592,7 @@ If the expression returns a value greater than 0 or evaluates to True, the loop
<label>Waiting time in seconds after a history record rollback: </label>
<input spellcheck=false onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["historyWait"]'></input>
<label>After executed, whether scroll down:</label>
<select v-model='nowNode["parameters"]["scrollType"]' class="form-control">
<option :value = 0>No Scrolling</option>
@ -712,6 +713,11 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
<!-- <option value=0>No</option>-->
<!-- <option value=1>Yes (Only support on Windows x64 platform)</option>-->
<!-- </select>-->
<label>Remove duplicates after execution (note that this function will be executed at the end of the task, and leaving the task midway will not perform deduplication):</label>
<select id="removeDuplicate" name="removeDuplicate" class="form-control">
<option value="0">No</option>
<option value="1">Yes</option>
</select>
<label>To modify the input parameters of each operation during execution, read the following Excel (.xlsx) file. Please click the "Read Input Parameters from Excel File" button when calling the task to view the file format:</label>
<input spellcheck=false onkeydown="inputDelete(event)" id="inputExcel" name="inputExcel" class="form-control" placeholder="If left empty, input parameters will not be read from Excel. The file path is relative to the EasySpider folder, e.g., inputs/task1.xlsx"></input>
<label>Browser Emulation Type:</label>

+ 5
- 0
ElectronJS/src/taskGrid/FlowChart_CN.html View File

@ -713,6 +713,11 @@ print(emotlib.emoji()) # 使用其中的函数。
<!-- <option value = 0>否</option>-->
<!-- <option value = 1>是(只支持Windows x64系统)</option>-->
<!-- </select>-->
<label>执行完成后是否去除重复数据(注意此功能需要等到任务结束时执行,因此执行任务中途退出将无法进行去重):</label>
<select id="removeDuplicate" name="removeDuplicate" class="form-control">
<option value = 0>否</option>
<option value = 1>是</option>
</select>
<label>执行时通过读取以下Excel(.xlsx)文件来修改各个操作的输入参数,文件格式请在调用任务时点击“从Excel文件读取输入参数”按钮查看:</label>
<input spellcheck=false onkeydown="inputDelete(event)" id="inputExcel" name="inputExcel" class="form-control" placeholder="为空则不从Excel读取输入参数,文件路径相对于EasySpider文件夹,如inputs/task1.xlsx"></input>
<label>浏览器模拟类型:</label>

+ 1
- 0
ElectronJS/src/taskGrid/logic.js View File

@ -611,6 +611,7 @@ function saveService(type) {
"pauseKey": $("#pauseKey").val(),
"containJudge": containJudge,
"browser": $("#browser").val(),
"removeDuplicate": parseInt($("#removeDuplicate").val()),
"desc": serviceDescription,
"inputParameters": inputParameters,
"outputParameters": outputParameters,

+ 1
- 1
ElectronJS/tasks/297.json
File diff suppressed because it is too large
View File


+ 1
- 1
ExecuteStage/.vscode/launch.json View File

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[77]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"args": ["--ids", "[14]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"--read_type", "remote"]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}

+ 10
- 0
ExecuteStage/easyspider_executestage.py View File

@ -469,6 +469,15 @@ class BrowserThread(Thread):
self.print_and_log(
"Already read input parameters from Excel and overwrite the original input parameters.")
def removeDuplicateData(self):
    """Announce duplicate removal when the task has the option enabled.

    Reads the ``removeDuplicate`` entry of ``self.service``. If the entry
    cannot be read, the option is treated as disabled (0). When the value
    equals 1, a Chinese and an English progress message are emitted through
    ``self.print_and_log``. This block only logs; it does not itself modify
    any collected data.
    """
    try:
        remove_duplicate = self.service["removeDuplicate"]
    except Exception:
        # Best-effort lookup: presumably task files saved before this option
        # existed lack the key (verify against the task schema). Catching
        # Exception instead of using a bare except keeps the fallback while
        # letting KeyboardInterrupt / SystemExit propagate.
        remove_duplicate = 0
    if remove_duplicate == 1:
        self.print_and_log("正在去除重复数据,请稍后……")
        self.print_and_log("Removing duplicate data, please wait...")
def run(self):
# 挨个执行程序
for i in range(len(self.links)):
@ -490,6 +499,7 @@ class BrowserThread(Thread):
quitWaitTime = self.service["quitWaitTime"]
except:
quitWaitTime = 60
self.removeDuplicateData()
self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。")
self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.")
time.sleep(quitWaitTime)

Loading…
Cancel
Save