Procházet zdrojové kódy

V0.6.0 Pre

pull/254/head
naibo před 9 měsíci
rodič
revize
e4037e221d
32 změnil soubory, kde provedl 335 přidání a 224 odebrání
  1. +1
    -0
      ElectronJS/.gitignore
  2. +1
    -0
      ElectronJS/clean_and_release_win32.cmd
  3. +1
    -0
      ElectronJS/clean_and_release_win64.cmd
  4. +2
    -2
      ElectronJS/main.js
  5. +8
    -6
      ElectronJS/package-lock.json
  6. +1
    -1
      ElectronJS/package.json
  7. +1
    -0
      ElectronJS/package_linux64.sh
  8. +1
    -0
      ElectronJS/package_macos.sh
  9. +2
    -2
      ElectronJS/src/index.html
  10. +17
    -8
      ElectronJS/src/taskGrid/FlowChart.html
  11. +17
    -10
      ElectronJS/src/taskGrid/FlowChart_CN.html
  12. +6
    -6
      ElectronJS/src/taskGrid/invokeTask.html
  13. +3
    -1
      ElectronJS/src/taskGrid/logic.js
  14. +1
    -1
      ElectronJS/src/taskGrid/newTask.html
  15. +1
    -1
      ElectronJS/src/taskGrid/taskInfo.html
  16. +1
    -1
      ElectronJS/src/taskGrid/taskList.html
  17. +1
    -1
      ElectronJS/tasks/215.json
  18. +1
    -1
      ElectronJS/tasks/219.json
  19. +1
    -0
      ElectronJS/tasks/222.json
  20. +1
    -0
      ElectronJS/tasks/223.json
  21. +1
    -0
      ElectronJS/tasks/224.json
  22. +1
    -0
      ElectronJS/tasks/225.json
  23. +1
    -0
      ElectronJS/tasks/226.json
  24. +1
    -0
      ElectronJS/tasks/227.json
  25. +1
    -0
      ElectronJS/tasks/228.json
  26. +2
    -0
      ExecuteStage/.gitignore
  27. +3
    -3
      ExecuteStage/.vscode/launch.json
  28. +2
    -2
      ExecuteStage/Readme.md
  29. +179
    -171
      ExecuteStage/easyspider_executestage.py
  30. +57
    -0
      ExecuteStage/myCode.py
  31. +1
    -1
      ExecuteStage/requirements.txt
  32. +18
    -6
      ExecuteStage/utils.py

+ 1
- 0
ElectronJS/.gitignore Zobrazit soubor

@ -18,3 +18,4 @@ npminstall-debug.log
mysql_config.json
EasySpider_en/
EasySpider_zh/
TempUserDataFolder/

+ 1
- 0
ElectronJS/clean_and_release_win32.cmd Zobrazit soubor

@ -16,6 +16,7 @@ copy ..\ExecuteStage\myChrome.py ..\.temp_to_pub\EasySpider_windows_x32\Code
copy ..\ExecuteStage\utils.py ..\.temp_to_pub\EasySpider_windows_x32\Code
copy ..\ExecuteStage\requirements.txt ..\.temp_to_pub\EasySpider_windows_x32\Code
copy ..\ExecuteStage\Readme.md ..\.temp_to_pub\EasySpider_windows_x32\Code
copy ..\ExecuteStage\myCode.py ..\.temp_to_pub\EasySpider_windows_x32
xcopy ..\ExecuteStage\undetected_chromedriver_ES ..\.temp_to_pub\EasySpider_windows_x32\Code\undetected_chromedriver_ES /E /I /Y
xcopy ..\ExecuteStage\.vscode ..\.temp_to_pub\EasySpider_windows_x32\Code\.vscode /E /I /Y
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\user_data

+ 1
- 0
ElectronJS/clean_and_release_win64.cmd Zobrazit soubor

@ -16,6 +16,7 @@ copy ..\ExecuteStage\myChrome.py ..\.temp_to_pub\EasySpider_windows_x64\Code
copy ..\ExecuteStage\utils.py ..\.temp_to_pub\EasySpider_windows_x64\Code
copy ..\ExecuteStage\requirements.txt ..\.temp_to_pub\EasySpider_windows_x64\Code
copy ..\ExecuteStage\Readme.md ..\.temp_to_pub\EasySpider_windows_x64\Code
copy ..\ExecuteStage\myCode.py ..\.temp_to_pub\EasySpider_windows_x64
xcopy ..\ExecuteStage\undetected_chromedriver_ES ..\.temp_to_pub\EasySpider_windows_x64\Code\undetected_chromedriver_ES /E /I /Y
xcopy ..\ExecuteStage\.vscode ..\.temp_to_pub\EasySpider_windows_x64\Code\.vscode /E /I /Y
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x64\user_data

+ 2
- 2
ElectronJS/main.js Zobrazit soubor

@ -322,10 +322,10 @@ async function beginInvoke(msg, ws) {
let parameters = [];
console.log(msg.message)
if (msg.message.user_data_folder == null || msg.message.user_data_folder == undefined || msg.message.user_data_folder == "") {
parameters = ["--id", "[" + msg.message.id + "]", "--server_address", server_address, "--user_data", 0];
parameters = ["--ids", "[" + msg.message.id + "]", "--server_address", server_address, "--user_data", 0];
} else {
let user_data_folder_path = path.join(task_server.getDir(), msg.message.user_data_folder);
parameters = ["--id", "[" + msg.message.id + "]", "--server_address", server_address, "--user_data", 1];
parameters = ["--ids", "[" + msg.message.id + "]", "--server_address", server_address, "--user_data", 1];
config.user_data_folder = msg.message.user_data_folder;
config.absolute_user_data_folder = user_data_folder_path;
fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));

+ 8
- 6
ElectronJS/package-lock.json Zobrazit soubor

@ -16,7 +16,7 @@
"http": "^0.0.1-security",
"multer": "^1.4.5-lts.1",
"node-window-manager": "^2.2.4",
"selenium-webdriver": "^4.8.0",
"selenium-webdriver": "^4.16.0",
"ws": "^8.12.0",
"xlsx": "^0.18.5"
},
@ -4800,12 +4800,13 @@
"license": "MIT"
},
"node_modules/selenium-webdriver": {
"version": "4.10.0",
"license": "Apache-2.0",
"version": "4.16.0",
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.16.0.tgz",
"integrity": "sha512-IbqpRpfGE7JDGgXHJeWuCqT/tUqnLvZ14csSwt+S8o4nJo3RtQoE9VR4jB47tP/A8ArkYsh/THuMY6kyRP6kuA==",
"dependencies": {
"jszip": "^3.10.1",
"tmp": "^0.2.1",
"ws": ">=8.13.0"
"ws": ">=8.14.2"
},
"engines": {
"node": ">= 14.20.0"
@ -5600,8 +5601,9 @@
"license": "ISC"
},
"node_modules/ws": {
"version": "8.13.0",
"license": "MIT",
"version": "8.14.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
"integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
"engines": {
"node": ">=10.0.0"
},

+ 1
- 1
ElectronJS/package.json Zobrazit soubor

@ -38,7 +38,7 @@
"http": "^0.0.1-security",
"multer": "^1.4.5-lts.1",
"node-window-manager": "^2.2.4",
"selenium-webdriver": "^4.8.0",
"selenium-webdriver": "^4.16.0",
"ws": "^8.12.0",
"xlsx": "^0.18.5"
},

+ 1
- 0
ElectronJS/package_linux64.sh Zobrazit soubor

@ -24,6 +24,7 @@ cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_Linux_x64/Code
cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_Linux_x64/Code
cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_Linux_x64/Code
cp ../ExecuteStage/Readme.md ../.temp_to_pub/EasySpider_Linux_x64/Code
cp ../ExecuteStage/myCode.py ../.temp_to_pub/EasySpider_Linux_x64
cp -Rf ../ExecuteStage/undetected_chromedriver_ES ../.temp_to_pub/EasySpider_Linux_x64/Code
cp -Rf ../ExecuteStage/.vscode ../.temp_to_pub/EasySpider_Linux_x64/Code
chmod 777 ../.temp_to_pub/EasySpider_Linux_x64/easy-spider.sh

+ 1
- 0
ElectronJS/package_macos.sh Zobrazit soubor

@ -24,5 +24,6 @@ cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/Readme.md ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/myCode.py ../.temp_to_pub/EasySpider_MacOS_all_arch
cp -Rf ../ExecuteStage/undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp -Rf ../ExecuteStage/.vscode ../.temp_to_pub/EasySpider_MacOS_all_arch/Code

+ 2
- 2
ElectronJS/src/index.html Zobrazit soubor

@ -1,5 +1,5 @@
<!DOCTYPE html>
<html">
<!DOCTYPE>
<html>
<head>
<script src="js/jquery-3.4.1.min.js"></script>

+ 17
- 8
ElectronJS/src/taskGrid/FlowChart.html Zobrazit soubor

@ -91,6 +91,7 @@
</div>
<div>
<label>Hint: Move the mouse to the smiley face to view the hint, double-click the option in the flowchart to try to run.</label>
<label>Option Name:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='list.nl[index.nowNodeIndex]["title"]'></input>
</div>
@ -230,7 +231,7 @@
<label><strong>{{paras.parameters[paraIndex]["name"]}}</strong></label>
<p v-if="nowNode['isInLoop']"><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["relative"]'></input>Use relative XPath</p>
<p v-if='!paras.parameters[paraIndex]["relative"]'><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["iframe"]'></input>Element is inside iframe</p>
<p>XPATH (Field["FieldName"] can be used in any XPATHS): <span style="font-size: 30px!important;" title="Relative XPATH writing: start with /, e.g. the loop item XPATH is /html/body/div[1], your input is /*[@id='tab-customer'], then the final addressed xpath is: /html/body/div[1]/*[@id='tab-customer']"></span></p>
<p>XPATH (Field["FieldName"] and eval("your code") can be used in any XPATHS): <span style="font-size: 30px!important;" title="Relative XPATH writing: start with /, e.g. the loop item XPATH is /html/body/div[1], your input is /*[@id='tab-customer'], then the final addressed xpath is: /html/body/div[1]/*[@id='tab-customer']"></span></p>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXPath"]' placeholder="If you want to write the XPath relative to the current element in the loop, you can write as *../div[1] which matches the first div child element of the parent of the current element in the loop."></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(paras.parameters[paraIndex]['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">Click here to view other equivalent XPath expressions</button></p>
<p style="margin-top: 10px">
@ -389,7 +390,7 @@
<div v-if='nowNode["parameters"]["codeMode"] < 3 || nowNode["parameters"]["codeMode"] >= 5 && nowNode["parameters"]["codeMode"] <=6'>
<label>Code (Use Field["FieldName"] to input the latest value of a field): </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="Please input a JavaScript command or a system command. For example, document.body.innerText = '1' is an example of a JavaScript command, and python D:/test.py is an example of a system command. If you choose to execute a JavaScript script for the current iteration, you can represent the element of the current iteration using arguments[0]. For instance, arguments[0].style.color = 'blue' sets the color of the element in the current iteration to blue."></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 5'>Please read the instructions first and then write the specific code in the input box above (not in this box).
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 5'>Please read the instructions first and then write the specific code in the input box above (not in this box). To execute a large amount of code, you can simply write "outside:myCode.py" and the program will read and execute the code within myCode.py under the EasySpider directory.
This option is an advanced feature that allows direct manipulation of the running browser using Python code. You can also customize variables in the entire execution environment and perform operations such as modifying and assigning values. Here are some examples:
1. Use `self.browser` to refer to the current browser being operated. You can directly use Selenium's API to perform operations, such as `self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)` to scroll to the bottom.
2. Define a global variable: `self.myVar = 1`
@ -407,7 +408,7 @@ sys.path.append("D:/Python38/Lib/site-packages") # Assume emotlib library exists
import emotlib # Now you can use emotlib library
print(emotlib.emoji()) # Use one of its functions.
</pre>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 6'>Please read the instructions first and then write the specific code in the input box above (not in this box).
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 6'>Please read the instructions first and then write the specific code in the input box above (not in this box). To execute a large amount of code, you can simply write "outside:myCode.py" and the program will read and execute the code within myCode.py under the EasySpider directory.
This option is an advanced feature that allows directly returning the expression value of Python code, and in other places, use Field["FieldName"] to represent the return value of this operation. Here are some examples:
1. Return relevant values of the current browser object. Use `self.browser` to refer to the current browser being operated. You can directly use Selenium's API to perform operations, such as `self.browser.find_element(By.CSS_SELECTOR, "body").text` to return the text on the current page.
2. Return the value of a custom global variable: `self.myVar`
@ -529,7 +530,7 @@ Please note that this feature does not support assigning values to variables. In
<div v-else-if='parseInt(loopType) < 8'>
<label>Code (Use Field["FieldName"] to input the latest value of a field):</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="Continue the loop if the command return value is greater than 0 or evaluates to true; otherwise, stop the loop. For example, return document.body.scrollWidth > 1000 is an example of a JavaScript command return value, and python D:/test.py is an example of a system command return value."></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 220px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='parseInt(loopType) == 7'>Please read the instructions first and then write the specific code in the input box above (not in this box).
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 220px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='parseInt(loopType) == 7'>Please read the instructions first and then write the specific code in the input box above (not in this box). To execute a large amount of code, you can simply write "outside:myCode.py" and the program will read and execute the code within myCode.py under the EasySpider directory.
Loop based on the expression value of Python code. Here are some examples:
1. Return relevant values of the current browser object. Use `self.browser` to refer to the current browser being operated. You can directly use Selenium's API to perform operations, such as `self.browser.find_element(By.CSS_SELECTOR, "body").text=="123"`, which checks whether the current page contains the text "123".
2. Return the value of a custom global variable: `self.myVar`
@ -541,8 +542,12 @@ If the expression returns a value greater than 0 or evaluates to True, the loop
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
<!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
<label v-if='parseInt(loopType) == 0'>Max Loop time (0 means infinite until cannot find the element or page content doesn't change):</label>
<input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
<div v-if='parseInt(loopType) == 0'>
<label>Maximum number of loop iterations (0 represents an infinite loop until no more elements are found or no changes in page content are detected):</label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
<label>Exit the loop when the content of the following page element does not change:</label>
<input onkeydown="inputDelete(event)" required class="form-control" type="text" v-model='nowNode["parameters"]["exitElement"]'></input>
</div>
<div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
<div>
@ -602,7 +607,7 @@ If the expression returns a value greater than 0 or evaluates to True, the loop
<div v-else-if='TClass > 0 && TClass < 7 || TClass == 8'>
<label>Code/Script Content: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="If the return value is greater than 0 or true, the operations within this branch will be executed; otherwise, they will not be executed. For example: return document.body.scrollWidth > 1000 or python D:/test.py, representing examples of JS command and system command return values."></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word!important; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='TClass == 8'>Please read the instructions first and then write the specific code in the input box above (not in this box).
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word!important; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='TClass == 8'>Please read the instructions first and then write the specific code in the input box above (not in this box). To execute a large amount of code, you can simply write "outside:myCode.py" and the program will read and execute the code within myCode.py under the EasySpider directory.
Use the expression value of Python code to determine whether a condition is satisfied. Here are some examples:
1. Return relevant values of the current browser object. Use `self.browser` to refer to the current browser being operated. You can directly use Selenium's API to perform operations, such as `self.browser.find_element(By.CSS_SELECTOR, "body").text=="123"`, which checks whether the current page contains the text "123".
2. Return the value of a custom global variable: `self.myVar`
@ -680,7 +685,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
<label>Browser Emulation Type:</label>
<select id="environment" name="environment" class="form-control">
<option value=0>Desktop</option>
<option value=1>Mobile (Not supported under Cloudflare mode)</option>
<option value=1>Mobile</option>
</select>
<label>Whether to maximize the browser window:</label>
<select id="maximizeWindow" name="maximizeWindow" class="form-control">
@ -694,6 +699,10 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
<option value="0">No</option>
<option value="1">Yes (Requires running the same task ID and the same file name, please execute from the command line and specify the ID)</option>
</select>
<!-- <label>Wait time for the browser to close after the task is executed (in seconds):</label>-->
<label>任务执行完毕后自动关闭浏览器等待秒数(用户临时目录将在浏览器关闭后自动删除):</label>
<label>Wait time for the browser to close after the task is executed (in seconds), the temporary user data directory will be automatically deleted after the browser is closed:</label>
<input onkeydown="inputDelete(event)" type="number" value="60" id="quitWaitTime" name="quitWaitTime" class="form-control"></input>
<label>Maximum Display Length of Data in Console Preview:</label>
<input onkeydown="inputDelete(event)" type="number" value="15" id="maxViewLength" class="form-control"></input>
<label>Record log when executing the task:</label>

+ 17
- 10
ElectronJS/src/taskGrid/FlowChart_CN.html Zobrazit soubor

@ -91,7 +91,8 @@
</div>
<div>
<label>选项名称(鼠标移到笑脸可查看提示)<span style="font-size: 30px!important;" title="修改名称后点击下方“确定”按钮刷新流程图"></span></label>
<label>提示:鼠标移到笑脸可查看提示,在流程图中双击选项可试运行。</label>
<label>选项名称</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='list.nl[index.nowNodeIndex]["title"]'></input>
</div>
<!-- 下面是10种不同类型操作选项的不同的配置页面 -->
@ -104,7 +105,7 @@
<!-- <label>url:</label>-->
<!-- <input onkeydown="inputDelete(event)" class="form-control" v-model='nowNode["parameters"]["url"]'></input>-->
<label v-if='nowNode["parameters"]["url"]!="about:blank"'>链接池(每行一个链接,有多少行链接整个任务流程就会被执行多少次):</label>
<label v-else>链接(这里只能填写一个链接,因为是手动添加的打开网页操作)</label>
<label v-else>链接(只能填写一个链接,因为是手动添加的打开网页操作)</label>
<textarea v-if='nowNode["parameters"]["url"]!="about:blank"' onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["links"]'></textarea>
<input v-else onkeydown="inputDelete(event)" class="form-control" v-model='nowNode["parameters"]["links"]'></input>
</div>
@ -230,7 +231,7 @@
<label><strong>{{paras.parameters[paraIndex]["name"]}}</strong></label>
<p v-if="nowNode['isInLoop']"><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["relative"]'></input>使用相对循环内的XPATH</p>
<p v-if='!paras.parameters[paraIndex]["relative"]'><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["iframe"]'></input>元素在iframe内</p>
<p>XPath(所有XPath内均写Field["字段名"]表示参数值): <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></p>
<p>XPath(所有XPath内均可用Field["字段名"]表示参数值,用eval("你的代码")来替换成自定义的变量): <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></p>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXPath"]' placeholder="如果要写相对循环内的xpath,可以写如*../div[1]即匹配当前循环元素的父元素的第一个div子元素"></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(paras.parameters[paraIndex]['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
<p style="margin-top: 10px">
@ -389,7 +390,7 @@
<div v-if='nowNode["parameters"]["codeMode"] < 3 || nowNode["parameters"]["codeMode"] >= 5 && nowNode["parameters"]["codeMode"] <=6'>
<label>代码/脚本内容(用Field["字段名"]来输入某字段/自定义操作的最新提取/返回值): </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="输入JS或系统命令,如:document.body.innerText = '1' 或 python D:/test.py,分别为JS命令和系统命令示例。如选择针对当前循环项的JS脚本,则循环项元素用arguments[0]表示,如arguments[0].style.color = 'blue'"></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 5'>请先阅读此说明,再在上方输入框(不是本框)写具体代码。
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 5'>请先阅读此说明,再在上方输入框(不是本框)写具体代码,如果要执行大量代码,可以直接写outside:myCode.py,这样程序就会读取并执行EasySpider目录下的myCode.py中的代码
此选项为高级功能,可以直接用Python代码操纵正在运行中的浏览器,及可以自定义整个执行环境中的变量,并对变量进行修改赋值等操作,示例:
1. 用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)即可滚动到页面最下方。
2. 自定义一个全局变量:self.myVar = 1
@ -407,7 +408,7 @@ sys.path.append("D:/Python38/Lib/site-packages") # 假设此路径下有emotlib
import emotlib # 此时就可以使用emotlib库了
print(emotlib.emoji()) # 使用其中的函数。
</pre>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 6'>请先阅读此说明,再在上方输入框(不是本框)写具体代码。
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='nowNode["parameters"]["codeMode"] == 6'>请先阅读此说明,再在上方输入框(不是本框)写具体代码,如果要执行大量代码,可以直接写outside:myCode.py,这样程序就会读取并执行EasySpider目录下的myCode.py中的代码
此选项为高级功能,可以直接返回Python代码的表达式值,并在其他位置用Field["本操作名称"]表示此操作返回值,示例:
1. 返回当前浏览器对象的相关值,用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").text即可返回当前页面的文字。
2. 返回自定义全局变量的值:self.myVar
@ -529,7 +530,7 @@ print(emotlib.emoji()) # 使用其中的函数。
<div v-else-if='parseInt(loopType) < 8'>
<label>代码/脚本内容(用Field["字段名"]来输入某字段/自定义操作的最新提取/返回值): </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="命令返回值大于0或为真则继续循环,否则停止循环。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。"></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 220px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='parseInt(loopType) == 7'>请先阅读此说明,再在上方输入框(不是本框)写具体代码。
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 220px; font-size: 15px!important; word-wrap: break-word; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='parseInt(loopType) == 7'>请先阅读此说明,再在上方输入框(不是本框)写具体代码,如果要执行大量代码,可以直接写outside:myCode.py,这样程序就会读取并执行EasySpider目录下的myCode.py中的代码
根据Python代码的表达式值来决定是否循环,示例:
1. 返回当前浏览器对象的相关值,用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").text=="123",表示判断当前页面是否为123这个文本。
2. 返回自定义全局变量的值:self.myVar,如果
@ -541,8 +542,12 @@ print(emotlib.emoji()) # 使用其中的函数。
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
<!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
<label v-if='parseInt(loopType) == 0'>最多执行循环次数(0代表无限循环直到找不到元素或检测不到页面内容变化为止):</label>
<input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
<div v-if='parseInt(loopType) == 0'>
<label>最多执行循环次数(0代表无限循环直到找不到元素或检测不到页面内容变化为止):</label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
<label>检测页面以下元素内容不变化时退出循环:</label>
<input onkeydown="inputDelete(event)" required class="form-control" type="text" v-model='nowNode["parameters"]["exitElement"]'></input>
</div>
<div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
<div>
@ -602,7 +607,7 @@ print(emotlib.emoji()) # 使用其中的函数。
<div v-else-if='TClass > 0 && TClass < 7 || TClass == 8'>
<label>代码/脚本内容(用Field["字段名"]来输入某字段/自定义操作的最新提取/返回值): </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="命令返回值大于0或为真则执行此分支内操作,否则不执行。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。"></textarea>
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word!important; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='TClass == 8'>请先阅读此说明,再在上方输入框(不是本框)写具体代码。
<pre class="form-control" style="background: white; margin-top: 20px; min-height: 200px; font-size: 15px!important; word-wrap: break-word!important; white-space: pre-wrap; border-radius: 0; border: 1px solid" disabled v-if='TClass == 8'>请先阅读此说明,再在上方输入框(不是本框)写具体代码,如果要执行大量代码,可以直接写outside:myCode.py,这样程序就会读取并执行EasySpider目录下的myCode.py中的代码
根据Python代码的表达式值来判断条件是否满足,示例:
1. 返回当前浏览器对象的相关值,用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").text=="123",表示判断当前页面是否为123这个文本。
2. 返回自定义全局变量的值:self.myVar,如果
@ -680,7 +685,7 @@ print(emotlib.emoji()) # 使用其中的函数。
<label>浏览器模拟类型:</label>
<select id="environment" name="environment" class="form-control">
<option value = 0>电脑端</option>
<option value = 1>手机端(Cloudflare模式下不支持)</option>
<option value = 1>手机端</option>
</select>
<label>是否最大化浏览器窗口:</label>
<select id="maximizeWindow" name="maximizeWindow" class="form-control">
@ -694,6 +699,8 @@ print(emotlib.emoji()) # 使用其中的函数。
<option value = 0></option>
<option value = 1>是(需要运行同一个任务ID和固定的文件名,请用命令行执行并指定ID)</option>
</select>
<label>任务执行完毕后自动关闭浏览器等待秒数(用户信息临时目录将在浏览器关闭后自动删除):</label>
<input onkeydown="inputDelete(event)" type="number" value="60" id="quitWaitTime" name="quitWaitTime" class="form-control"></input>
<label>控制台预览时数据最大显示长度:</label>
<input onkeydown="inputDelete(event)" type="number" value="15" id="maxViewLength" class="form-control"></input>
<label>任务执行时是否记录日志:</label>

+ 6
- 6
ElectronJS/src/taskGrid/invokeTask.html Zobrazit soubor

@ -10,7 +10,7 @@
<script src="vue.js"></script>
<script src="bootstrap/js/bootstrap.js"></script>
<link href="bootstrap/css/bootstrap.css" rel="stylesheet"></link>
<title>Task Invoke</title>
<title>任务调用 | Task Invoke</title>
<style>
table {
table-layout: auto;
@ -248,7 +248,7 @@
<label style="margin-top: 15px;display: block">{{"You can also use the XPath Helper extension to test XPaths when executing the task:~执行任务的过程中也可以随时使用XPath Helper扩展来调试XPath。" | lang}}</label>
<label style="margin-top: 15px;display: block">{{"如果想进行更复杂的操作,如设置无头模式,设置定时执行等,请使用下方的命令行执行任务选项并配置好命令行参数。~ If you want to perform more complex operations, such as setting headless mode, setting scheduled execution, etc., please use the command line to execute the task and configure the command line parameters below." | lang}}</label>
<div style="margin-bottom: 10px;">
<label style="margin-top: 10px;">{{"Execution ID (EID):~执行ID:" | lang}}</label>
<label style="margin-top: 10px;">{{"Execution ID (EID), execution files are stored in 'execution_instances' folder:~执行ID(执行文件存放在execution_instances文件夹内):" | lang}}</label>
<input class="form-control" v-model="ID"></input>
<p></p>
<!-- <p>提示:点击下方按钮获得任务ID,然后根据此ID进行服务执行;也可自己POST调用接口得到ID,具体参照POST调用文档。</p> -->
@ -499,18 +499,18 @@
/**
 * Rebuilds the command-line string stored in `app.$data.command` for the
 * operating system reported by the backend.
 *
 * Asks `<backEndAddressServiceWrapper>/queryOSVersion` for the OS info and,
 * in the callback, picks the matching `easyspider_executestage` executable
 * path. The command carries the current execution ID list, the `--user_data`
 * flag (1 or 0 depending on `with_user_data`) and the `--server_address`.
 *
 * NOTE(review): every branch assigns `app.$data.command` twice in a row; the
 * second assignment (using `--ids`) overwrites the first (using `--id`), so
 * only the `--ids` form is ever left in place.
 */
function changeCommand() {
$.get(app.$data.backEndAddressServiceWrapper + "/queryOSVersion", function (OSInfo) {
// 64-bit Windows build
if(OSInfo.version == 'win32' && OSInfo.bit == 'x64'){
app.$data.command = "./EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [" + app.$data.ID.toString() + "] --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
app.$data.command = "./EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --ids [" + app.$data.ID.toString() + "] --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
// 32-bit Windows build
} else if(OSInfo.version == 'win32' && OSInfo.bit == 'ia32'){
app.$data.command = "./EasySpider/resources/app/chrome_win32/easyspider_executestage.exe --id [" + app.$data.ID.toString() + "] --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
app.$data.command = "./EasySpider/resources/app/chrome_win32/easyspider_executestage.exe --ids [" + app.$data.ID.toString() + "] --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
// Linux: the ID list is wrapped in single quotes in the generated command
} else if(OSInfo.version == 'linux'){
app.$data.command = "./EasySpider/resources/app/chrome_linux64/easyspider_executestage --id '[" + app.$data.ID.toString() + "]' --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
app.$data.command = "./EasySpider/resources/app/chrome_linux64/easyspider_executestage --ids '[" + app.$data.ID.toString() + "]' --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
// macOS: also fills in a localized hint telling the user which folder to cd into
} else if(OSInfo.version == 'darwin'){
// presumably config_folder looks like /Users/<name>/..., so index 2 is the user name -- TODO confirm
if(getUrlParam("lang") == "zh"){
app.$data.easyspider_location = "你的EasySpider文件夹,如:cd /Users/"+ app.$data.config_folder.split("/")[2] + "/Downloads/EasySpider_MacOS_all_arch";
} else {
app.$data.easyspider_location = "Your EasySpider folder, such as: cd /Users/"+ app.$data.config_folder.split("/")[2] + "/Downloads/EasySpider_MacOS_all_arch";
}
app.$data.command = "./easyspider_executestage --id '[" + app.$data.ID.toString() + "]' --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
app.$data.command = "./easyspider_executestage --ids '[" + app.$data.ID.toString() + "]' --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
}
});
}

+ 3
- 1
ElectronJS/src/taskGrid/logic.js Zobrazit soubor

@ -29,7 +29,7 @@ ws.onmessage = function(evt) {
evt = JSON.parse(evt.data);
console.log(evt);
if (evt["type"] == "title") { //如果不是特殊处理的话,默认全部是增加元素操作
if (old_title == "New Task") { //只记录第一次的title
if (old_title == "新任务 | New Task") { //只记录第一次的title
$("#serviceName").val(evt.data.title);
}
old_title = evt.data.title;
@ -226,6 +226,7 @@ function addParameters(t) {
t["parameters"]["code"] = ""; //执行的代码
t["parameters"]["waitTime"] = 0; //最长等待时间
t["parameters"]["exitCount"] = 0; //执行多少次后退出循环,0代表不设置此条件
t["parameters"]["exitElement"] = "//body"; //检测此元素不变时退出循环
t["parameters"]["historyWait"] = 2; //历史记录回退时间,用于循环点击每个链接的情况下点击链接后不打开新标签页的情况
t["parameters"]["breakMode"] = 0; //break类型,0代表JS,2代表系统命令
t["parameters"]["breakCode"] = ""; //break条件
@ -485,6 +486,7 @@ function saveService(type) {
"version": "0.6.0",
"saveThreshold": saveThreshold,
// "cloudflare": cloudflare,
"quitWaitTime": parseInt($("#quitWaitTime").val()),
"environment": environment,
"maximizeWindow": parseInt($("#maximizeWindow").val()),
"maxViewLength": parseInt($("#maxViewLength").val()),

+ 1
- 1
ElectronJS/src/taskGrid/newTask.html Zobrazit soubor

@ -9,7 +9,7 @@
<script src="bootstrap/js/bootstrap.js"></script>
<script src="vue.js"></script>
<link rel="stylesheet" href="bootstrap/css/bootstrap.css"></link>
<title>New Task</title>
<title>新任务 | New Task</title>
<style>
@media (max-width: 500px) {
#newTask{

+ 1
- 1
ElectronJS/src/taskGrid/taskInfo.html Zobrazit soubor

@ -8,7 +8,7 @@
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<script src="vue.js"></script>
<link rel="stylesheet" href="bootstrap/css/bootstrap.css"></link>
<title>Task Information</title>
<title>任务信息 | Task Information</title>
<style>
table {
table-layout: auto;

+ 1
- 1
ElectronJS/src/taskGrid/taskList.html Zobrazit soubor

@ -8,7 +8,7 @@
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<script src="vue.js"></script>
<link rel="stylesheet" href="bootstrap/css/bootstrap.css"></link>
<title>Start</title>
<title>任务列表 | Task List</title>
</head>
<style>

+ 1
- 1
ElectronJS/tasks/215.json
Diff nebyl zobrazen, protože je příliš veliký
Zobrazit soubor


+ 1
- 1
ElectronJS/tasks/219.json Zobrazit soubor

@ -1 +1 @@
{"id":219,"name":"Index of /files/","url":"https://www.easyspider.cn/files","links":"https://www.easyspider.cn/files","create_time":"12/5/2023, 3:49:19 AM","update_time":"12/5/2023, 5:00:23 AM","version":"0.6.0","saveThreshold":10,"environment":0,"maximizeWindow":1,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.easyspider.cn/files","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.easyspider.cn/files","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.easyspider.cn/files"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,4,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.easyspider.cn/files","links":"https://www.easyspider.cn/files","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击V0.5.0/","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/pre[1]/a[7]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scroll
Count":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/pre[1]/a[7]","//a[contains(., 'V0.5.0/')]","/html/body/pre/a"]}},{"id":4,"index":3,"parentId":0,"type":0,"option":2,"title":"点击EasySpider_0....","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/pre[1]/a[4]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/pre[1]/a[4]","//a[contains(., 'EasySpider')]","/html/body/pre/a[last()-1]"]}},{"id":3,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":"7","code":"","waitTime":0,"recordASField":0,"paraType":"text"}}]}
{"id":219,"name":"Index of /files/","url":"https://www.easyspider.cn/files","links":"https://www.easyspider.cn/files","create_time":"12/5/2023, 3:49:19 AM","update_time":"12/7/2023, 2:43:02 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":1,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.easyspider.cn/files","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.easyspider.cn/files","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.easyspider.cn/files"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.easyspider.cn/files","links":"https://www.easyspider.cn/files","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击V0.5.0/","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/pre[1]/a[7]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scr
ollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/pre[1]/a[7]","//a[contains(., 'V0.5.0/')]","/html/body/pre/a"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击EasySpider_0....","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/pre[1]/a[4]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/pre[1]/a[4]","//a[contains(., 'EasySpider')]","/html/body/pre/a[last()-1]"]}},{"id":4,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":"7","code":"","waitTime":0,"recordASField":0,"paraType":"text"}}]}

+ 1
- 0
ElectronJS/tasks/222.json
Diff nebyl zobrazen, protože je příliš veliký
Zobrazit soubor


+ 1
- 0
ElectronJS/tasks/223.json Zobrazit soubor

@ -0,0 +1 @@
{"id":223,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"","update_time":"12/7/2023, 12:43:42 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击手机","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, 
\"LeftSide_menu_list__qXCeM\")]/div[1]/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":2,"index":3,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":"5","code":"from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException\ntry:\n # 使用XPath定位元素并点击\n element = self.browser.find_element(By.XPATH, \"//*[contains(@class, 'LeftSide_menu_list__qX123CeM')]/div[1]/a[1]\")\n element.click()\n print(\"点击成功\")\nexcept ElementClickInterceptedException:\n # 如果元素被遮挡,点击失败\n print(\"元素被遮挡,无法点击\")\nexcept Exception as e:\n # 打印其他异常\n print(\"发生了一个异常:\", e)\nfinally:\n print(\"finally\")","waitTime":0,"recordASField":0,"paraType":"text"}}]}

+ 1
- 0
ElectronJS/tasks/224.json
Diff nebyl zobrazen, protože je příliš veliký
Zobrazit soubor


+ 1
- 0
ElectronJS/tasks/225.json
Diff nebyl zobrazen, protože je příliš veliký
Zobrazit soubor


+ 1
- 0
ElectronJS/tasks/226.json
Diff nebyl zobrazen, protože je příliš veliký
Zobrazit soubor


+ 1
- 0
ElectronJS/tasks/227.json Zobrazit soubor

@ -0,0 +1 @@
{"id":227,"name":"MyCode","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"12/7/2023, 2:04:22 AM","update_time":"12/7/2023, 2:13:25 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击手机","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, 
\"LeftSide_menu_list__qXCeM\")]/div[1]/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":2,"index":3,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":"5","code":"outside:myCode.py","waitTime":0,"recordASField":0,"paraType":"text"}}]}

+ 1
- 0
ElectronJS/tasks/228.json Zobrazit soubor

@ -0,0 +1 @@
{"id":228,"name":"[2312.02977] Exploring the nonclassical dynamics of the \"classical'' Schrödinger equation","url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","create_time":"12/7/2023, 2:44:58 AM","update_time":"12/7/2023, 2:56:47 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":1,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"desc":"https://arxiv.org/abs/2312.02977","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/abs/2312.02977","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://arxiv.org/abs/2312.02977"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, 
\"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}}]}

+ 2
- 0
ExecuteStage/.gitignore Zobrazit soubor

@ -16,3 +16,5 @@ execution_instances/
.DS_Store
mysql_config.json
EasySpider.app/
TempUserDataFolder/

+ 3
- 3
ExecuteStage/.vscode/launch.json Zobrazit soubor

@ -10,9 +10,9 @@
"program": "easyspider_executestage.py",
"console": "integratedTerminal",
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[96]", "--headless", "0", "--user_data", "0", "--keyboard", "1"]
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[52]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
}
]
}

+ 2
- 2
ExecuteStage/Readme.md Zobrazit soubor

@ -58,7 +58,7 @@ This section covers the compilation instructions for the `Execution stage progra
在当前文件夹下直接运行程序:
```Python
python3 easyspider_executestage.py --id [1]
python3 easyspider_executestage.py --ids [1]
```
以上是运行任务号为`1`的任务的示例命令,更多命令行参数使用说明请参考:[Argument Instruction](https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction)。
@ -70,7 +70,7 @@ Before running the program, make sure you have completed the compilation of the
To run the program directly in the current folder, use the following command:
```Python
python3 easyspider_executestage.py --id [1]
python3 easyspider_executestage.py --ids [1]
```
The above is an example command to run a task with the ID of `1`. For more information on command-line parameters, please refer to: [Argument Instruction](https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction) on the project's GitHub Wiki.

+ 179
- 171
ExecuteStage/easyspider_executestage.py Zobrazit soubor

@ -1,8 +1,12 @@
# -*- coding: utf-8 -*-
# import atexit
import atexit
import copy
import shutil
import string
import undetected_chromedriver as uc
from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel, write_to_json
on_press_creator, on_release_creator, readCode, replace_field_values, write_to_csv, write_to_excel, write_to_json
from myChrome import MyChrome
from threading import Thread, Event
from PIL import Image
@ -24,6 +28,7 @@ from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from pynput.keyboard import Key, Listener
from datetime import datetime
import io # 遇到错误退出时应执行的代码
@ -55,7 +60,7 @@ desired_capabilities["pageLoadStrategy"] = "none"
class BrowserThread(Thread):
def __init__(self, browser_t, id, service, version, event, saveName, config):
def __init__(self, browser_t, id, service, version, event, saveName, config, option):
Thread.__init__(self)
self.logs = io.StringIO()
try:
@ -63,6 +68,7 @@ class BrowserThread(Thread):
except:
self.log = True
self.browser = browser_t
self.option = option
self.config = config
self.version = version
self.totalSteps = 0
@ -90,12 +96,12 @@ class BrowserThread(Thread):
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
self.saveName = self.saveName.replace("current_time", now)
self.print_and_log("任务ID", i, "的保存文件名为:", self.saveName)
self.print_and_log("Save Name for task ID", i, "is:", self.saveName)
if not os.path.exists("Data/Task_" + str(i)):
os.mkdir("Data/Task_" + str(i))
if not os.path.exists("Data/Task_" + str(i) + "/" + self.saveName):
os.mkdir("Data/Task_" + str(i) + "/" +
self.print_and_log("任务ID", id, "的保存文件名为:", self.saveName)
self.print_and_log("Save Name for task ID", id, "is:", self.saveName)
if not os.path.exists("Data/Task_" + str( id)):
os.mkdir("Data/Task_" + str( id))
if not os.path.exists("Data/Task_" + str( id) + "/" + self.saveName):
os.mkdir("Data/Task_" + str( id) + "/" +
self.saveName) # 创建保存文件夹用来保存截图
self.getDataStep = 0
self.startSteps = 0
@ -245,7 +251,7 @@ class BrowserThread(Thread):
cookies = node["parameters"]["cookies"]
except:
node["parameters"]["cookies"] = ""
if node["option"] == 2: # 点击操作
elif node["option"] == 2: # 点击操作
try:
alertHandleType = node["parameters"]["alertHandleType"]
except:
@ -312,6 +318,13 @@ class BrowserThread(Thread):
node["parameters"]["xpath"] = ""
self.print_and_log("您的任务版本号为" + self.task_version +
",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
elif node["option"] == 8: # 循环操作
try:
exitElement = node["parameters"]["exitElement"]
if exitElement == "":
node["parameters"]["exitElement"] = "//body"
except:
node["parameters"]["exitElement"] = "//body"
self.print_and_log("预处理完成|Preprocess completed")
def readFromExcel(self):
@ -391,7 +404,21 @@ class BrowserThread(Thread):
self.saveData(exit=True)
if self.outputFormat == "mysql":
self.mysql.close()
try:
quitWaitTime = self.service["quitWaitTime"]
except:
quitWaitTime = 60
self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。")
self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.")
time.sleep(quitWaitTime)
self.browser.quit()
self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...")
try:
shutil.rmtree(self.option["tmp_user_data_folder"])
except:
pass
self.print_and_log("清理完成!|Clean up completed!")
def recordLog(self, *args, **kwargs):
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
print(now + ":", *args, file=self.logs, **kwargs)
@ -576,6 +603,7 @@ class BrowserThread(Thread):
self.recordLog("JavaScript execution failed")
elif int(codeMode) == 5:
try:
code = readCode(code)
output = exec(code)
self.recordLog("执行下面的代码:" + code)
self.recordLog("Execute the following code:" + code)
@ -585,6 +613,7 @@ class BrowserThread(Thread):
code, ", error is:", e)
elif int(codeMode) == 6:
try:
code = readCode(code)
output = eval(code)
self.recordLog("获得下面的代码返回值:" + code)
self.recordLog(
@ -909,6 +938,40 @@ class BrowserThread(Thread):
self.recordLog(
"判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met")
def handleHistory(self, node, xpath, thisHitoryURL, thisHistoryLength, index, element=None, elements=None):
    """Rewind the browser history after one loop iteration and refresh the loop target.

    If ``self.history["index"]`` differs from ``thisHistoryLength`` while the
    current window handle still equals ``self.history["handle"]``, the page is
    moved back with ``history.go(difference)``, the method sleeps for
    ``node["parameters"]["historyWait"]`` seconds and then asks the page to
    stop loading.

    If the browser then sits on a ``data:`` URL, it steps forward with
    ``history.go(1)`` until the URL equals ``thisHitoryURL`` (giving up after
    more than ``thisHistoryLength`` attempts), re-locates the loop target via
    ``xpath`` and decrements ``index`` (when positive) so the item is retried.

    Args:
        node: Loop node; ``parameters.historyWait`` and ``parameters.iframe`` are read.
        xpath: XPath used to re-locate the element(s) after a ``data:`` page.
        thisHitoryURL: URL recorded before the iteration.
        thisHistoryLength: History index recorded before the iteration.
        index: Current loop index.
        element: Single element for the fixed-list loop, or ``None``.
        elements: Element list for the non-fixed-list loop, or ``None``.

    Returns:
        ``(index, target)`` where ``target`` is the refreshed element / element
        list when a re-query happened, otherwise ``element`` if it was given,
        otherwise ``elements``.
    """
    # Only rewind when the history moved inside the same window/tab.
    history_changed = (
        self.history["index"] != thisHistoryLength
        and self.history["handle"] == self.browser.current_window_handle
    )
    if history_changed:
        difference = thisHistoryLength - self.history["index"]  # history offset to undo
        self.browser.execute_script('history.go(' + str(difference) + ')')  # go back
        time.sleep(node["parameters"]["historyWait"])  # wait after going back
        try:
            self.browser.execute_script('window.stop()')
        except Exception:
            pass  # best effort: stopping the page load is optional
        if self.browser.current_url.startswith("data:"):
            ti = 0
            while self.browser.current_url != thisHitoryURL:
                try:
                    # On a data: URL, step forward until the original page is back.
                    self.browser.execute_script("history.go(1)")
                except Exception:
                    pass  # e.g. the script call timed out; retry on the next pass
                ti += 1
                if self.browser.current_url == thisHitoryURL or ti > thisHistoryLength:
                    break
                time.sleep(2)
            if element is None:  # non-fixed element list: re-query the whole list
                element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
            else:  # fixed element list: re-query the single element
                element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
            if index > 0:
                index -= 1  # retry the current item after a data: detour
    # Always fall back to the caller's list when nothing was re-queried, so a
    # caller doing `index, elements = handleHistory(..., elements=elements)`
    # never gets None back just because the history was rewound.
    if element is None:
        element = elements
    return index, element
# 对循环的处理
def loopExecute(self, node, loopValue, clickPath="", index=0):
time.sleep(0.1) # 第一次执行循环的时候强制等待1秒
@ -931,8 +994,7 @@ class BrowserThread(Thread):
# newBodyText = self.browser.page_source
# newBodyText = self.browser.find_element(By.XPATH, "//body").text
if node["parameters"]["exitCount"] == 0:
newBodyText = self.browser.find_element(
By.CSS_SELECTOR, "body", iframe=False).text
newBodyText = self.browser.find_element(By.XPATH, node["parameters"]["exitElement"], iframe=node["parameters"]["iframe"]).text
if node["parameters"]["iframe"]: # 如果标记了iframe
iframes = self.browser.find_elements(
By.CSS_SELECTOR, "iframe", iframe=False)
@ -1045,38 +1107,7 @@ class BrowserThread(Thread):
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
if self.history["index"] != thisHistoryLength and self.history[
"handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断
difference = thisHistoryLength - \
self.history["index"] # 计算历史记录变化差值
self.browser.execute_script(
'history.go(' + str(difference) + ')') # 回退历史记录
# if node["parameters"]["historyWait"] > 2: # 回退后要等待的时间
time.sleep(node["parameters"]["historyWait"])
# else:
# time.sleep(2)
# 切换历史记录等待:
self.recordLog("Change history back time or: ",
node["parameters"]["historyWait"])
try:
self.browser.execute_script('window.stop()')
except:
pass
ti = 0
if self.browser.current_url.startswith("data:"):
while self.browser.current_url != thisHitoryURL: # 如果执行完一次循环之后网址发生了变化
try:
self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步
except: # 超时的情况下
pass
ti += 1
if self.browser.current_url == thisHitoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化
break
time.sleep(2)
elements = self.browser.find_elements(By.XPATH,
xpath, iframe=node["parameters"]["iframe"])
if index > 0:
index -= 1 # 如果是data:开头的网址,就要重试一次
index, elements = self.handleHistory(node, xpath, thisHitoryURL, thisHistoryLength, index, elements=elements)
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"],
@ -1130,50 +1161,11 @@ class BrowserThread(Thread):
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
if self.history["index"] != thisHistoryLength and self.history[
"handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断
difference = thisHistoryLength - \
self.history["index"] # 计算历史记录变化差值
self.browser.execute_script(
'history.go(' + str(difference) + ')') # 回退历史记录
# if node["parameters"]["historyWait"] > 2: # 回退后要等待的时间
time.sleep(node["parameters"]["historyWait"])
# else:
# time.sleep(2)
self.recordLog("Change history back time or: ",
node["parameters"]["historyWait"])
try:
self.browser.execute_script('window.stop()')
except:
pass
# if self.browser.current_url.startswith("data:"):
# try:
# self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步
# except: # 超时的情况下
# pass
# time.sleep(2)
# elements = self.browser.find_elements(By.XPATH,
# xpath, iframe=node["parameters"]["iframe"])
# if index > 0:
# index -= 1 # 如果是data:开头的网址,就要重试一次
ti = 0
if self.browser.current_url.startswith("data:"):
while self.browser.current_url != thisHitoryURL: # 如果执行完一次循环之后网址发生了变化
try:
self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步
except: # 超时的情况下
pass
ti += 1
if self.browser.current_url == thisHitoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化
break
time.sleep(2)
elements = self.browser.find_elements(By.XPATH,
xpath, iframe=node["parameters"]["iframe"])
if index > 0:
index -= 1 # 如果是data:开头的网址,就要重试一次
index, element = self.handleHistory(node, path, thisHitoryURL, thisHistoryLength, index, element=element)
except NoSuchElementException:
self.print_and_log("Loop element not found: ", path)
self.print_and_log("找不到循环元素: ", path)
index += 1
continue # 循环中找不到元素就略过操作
except Exception as e:
raise
@ -1903,16 +1895,13 @@ class BrowserThread(Thread):
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
if __name__ == '__main__':
from multiprocessing import freeze_support
freeze_support() # 防止无限死循环多开
# 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度
# If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed
config = {
"id": [0],
"ids": [0],
"saved_file_name": "",
"user_data": False,
"config_folder": "",
@ -1925,20 +1914,20 @@ if __name__ == '__main__':
}
c = Config(config)
print(c)
options = Options()
options = webdriver.ChromeOptions()
driver_path = "chromedriver.exe"
import platform
print(sys.platform, platform.architecture())
option = webdriver.ChromeOptions()
# option = webdriver.ChromeOptions()
if not os.path.exists(os.getcwd() + "/Data"):
os.mkdir(os.getcwd() + "/Data")
if sys.platform == "darwin" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
# MacOS需要用option而不是options!
option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
option.add_extension(
"EasySpider.app/Contents/Resources/app/XPathHelper.crx")
# option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
# option.add_extension(
# "EasySpider.app/Contents/Resources/app/XPathHelper.crx")
options.add_extension(
"EasySpider.app/Contents/Resources/app/XPathHelper.crx")
driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64"
@ -1957,26 +1946,26 @@ if __name__ == '__main__':
if sys.platform == "win32" and platform.architecture()[0] == "32bit":
options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
option.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
# option.binary_location = os.path.join(
# os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
option.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
# option.binary_location = os.path.join(
# os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
# option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
else:
print("Unsupported platform")
@ -1993,16 +1982,21 @@ if __name__ == '__main__':
# 软件dev用
print("Finding chromedriver in EasySpider",
os.getcwd() + "/ElectronJS")
option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
# option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
option.add_extension("../ElectronJS/XPathHelper.crx")
# option.add_extension("../ElectronJS/XPathHelper.crx")
options.add_extension("../ElectronJS/XPathHelper.crx")
else:
options.binary_location = "./chrome.exe" # 指定chrome位置
# option.binary_location = "./chrome.exe" # 指定chrome位置
driver_path = "./chromedriver.exe"
option.add_extension("XPathHelper.crx")
# option.add_extension("XPathHelper.crx")
options.add_extension("XPathHelper.crx")
option.add_experimental_option(
# option.add_experimental_option(
# 'excludeSwitches', ['enable-automation']) # 以开发者模式
options.add_experimental_option(
'excludeSwitches', ['enable-automation']) # 以开发者模式
# user_data_dir = r'' # 注意没有Default!
@ -2021,65 +2015,84 @@ if __name__ == '__main__':
print("Config file path: " +
c.config_folder + c.config_file_name)
absolute_user_data_folder = config["absolute_user_data_folder"]
print("\nAbsolute_user_data_folder:",
absolute_user_data_folder, "\n")
except:
pass
if c.user_data:
option.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
option.add_argument("--profile-directory=Default")
options.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
options.add_argument("--profile-directory=Default")
print(
"正在使用带用户信息浏览器模式,注意此模式同一个用户信息目录只能同时运行一个浏览器实例,如果需要多开请复制用户信息目录并载入复制的目录地址,具体请参考程序多开文档:https://github.com/NaiboWang/EasySpider/wiki/Run-multiple-tasks-in-parallel")
print(
"Using browser with user data, please note that only one browser instance can be run at the same time with the same user data directory, if you need to run multiple instances, please copy the user data directory and load the copied directory address, please refer to the program multiple open document for details: https://github.com/NaiboWang/EasySpider/wiki/Run-multiple-tasks-in-parallel")
print(
"如果报错Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally,说明有之前运行的Chrome实例没有正常关闭,请关闭之前打开的所有Chrome实例后再运行程序即可。")
print(
"If you get an error Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally, it means that there is a Chrome instance that was not closed properly before, please close all Chrome instances that were opened before running the program.")
if c.headless:
print("Headless mode")
print("无头模式")
option.add_argument("--headless")
options.add_argument("--headless")
# options.add_argument(
# '--user-data-dir=C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\User Data') # TMALL 反扒
option.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
# option.add_argument(
# "--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument('-ignore-certificate-errors')
options.add_argument('-ignore -ssl-errors')
option.add_argument('-ignore-certificate-errors')
option.add_argument('-ignore -ssl-errors')
# option.add_argument('-ignore-certificate-errors')
# option.add_argument('-ignore -ssl-errors')
if c.headless:
print("Headless mode")
print("无头模式")
# option.add_argument("--headless")
options.add_argument("--headless")
tmp_options = []
for id in c.ids:
tmp_options.append({"options": copy.deepcopy(options), "tmp_user_data_folder": ""})
if c.user_data:
tmp_user_folder_parent = os.path.join(os.getcwd(), "TempUserDataFolder")
if not os.path.exists(tmp_user_folder_parent):
os.mkdir(tmp_user_folder_parent)
characters = string.ascii_letters + string.digits
for i in range(len(c.ids)):
id = c.ids[i]
# 从字符集中随机选择字符构成字符串
random_string = ''.join(random.choice(characters) for i in range(10))
tmp_user_data_folder = os.path.join(tmp_user_folder_parent, "user_data_" + str(id) + "_" + str(time.time()).replace(".","") + "_" + random_string)
tmp_options[i]["tmp_user_data_folder"] = tmp_user_data_folder
if os.path.exists(tmp_user_data_folder):
shutil.rmtree(tmp_user_data_folder)
print(f"Copying user data folder to: {tmp_user_data_folder}, please wait...")
print(f"正在复制用户信息目录到: {tmp_user_data_folder},请稍等...")
shutil.copytree(absolute_user_data_folder, tmp_user_data_folder)
print("User data folder copied successfully, if you exit the program before it finishes, please delete the temporary user data folder manually.")
print("用户信息目录复制成功,如果程序在运行过程中被手动退出,请手动删除临时用户信息目录。")
# option = tmp_options[i]["option"]
options = tmp_options[i]["options"]
# option.add_argument(
# f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
# option.add_argument("--profile-directory=Default")
options.add_argument(
f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
options.add_argument("--profile-directory=Default")
print(
"如果报错Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally,说明有之前运行的Chrome实例没有正常关闭,请关闭之前打开的所有Chrome实例后再运行程序即可。")
print(
"If you get an error Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally, it means that there is a Chrome instance that was not closed properly before, please close all Chrome instances that were opened before running the program.")
threads = []
for i in c.id:
# print(options)
print("id: ", i)
for i in range(len(c.ids)):
id = c.ids[i]
# option = tmp_options[i]["option"]
options = tmp_options[i]["options"]
print("id: ", id)
if c.read_type == "remote":
print("remote")
content = requests.get(
c.server_address + "/queryExecutionInstance?id=" + str(i))
c.server_address + "/queryExecutionInstance?id=" + str( id))
service = json.loads(content.text) # 加载服务信息
else:
print("local")
with open("execution_instances/" + str(i) + ".json", 'r', encoding='utf-8') as f:
with open("execution_instances/" + str( id) + ".json", 'r', encoding='utf-8') as f:
content = f.read()
service = json.loads(content) # 加载服务信息
try:
print("Task Name:", service["name"])
print("任务名称:", service["name"])
except:
print("Cannot find task with id:" + str(i) + ", please check whether " + str(i) +
".json exists in the 'execution_instances' folder.")
print("未找到id为" + str(i) + "的任务,请检查'execution_instances'文件夹中是否存在" + str(i) + ".json文件。")
print(f"Cannot find task with id: {str(id)}, please check whether {str(id)}.json exists in the 'execution_instances' folder.")
print(f"未找到id为{str(id)}的任务,请检查'execution_instances'文件夹中是否存在{str(id)}.json文件。")
continue
try:
cloudflare = service["cloudflare"]
@ -2087,8 +2100,8 @@ if __name__ == '__main__':
cloudflare = 0
if cloudflare == 0:
options.add_argument('log-level=3') # 隐藏日志
option.add_argument('log-level=3') # 隐藏日志
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(i))
# option.add_argument('log-level=3') # 隐藏日志
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str( id))
print("Data path:", path)
options.add_experimental_option("prefs", {
# 设置文件下载路径
@ -2103,30 +2116,32 @@ if __name__ == '__main__':
'safebrowsing.disable_download_protection': True,
'profile.default_content_settings.popups': 0,
})
option.add_experimental_option("prefs", {
# 设置文件下载路径
"download.default_directory": path,
"download.prompt_for_download": False, # 禁止下载提示框
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
"download.directory_upgrade": True,
"download.extensions_to_open": "applications/pdf",
"plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF
"safebrowsing_for_trusted_sources_enabled": False,
"safebrowsing.enabled": False,
'safebrowsing.enabled': False,
'safebrowsing.disable_download_protection': True,
'profile.default_content_settings.popups': 0,
})
# option.add_experimental_option("prefs", {
# # 设置文件下载路径
# "download.default_directory": path,
# "download.prompt_for_download": False, # 禁止下载提示框
# "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
# "download.directory_upgrade": True,
# "download.extensions_to_open": "applications/pdf",
# "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF
# "safebrowsing_for_trusted_sources_enabled": False,
# "safebrowsing.enabled": False,
# 'safebrowsing.enabled': False,
# 'safebrowsing.disable_download_protection': True,
# 'profile.default_content_settings.popups': 0,
# })
try:
if service["environment"] == 1:
option.add_experimental_option(
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
# option.add_experimental_option(
# 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
options.add_experimental_option(
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
except:
pass
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
# browser_t = MyChrome(
# options=options, chrome_options=option, executable_path=driver_path)
selenium_service = Service(executable_path=driver_path)
browser_t = MyChrome(service=selenium_service, options=options)
elif cloudflare == 1:
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
@ -2149,9 +2164,9 @@ if __name__ == '__main__':
sys.exit()
event = Event()
event.set()
thread = BrowserThread(browser_t, i, service,
c.version, event, c.saved_file_name, config=config)
print("Thread with task id: ", i, " is created")
thread = BrowserThread(browser_t, id, service,
c.version, event, c.saved_file_name, config=config, option=tmp_options[i])
print("Thread with task id: ", id, " is created")
threads.append(thread)
thread.start()
# Set the pause operation
@ -2190,10 +2205,3 @@ if __name__ == '__main__':
for thread in threads:
print()
thread.join()
for thread in threads:
thread.browser.quit()
# print("Thread with task id: ", thread.id, " is closed")
print("程序已运行完成,请手动关闭此窗口。")
print(
"The program has finished running, please manually close this window.")

+ 57
- 0
ExecuteStage/myCode.py Zobrazit soubor

@ -0,0 +1,57 @@
"""
这是一个示例代码片段文件,可以直接在这里写Python代码,然后在程序的exec操作中调用。如果此文件名为myCode.py,请把此文件放在EasySpider程序目录下(和Data/文件夹同级),然后在程序的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例:
1. self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)表示滚动到页面底部。
2. 自定义一个全局变量:self.myVar = 1
3. 操作上面定义的全局变量:self.myVar = self.myVar + 1
4. 打印上面定义的全局变量:print(self.myVar)
5. 将自定义变量的值赋值为某个字段提取的值:self.myVar = self.outputParameters["字段名"]
6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值"
更复杂的操作请下载源代码并编译执行。
This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples:
1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page.
2. Define a global variable: self.myVar = 1
3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1
4. Print the above-defined global variable: print(self.myVar)
5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"]
6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value"
For more complex operations, please download the source code and compile it for execution.
"""
# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
# NOTE: `self` and `By` are not defined in this file. As the inline comment on the
# find_element line below explains, this code is embedded into the host program,
# which is expected to supply those names when the snippet runs.
# 导包 | Import packages
from selenium.common.exceptions import ElementClickInterceptedException
# 定义一个函数 | Define a function
def test(n = 0):
for i in range(0, n):
if i % 2 == 0:
print(i)
return "test"
# 异常捕获 | Exception capture
try:
# 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore the error reported by the IDE, because the code is embedded in the program, and the IDE cannot recognize that the self variable and By variable are normal
element.click()
print("点击成功|Click success")
except ElementClickInterceptedException:
# 如果元素被遮挡,点击失败 | If the element is obscured, the click fails
print("元素被遮挡,无法点击|The element is blocked and cannot be clicked")
except Exception as e:
# 打印其他异常 | Print any other exception
print("发生了一个异常|An exception occurred", e)
finally:
# 测试函数 | Test function
# Store a value on self (the docstring above calls this a global variable), then update and print it.
self.a = 1
print("a = ", self.a)
self.a = self.a + 1
print("a = ", self.a)
# self.outputParameters is indexed by field name in the docstring examples (items 5 and 6).
print("All parameters:", self.outputParameters)
print(test(3))
print("执行完毕|Execution completed")

+ 1
- 1
ExecuteStage/requirements.txt Zobrazit soubor

@ -1,6 +1,6 @@
commandline_config==2.2.3
requests==2.31.0
selenium==4.5.0
selenium==4.16.0
pyinstaller==5.13.2
Pillow==10.0.1
openpyxl==3.1.2

+ 18
- 6
ExecuteStage/utils.py Zobrazit soubor

@ -176,7 +176,6 @@ def write_to_csv(file_name, data, record):
f_csv.writerow(to_write)
f.close()
def replace_field_values(orginal_text, outputParameters, browser=None):
pattern = r'Field\["([^"]+)"\]'
try:
@ -184,16 +183,29 @@ def replace_field_values(orginal_text, outputParameters, browser=None):
pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text)
if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL
replaced_text = replaced_text.replace("self.", "browser.")
pattern = re.compile(r'(?i)eval\("([^"]+)"\)')
match = pattern.search(replaced_text)
eval_replaced_text = str(eval(match.group(1)))
replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
pattern = re.compile(r'(?i)eval\("(.+?)"\)')
# 循环替换所有匹配到的eval语句
while True:
match = pattern.search(replaced_text)
if not match:
break
# 执行eval并将其结果转换为字符串形式
eval_replaced_text = str(eval(match.group(1)))
# 替换eval语句
replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
except Exception as e:
print(e)
print(n>"eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.")
replaced_text = orginal_text
return replaced_text
def readCode(code):
    """Resolve an exec-operation code string to the Python source text to run.

    If ``code`` starts with the ``outside:`` prefix, the remainder is treated as
    a file name, joined onto the absolute current working directory, and the
    content of that file is returned. The file is read with the ``utf-8-sig``
    codec, so a leading UTF-8 BOM is not part of the returned text. Any other
    string is returned unchanged.

    Args:
        code: Inline source text, or ``outside:<file name>``.

    Returns:
        The source text as a string.

    Raises:
        OSError: If the referenced file cannot be opened.
    """
    prefix = "outside:"
    if code.startswith(prefix):
        # Strip surrounding whitespace so that "outside: myCode.py" or a value
        # with a trailing newline still resolves to the intended file.
        relative_name = code[len(prefix):].strip()
        file_name = os.path.join(os.path.abspath("./"), relative_name)
        with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
            code = file_obj.read()
    return code
def write_to_json(file_name, data, types, record, keys):
keys = list(keys)
# Prepare empty list for data

Načítá se…
Zrušit
Uložit