Explorar el Código

Cloudflare!!!

pull/129/head
naibo hace 1 año
padre
commit
e50cd7a62a
Se han modificado 26 ficheros con 443 adiciones y 48 borrados
  1. +8
    -7
      .temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
  2. +9
    -3
      .temp_to_pub/EasySpider_windows_x64/Code/myChrome.py
  3. +1
    -0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
  4. +1
    -0
      .temp_to_pub/EasySpider_windows_x64/tasks/127.json
  5. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/49.json
  6. +1
    -1
      .temp_to_pub/compress.py
  7. BIN
      ElectronJS/EasySpider_en.crx
  8. BIN
      ElectronJS/EasySpider_zh.crx
  9. +2
    -2
      ElectronJS/change_version.py
  10. +1
    -1
      ElectronJS/config.json
  11. +7
    -3
      ElectronJS/main.js
  12. +25
    -3
      ElectronJS/src/index.html
  13. +4
    -0
      ElectronJS/src/index.js
  14. +1
    -0
      ElectronJS/src/js/preload.js
  15. +1
    -1
      ElectronJS/src/taskGrid/FlowChart.html
  16. +1
    -1
      ElectronJS/src/taskGrid/FlowChart_CN.html
  17. +3
    -3
      ElectronJS/src/taskGrid/global.js
  18. +2
    -0
      ElectronJS/src/taskGrid/invokeTask.html
  19. +312
    -1
      ElectronJS/tasks/158.json
  20. +1
    -1
      ExecuteStage/.vscode/launch.json
  21. +1
    -1
      ExecuteStage/config.json
  22. +14
    -9
      ExecuteStage/easyspider_executestage.py
  23. +7
    -5
      ExecuteStage/myChrome.py
  24. +37
    -2
      ExecuteStage/undetected_chromedriver_ES/__init__.py
  25. +2
    -2
      ExecuteStage/undetected_chromedriver_ES/patcher.py
  26. +1
    -1
      Readme.md

+ 8
- 7
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py Ver fichero

@ -1577,13 +1577,14 @@ if __name__ == '__main__':
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
if sys.platform != "darwin":
options.binary_location = "" # 需要用自己的浏览器
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome(
options=options, driver_executable_path=driver_path)
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
print("Cloudflare模式只支持Windows x64平台。")
print("Cloudflare Mode only support on Windows x64 platform.")
sys.exit()
event = Event()
event.set()
@ -1607,9 +1608,9 @@ if __name__ == '__main__':
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n")
if cloudflare:
print("过Cloudflare验证模式有时候会不稳定,请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# if cloudflare:
# print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:

+ 9
- 3
.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py Ver fichero

@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import sys
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"
@ -89,9 +91,13 @@ class MyChrome(webdriver.Chrome):
else:
return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc
class MyUCChrome(uc.Chrome):

+ 1
- 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
La diferencia del archivo ha sido suprimida porque es demasiado grande
Ver fichero


+ 1
- 0
.temp_to_pub/EasySpider_windows_x64/tasks/127.json
La diferencia del archivo ha sido suprimida porque es demasiado grande
Ver fichero


+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/49.json
La diferencia del archivo ha sido suprimida porque es demasiado grande
Ver fichero


+ 1
- 1
.temp_to_pub/compress.py Ver fichero

@ -9,7 +9,7 @@ import platform
import shutil
import zipfile
import urllib.request
import py7zr
# import py7zr
def compress_folder_to_7z(folder_path, output_file):
if os.path.exists(output_file):

BIN
ElectronJS/EasySpider_en.crx Ver fichero


BIN
ElectronJS/EasySpider_zh.crx Ver fichero


+ 2
- 2
ElectronJS/change_version.py Ver fichero

@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio
file.write(line)
version = "0.3.5"
version = "0.3.6"
# py html js
@ -39,7 +39,7 @@ if __name__ == "__main__":
file_path = "../.temp_to_pub/compress.py"
update_file_version(file_path, version, key='easyspider_version = "')
file_path = "./src/taskGrid/logic_deprecated.js"
file_path = "./src/taskGrid/logic.js"
update_file_version(file_path, version, key='"version": "')
# file_path = "./src/taskGrid/logic.js"

+ 1
- 1
ElectronJS/config.json Ver fichero

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":0,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}

+ 7
- 3
ElectronJS/main.js Ver fichero

@ -105,14 +105,14 @@ let handle_pairs = {};
function createWindow() {
// Create the browser window.
mainWindow = new BrowserWindow({
width: 520,
width: 550,
height: 750,
webPreferences: {
preload: path.join(__dirname, 'src/js/preload.js')
},
icon: iconPath,
// frame: false, //取消window自带的关闭最小化等
// resizable: false //禁止改变主窗口尺寸
resizable: false //禁止改变主窗口尺寸
})
// and load the index.html of the app.
@ -126,7 +126,7 @@ function createWindow() {
app.quit();
}
});
mainWindow.webContents.openDevTools();
// mainWindow.webContents.openDevTools();
// Open the DevTools.
// mainWindow.webContents.openDevTools()
}
@ -549,6 +549,10 @@ app.whenReady().then(() => {
})
ipcMain.on('start-design', handleOpenBrowser);
ipcMain.on('start-invoke', handleOpenInvoke);
ipcMain.on('accept-agreement', function (event, arg) {
config.copyright = 1;
fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
});
createWindow();
app.on('activate', function () {

+ 25
- 3
ElectronJS/src/index.html Ver fichero

@ -56,7 +56,19 @@
<div style="padding: 10px; text-align: center;vertical-align: middle;" v-else>
<div v-if="lang=='en'">
<div v-if="step == -1">
TEST
<h4 style="margin-top: 20px">Copyright and Disclaimer</h4>
<p>Please carefully read the following instructions regarding the use of the software and commercial payments. If you agree, please accept the agreement.</p>
<textarea class="form-control" style="margin:0 auto;width:90%; color:black; height: 450px; min-height: 200px; background: white" readonly>
This software is intended for educational and communication purposes only. It is strictly prohibited to use the software for any illegal activities or operations, such as crawling government/military websites that are not allowed to be crawled. The user bears all consequences resulting from the use of this software and the author shall not be held responsible or liable in any way. Furthermore, the software is protected by patent rights. If you intend to use it for commercial purposes or profit-making activities, such as using the software for client orders, please contact Hangzhou Tianqin Intellectual Property Agency Co., Ltd. (http://www.tqip.com/) for patent authorization and payment operations: https://www.patentguru.com/cn/search?q=一种自定义提取流程的服务封装系统
For individual users, EasySpider is a completely free and ad-free open-source software. The development and maintenance of the software rely solely on the author's voluntary efforts. Therefore, you can choose to support the author, allowing them to have more enthusiasm and energy to maintain this software. Alternatively, if you have profited from using this software, you are welcome to support the author through the following methods:
1. PayPal account: naibowang, or scan the QR code provided in the software package.
2. Alipay account: naibowang@foxmail.com, or scan the QR code provided in the software package.
3. WeChat payment: scan the QR code provided in the software package.
</textarea>
<p><a @click="acceptAgreement" class="btn btn-primary btn-lg"
style="margin-top: 30px; width: 300px;height:60px;padding-top:12px;color:white">Agree and Start</a></p>
</div>
<div v-if="step == 0">
<p style="margin-top: 20px">Hint: Click Button below to start.</p>
@ -130,8 +142,18 @@
<div v-else-if="lang=='zh'">
<div v-if="step == -1">
<h4 style="margin-top: 20px">版权和注意事项声明</h4>
<p>请仔细阅读下方有关软件使用和商用付费的说明,并接受使用协议以使用本软件。</p>
<textarea class="form-control" style="min-height: 200px;" readonly>
<p>请仔细阅读下方有关软件使用和商用付费的说明,同意请接受协议。</p>
<textarea class="form-control" style="margin:0 auto;width:90%; color:black; height: 480px; min-height: 200px; background: white" readonly>
本软件仅供学习交流使用,严禁使用软件进行任何违法违规的操作,如爬取不允许爬取的政府/军事机关网站等。使用本软件所造成的一切后果由使用者自负,与作者本人无关,作者不会承担任何责任。同时,软件受到专利权保护,如要用于商业用途,如使用软件进行盈利接单等,请联系杭州天勤知识产权代理有限公司(http://www.tqip.com/)进行专利授权等付费操作:https://www.patentguru.com/cn/search?q=一种自定义提取流程的服务封装系统
对于个人使用者来说,易采集EasySpider是一款完全免费无广告的开源软件,软件开发和维护全靠作者用爱发电,因此您可以选择支持作者让作者有更多的热情和精力维护此软件,或者您使用了此软件进行了盈利,欢迎您通过下面的方式支持作者:
1、支付宝账号:naibowang@foxmail.com,也可以扫描软件包中带的二维码。
2、微信收款:扫描软件包中带的二维码。
3、PayPal账号:naibowang,或扫描软件包中带的二维码。
</textarea>
<p><a @click="acceptAgreement" class="btn btn-primary btn-lg"
style="margin-top: 30px; width: 300px;height:60px;padding-top:12px;color:white">同意并开始使用</a></p>
</div>
<div v-if="step == 0">
<p style="margin-top: 20px">提示:点击下方按钮开始使用。</p>

+ 4
- 0
ElectronJS/src/index.js Ver fichero

@ -55,6 +55,10 @@ var app = Vue.createApp({
this.init = false;
this.lang = lang;
},
acceptAgreement() {
this.step = 0;
window.electronAPI.acceptAgreement();
},
startDesign(lang, with_data = false, mobile=false) {
if (with_data) {
console.log(this.user_data_folder)

+ 1
- 0
ElectronJS/src/js/preload.js Ver fichero

@ -10,4 +10,5 @@ const { contextBridge, ipcRenderer } = require('electron');
contextBridge.exposeInMainWorld('electronAPI', {
startDesign: (lang="en", user_data_folder = '', mobile=false) => ipcRenderer.send('start-design', lang, user_data_folder, mobile),
startInvoke: (lang="en") => ipcRenderer.send('start-invoke', lang),
acceptAgreement: () => ipcRenderer.send('accept-agreement'),
})

+ 1
- 1
ElectronJS/src/taskGrid/FlowChart.html Ver fichero

@ -566,7 +566,7 @@
<label>Is it an extreme anti-scraping website like Cloudflare (<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">Watch Tutorial</a>)?</label>
<select id="cloudflare" name="cloudflare" class="form-control">
<option value=0>No</option>
<option value=1>Yes (Not support on MacOS, unless compile by yourself)</option>
<option value=1>Yes (Only support on Windows x64 platform)</option>
</select>
<label>Browser Emulation Type:</label>
<select id="environment" name="environment" class="form-control">

+ 1
- 1
ElectronJS/src/taskGrid/FlowChart_CN.html Ver fichero

@ -566,7 +566,7 @@
<label>是否为Cloudflare等极端反爬网站(<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a>):</label>
<select id="cloudflare" name="cloudflare" class="form-control">
<option value = 0>否</option>
<option value = 1>是(MacOS不支持直接运行,但可以自行编译)</option>
<option value = 1>是(只支持Windows x64系统)</option>
</select>
<label>浏览器模拟类型:</label>
<select id="environment" name="environment" class="form-control">

+ 3
- 3
ElectronJS/src/taskGrid/global.js Ver fichero

@ -44,10 +44,10 @@ function detectLang(str) {
if (enCount === cnCount) {
return 2;
} else if (enCount > cnCount) {
return 0;
} else if (cnCount>=3) {
return 1;
}
return 1;
return 0;
}
Vue.filter('lang', function (value) {

+ 2
- 0
ElectronJS/src/taskGrid/invokeTask.html Ver fichero

@ -208,6 +208,8 @@
<label>{{"User Data Folder (If you want to load the cookie, data and extension(s) from your local browser, please set this folder path, and then cilck the 'Run with (Data Mode)' button to run the task):~用户本地浏览器数据目录(如果需要使用本地的登录信息,插件和cookie,请设置此目录,并点击下方“执行(带用户信息模式)”按钮开始执行任务):" | lang}}</label>
<input type="text" class="form-control" v-model="user_data_folder"></input>
</div>
<p v-if="task['cloudflare']==1">{{`要想过Cloudflare验证,需要以下目录存在115版本的Chrome Beta版浏览器,注意是Beta版不是正式版:C:\\Program Files\\Google\\Chrome Beta。如果Beta版本不是115,请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包,然后解压并复制(覆盖)为C:\\Program Files\\Google\\Chrome Beta目录即可。~To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\\Program Files\\Google\\Chrome Beta,
If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\\Program Files\\Google\\Chrome Beta directory.` | lang }}</p>
<div class="form-group" style="margin-top: 10px" v-if="task.outputFormat=='mysql'">
<label>{{"MySQL configuration file Path, relative to this folder:~MySQL配置文件路径,路径相对此文件夹:" | lang}} {{config_folder}}</label>
<input type="text" class="form-control" v-model="mysql_config_path"></input>

+ 312
- 1
ElectronJS/tasks/158.json Ver fichero

@ -1 +1,312 @@
{"id":158,"name":"(子元素)京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/12/2023, 1:51:59 AM","update_time":"7/12/2023, 9:52:06 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"iPhone","value":"iPhone"}],"outputParameters":[{"id":0,"name":"参数40_文本","desc":"","type":"text","recordASField":1,"exampleValue":"iPhone"},{"id":1,"name":"参数47_文本","desc":"","type":"text","recordASField":1,"exampleValue":"剩余9天22时10分"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"iPhone","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text 
defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-btn\"]/i[1]","iframe":false,"wait":8,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":"3","scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]","//i[contains(., '')]","/html/body/div[last()-6]/div/div[last()-2]/div/button/i"]}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[5],"isInLoop":false,"position":3,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]","//div[contains(., '')]","//DIV[@class='gl-i-wrap']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div"]}},{"id":5,"index":5,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":1,"relative":true,"name":"参数40_文本","desc":"","relativeXPath":"/div[4]/a[1]/em[1]/font[3]","allXPaths":["/div[4]/a[1]/em[1]/font[3]","//font[contains(., 
'iPhone')]","//FONT[@class='skcolor_ljg']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font"],"exampleValues":[{"num":5,"value":"iPhone"}],"unique_index":"/div[4]/a[1]/em[1]/font[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数47_文本","desc":"","relativeXPath":"//a/em[1]","allXPaths":["/div[10]/em[1]","//em[contains(., '剩余9天22时10分')]","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em"],"exampleValues":[{"num":10,"value":"剩余9天22时10分"}],"unique_index":"/div[10]/em[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
{
"id": 158,
"name": "(子元素)京东全球版-专业的综合网上购物商城",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"create_time": "7/12/2023, 1:51:59 AM",
"update_time": "7/12/2023, 9:52:06 AM",
"version": "0.3.5",
"saveThreshold": 10,
"cloudflare": 0,
"environment": 0,
"maxViewLength": 15,
"outputFormat": "xlsx",
"saveName": "current_time",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "text",
"exampleValue": "https://www.jd.com"
},
{
"id": 1,
"name": "inputText_1",
"nodeName": "输入文字",
"nodeId": 2,
"desc": "要输入的文本,如京东搜索框输入:电脑",
"type": "text",
"exampleValue": "iPhone",
"value": "iPhone"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数40_文本",
"desc": "",
"type": "text",
"recordASField": 1,
"exampleValue": "iPhone"
},
{
"id": 1,
"name": "参数47_文本",
"desc": "",
"type": "text",
"recordASField": 1,
"exampleValue": "剩余9天22时10分"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
2,
3,
4
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"waitType": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"maxWaitTime": 10,
"scrollType": 0,
"scrollCount": 1,
"scrollWaitTime": 1,
"cookies": ""
}
},
{
"id": 2,
"index": 2,
"parentId": 0,
"type": 0,
"option": 4,
"title": "输入文字",
"sequence": [],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "//*[@id=\"key\"]",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"value": "iPhone",
"allXPaths": [
"/html/body/div[4]/div[1]/div[2]/div[1]/input[1]",
"//input[contains(., '')]",
"id(\"key\")",
"//INPUT[@class='text defcolor']",
"/html/body/div[last()-6]/div/div[last()-2]/div/input"
]
}
},
{
"id": 3,
"index": 3,
"parentId": 0,
"type": 0,
"option": 2,
"title": "点击元素",
"sequence": [],
"isInLoop": false,
"position": 2,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "//*[@id=\"search-btn\"]/i[1]",
"iframe": false,
"wait": 8,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": "3",
"scrollCount": 1,
"scrollWaitTime": 1,
"clickWay": 0,
"maxWaitTime": 10,
"paras": [],
"allXPaths": [
"/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]",
"//i[contains(., '')]",
"/html/body/div[last()-6]/div/div[last()-2]/div/button/i"
]
}
},
{
"id": 4,
"index": 4,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
5
],
"isInLoop": false,
"position": 3,
"parameters": {
"history": 5,
"tabIndex": -1,
"useLoop": false,
"xpath": "/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 1,
"scrollWaitTime": 1,
"loopType": 1,
"pathList": "",
"textList": "",
"code": "",
"waitTime": 0,
"exitCount": 0,
"historyWait": 2,
"breakMode": 0,
"breakCode": "",
"breakCodeWaitTime": 0,
"allXPaths": [
"/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]",
"//div[contains(., '')]",
"//DIV[@class='gl-i-wrap']",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div"
]
}
},
{
"id": 5,
"index": 5,
"parentId": 4,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 5,
"tabIndex": -1,
"useLoop": false,
"xpath": "",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"paras": [
{
"nodeType": 0,
"contentType": 1,
"relative": true,
"name": "参数40_文本",
"desc": "",
"relativeXPath": "/div[4]/a[1]/em[1]/font[3]",
"allXPaths": [
"/div[4]/a[1]/em[1]/font[3]",
"//font[contains(., 'iPhone')]",
"//FONT[@class='skcolor_ljg']",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font"
],
"exampleValues": [
{
"num": 5,
"value": "iPhone"
}
],
"unique_index": "/div[4]/a[1]/em[1]/font[3]",
"iframe": false,
"default": "",
"paraType": "text",
"recordASField": 1,
"beforeJS": "",
"beforeJSWaitTime": 0,
"JS": "",
"JSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"downloadPic": 0
},
{
"nodeType": 0,
"contentType": 1,
"relative": true,
"name": "参数47_文本",
"desc": "",
"relativeXPath": "//a/em[1]",
"allXPaths": [
"/div[10]/em[1]",
"//em[contains(., '剩余9天22时10分')]",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em"
],
"exampleValues": [
{
"num": 10,
"value": "剩余9天22时10分"
}
],
"unique_index": "/div[10]/em[1]",
"iframe": false,
"default": "",
"paraType": "text",
"recordASField": 1,
"beforeJS": "",
"beforeJSWaitTime": 0,
"JS": "",
"JSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"downloadPic": 0
}
],
"loopType": 1
}
}
]
}

+ 1
- 1
ExecuteStage/.vscode/launch.json Ver fichero

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[4]", "--headless", "0", "--user_data", "1"]
"args": ["--id", "[3]", "--headless", "0", "--user_data", "0"]
}
]
}

+ 1
- 1
ExecuteStage/config.json Ver fichero

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}

+ 14
- 9
ExecuteStage/easyspider_executestage.py Ver fichero

@ -615,13 +615,17 @@ class BrowserThread(Thread):
while True: # do while循环
try:
finished = False
newBodyText = self.browser.page_source
# newBodyText = self.browser.page_source
newBodyText = self.browser.find_element(By.XPATH, "//body").text
if newBodyText == bodyText: # 如果页面内容无变化
print("页面已检测不到新内容,停止循环。")
print("No new content detected on the page, stop loop.")
finished = True
break
else:
if node["parameters"]["exitCount"] == 0:
print("检测到页面变化,继续循环。")
print("Page changed detected, continue loop.")
bodyText = newBodyText
element = self.browser.find_element(
By.XPATH, node["parameters"]["xpath"], iframe=node["parameters"]["iframe"])
@ -1577,13 +1581,14 @@ if __name__ == '__main__':
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
if sys.platform != "darwin":
options.binary_location = "" # 需要用自己的浏览器
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome(
options=options)
options=options, driver_executable_path=driver_path)
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
print("Cloudflare模式只支持Windows x64平台。")
print("Cloudflare Mode only support on Windows x64 platform.")
sys.exit()
event = Event()
event.set()
@ -1607,9 +1612,9 @@ if __name__ == '__main__':
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n")
if cloudflare:
print("过Cloudflare验证模式有时候会不稳定,请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# if cloudflare:
# print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:

+ 7
- 5
ExecuteStage/myChrome.py Ver fichero

@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import sys
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"
@ -89,11 +91,11 @@ class MyChrome(webdriver.Chrome):
else:
return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
ES = 1
if ES == 1:
import undetected_chromedriver as uc
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc

+ 37
- 2
ExecuteStage/undetected_chromedriver_ES/__init__.py Ver fichero

@ -371,6 +371,37 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.binary_location = (
browser_executable_path or find_chrome_executable(chrome_version)
)
if not os.path.exists(options.binary_location):
time.sleep(5)
# 如果没有安装,可以在下面的链接下载安装:https://www.google.com/chrome/beta/
print(f"""\n\n\n要想过Cloudflare验证,需要以下目录存在115版本的Chrome Beta版浏览器,注意是Beta版不是正式版:C:\Program Files\Google\Chrome Beta
如果Beta版本不是115,请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包,然后解压并复制(覆盖)为C:\Program Files\Google\Chrome Beta目录即可。
""")
print("""To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\Program Files\Google\Chrome Beta,
If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\Program Files\Google\Chrome Beta directory.
Please close this program manually and re-execute the task after the configuration is complete.
""")
time.sleep(100)
else:
folder_path = os.path.dirname(os.path.abspath(options.binary_location))
folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))]
numeric_folders = [f for f in folder_list if f[0].isdigit()]
version = numeric_folders[0].split('.')[0]
if version != "115":
time.sleep(5)
print("Chrome Beta版本不是115,请将Chrome Beta的版本替换为115, 方法为下载115版本的Chrome Beta浏览器,然后解压并覆盖C:\Program Files\Google\Chrome Beta目录即可,软件下载目录中有Chrome_Beta_115_win64.7z版本的压缩包,可直接下载后解压替换。")
print("Chrome Beta version is not 115, please replace the version of Chrome Beta with 115, the method is to download the 115 version of Chrome Beta browser, then unzip and overwrite the C:\Program Files\Google\Chrome Beta directory, the software download directory has Chrome_Beta_115_win64.7z version of the compressed package, you can download and unzip directly to replace.")
print("\n请手动关闭此程序。\n")
print("\nPlease close this program manually.\n")
time.sleep(100)
print("Options Binary Location: ", options.binary_location)
@ -855,7 +886,11 @@ def find_chrome_executable(version):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates:
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
print("\n\n\n软件将会使用以下目录的Chrome浏览器:", os.path.normpath(candidate), ",请检查此浏览器版本是否为" + str(version) + "版本,如果不是将无法运行。")
print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n")
print(f"""\n\n\n要想过Cloudflare验证,需要满足以下条件:
115Chrome Beta版浏览器Beta版不是正式版C:\Program Files\Google\Chrome Beta\Application\chrome.exe
https://www.google.com/chrome/beta/
使Chrome Beta浏览器", {os.path.normpath(candidate)}, " 115 Beta浏览器""")
# print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n")
print(f"""The software will use the Chrome browser in the following directory: {os.path.normpath(candidate)}, Please check if the version of this browser is version 115, if not, it will not be able to run.\n\n\n""")
time.sleep(5)
return os.path.normpath(candidate)

+ 2
- 2
ExecuteStage/undetected_chromedriver_ES/patcher.py Ver fichero

@ -141,8 +141,8 @@ class Patcher(object):
folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))]
numeric_folders = [f for f in folder_list if f[0].isdigit()]
version = numeric_folders[0].split('.')[0]
print(f"\n\n\nCloudflare下需要自行安装浏览器,请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器(不是软件自带的Chrome浏览器,需要自己安装浏览器且版本号一定要正确),否则程序无法运行!")
print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0])
# print(f"\n\n\nCloudflare下需要自行安装浏览器,请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器(不是软件自带的Chrome浏览器,需要自己安装浏览器且版本号一定要正确),否则程序无法运行!")
# print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0])
if not ispatched:
print("Patching chromedriver...")

+ 1
- 1
Readme.md Ver fichero

@ -130,7 +130,7 @@ This software is for learning and communication only. **It is strictly forbidden
For the crawler operations of government and military websites, **the author will not answer any questions** in order to avoid violating relevant national laws, regulations and policies.
同时,软件受到专利权保护,如要用于商业用途,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。
同时,软件受到专利权保护,如要用于商业用途,如使用软件进行盈利接单等,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。
At the same time, the software is protected by patent rights. If you want to use it for commercial purposes, please contact [Hangzhou Tianqin Intellectual Property Agency](http://www.tqip.com/) for patent authorization and other paid operations.

Cargando…
Cancelar
Guardar