ソースを参照

V0.6.2

pull/356/head v0.6.2
naibo 5ヶ月前
コミット
1b6661afb8
28個のファイルの変更1105行の追加94行の削除
  1. +106
    -63
      .temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
  2. +34
    -3
      .temp_to_pub/EasySpider_windows_x64/Code/utils.py
  3. +18
    -1
      .temp_to_pub/EasySpider_windows_x64/myCode.py
  4. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/112.json
  5. +871
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/212.json
  6. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/228.json
  7. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/229.json
  8. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/295.json
  9. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/70.json
  10. +1
    -1
      .temp_to_pub/EasySpider_windows_x64/tasks/95.json
  11. +1
    -1
      .temp_to_pub/compress.py
  12. バイナリ
      ElectronJS/EasySpider_en.crx
  13. バイナリ
      ElectronJS/EasySpider_zh.crx
  14. +1
    -1
      ElectronJS/change_version.py
  15. +17
    -1
      ElectronJS/main.js
  16. +3
    -3
      ElectronJS/package.json
  17. +1
    -1
      ElectronJS/src/index.html
  18. +8
    -2
      ElectronJS/src/taskGrid/FlowChart.html
  19. +8
    -2
      ElectronJS/src/taskGrid/FlowChart_CN.html
  20. +3
    -3
      ElectronJS/src/taskGrid/logic.js
  21. +1
    -1
      ElectronJS/src/taskGrid/newTask.html
  22. +1
    -0
      ElectronJS/tasks/318.json
  23. +1
    -0
      ElectronJS/tasks/319.json
  24. +1
    -0
      ElectronJS/tasks/320.json
  25. +1
    -1
      ExecuteStage/.vscode/launch.json
  26. +21
    -2
      ExecuteStage/easyspider_executestage.py
  27. +1
    -1
      Extension/manifest_v3/package.json
  28. +1
    -1
      Extension/manifest_v3/src/manifest.json

+ 106
- 63
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py ファイルの表示

@ -5,9 +5,10 @@ import copy
import platform
import shutil
import string
import threading
# import undetected_chromedriver as uc
from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
from myChrome import MyChrome
from threading import Thread, Event
from PIL import Image
@ -112,9 +113,13 @@ class BrowserThread(Thread):
self.print_and_log("Save Name for task ID", id, "is:", self.saveName)
if not os.path.exists("Data/Task_" + str(id)):
os.mkdir("Data/Task_" + str(id))
if not os.path.exists("Data/Task_" + str(id) + "/" + self.saveName):
os.mkdir("Data/Task_" + str(id) + "/" +
self.saveName) # 创建保存文件夹用来保存截图
self.downloadFolder = "Data/Task_" + str(id) + "/" + self.saveName
if not os.path.exists(self.downloadFolder):
os.mkdir(self.downloadFolder) # 创建保存文件夹用来保存截图和文件
if not os.path.exists(self.downloadFolder + "/files"):
os.mkdir(self.downloadFolder + "/files")
if not os.path.exists(self.downloadFolder + "/images"):
os.mkdir(self.downloadFolder + "/images")
self.getDataStep = 0
self.startSteps = 0
try:
@ -142,12 +147,21 @@ class BrowserThread(Thread):
self.print_and_log("Loading stealth.min.js")
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': js}) # TMALL 反扒
self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
WebDriverWait(self.browser, 10)
self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id))
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName, "files")
self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
self.browser.execute("send_command", self.paramss) # 下载地址改变
self.browser.execute("send_command", self.paramss) # 下载目录改变
self.monitor_event = threading.Event()
self.monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, self.monitor_event)) #path后面的逗号不能省略,是元组固定写法
self.monitor_thread.start()
# self.browser.get('about:blank')
self.procedure = service["graph"] # 程序执行流程
try:
@ -187,12 +201,19 @@ class BrowserThread(Thread):
self.links = list(filter(isnotnull, service["url"])) # 要执行的link
self.OUTPUT = [] # 采集的数据
try:
self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖
self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖,3为重命名文件
except:
self.dataWriteMode = 1
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx" or self.outputFormat == "json":
if self.dataWriteMode == 2 and os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
if self.dataWriteMode == 2:
os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
elif self.dataWriteMode == 3:
i = 2
while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat):
i = i + 1
self.saveName = self.saveName + '_' + str(i)
self.print_and_log("文件已存在,已重命名为", self.saveName)
self.writeMode = 1 # 写入模式,0为新建,1为追加
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
@ -521,7 +542,7 @@ class BrowserThread(Thread):
"/", len(self.links))
self.executeNode(0)
self.urlId = self.urlId + 1
files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
# files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
# 如果目录为空,则删除该目录
# if not files:
# os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName)
@ -544,6 +565,7 @@ class BrowserThread(Thread):
shutil.rmtree(self.option["tmp_user_data_folder"])
except:
pass
self.monitor_event.set()
self.print_and_log("清理完成!|Clean up completed!")
self.print_and_log("您现在可以安全的关闭此窗口了。|You can safely close this window now.")
@ -768,6 +790,8 @@ class BrowserThread(Thread):
elif int(codeMode) == 5:
try:
code = readCode(code)
# global_namespace = globals().copy()
# global_namespace["self"] = self
output = exec(code)
self.recordLog("执行下面的代码:" + code)
self.recordLog("Execute the following code:" + code)
@ -847,6 +871,23 @@ class BrowserThread(Thread):
self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation")
elif codeMode == 9: # 发送邮件
send_email(node["parameters"]["emailConfig"])
elif codeMode == 10: # 清空所有字段值
self.clearOutputParameters()
elif codeMode == 11: # 生成新的数据行
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
elif codeMode == 12: # 退出程序
self.print_and_log("根据设置的自定义操作,任务已退出|Task exited according to custom operation")
self.saveData(exit=True)
self.browser.quit()
self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...")
try:
shutil.rmtree(self.option["tmp_user_data_folder"])
except:
pass
self.print_and_log("清理完成!|Clean up completed!")
os._exit(0)
else: # 0 1 5 6
output = self.execute_code(
codeMode, code, max_wait_time, iframe=params["iframe"])
@ -1106,7 +1147,25 @@ class BrowserThread(Thread):
self.recordLog(
"判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met")
def handleHistory(self, node, xpath, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
def handleHistory(self, node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
if self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断
difference = thisHistoryLength - self.history["index"] # 计算历史记录变化差值
self.browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录
@ -1132,12 +1191,13 @@ class BrowserThread(Thread):
if self.browser.current_url == thisHistoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化
break
time.sleep(2)
if element == None: # 不固定元素列表
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
else: # 固定元素列表
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
# if index > 0:
# index -= 1 # 如果是data:开头的网址,就要重试一次
if xpath != "":
if element == None: # 不固定元素列表
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
else: # 固定元素列表
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
# if index > 0:
# index -= 1 # 如果是data:开头的网址,就要重试一次
else:
if element == None:
element = elements
@ -1321,25 +1381,7 @@ class BrowserThread(Thread):
if self.BREAK:
self.BREAK = False
break
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
index, elements = self.handleHistory(node, xpath, thisHistoryURL, thisHistoryLength, index, elements=elements)
index, elements = self.handleHistory(node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, elements=elements)
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"],
@ -1381,25 +1423,7 @@ class BrowserThread(Thread):
if self.BREAK:
self.BREAK = False
break
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
index, element = self.handleHistory(node, path, thisHistoryURL, thisHistoryLength, index, element=element)
index, element = self.handleHistory(node, path, thisHandle, thisHistoryURL, thisHistoryLength, index, element=element)
except NoSuchElementException:
self.print_and_log("Loop element not found: ", path)
self.print_and_log("找不到循环元素:", path)
@ -1447,6 +1471,7 @@ class BrowserThread(Thread):
code = get_output_code(output)
if code <= 0:
break
index, _ = self.handleHistory(node, "", thisHandle, thisHistoryURL, thisHistoryLength, index)
elif int(node["parameters"]["loopType"]) == 4: # 固定网址列表
# tempList = node["parameters"]["textList"].split("\r\n")
urlList = list(
@ -1715,6 +1740,21 @@ class BrowserThread(Thread):
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
elif click_way == 2: # 双击
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.browser.execute_script("arguments[0].scrollIntoView();", element)
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.print_and_log(f"Selenium双击元素{path}失败,将尝试使用JavaScript双击")
self.print_and_log(f"Failed to double click element {path} with Selenium, will try to double click with JavaScript")
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
self.recordLog("点击元素|Click element: " + path)
except TimeoutException:
self.print_and_log(
@ -1797,7 +1837,6 @@ class BrowserThread(Thread):
self.print_and_log("History Length Error")
self.history["index"] = 0
self.scrollDown(param) # 根据参数配置向下滚动
# rt.end()
def get_content(self, p, element):
content = ""
@ -1824,7 +1863,7 @@ class BrowserThread(Thread):
downloadPic = 0
if downloadPic == 1:
download_image(self, content, "Data/Task_" +
str(self.id) + "/" + self.saveName + "/", element)
str(self.id) + "/" + self.saveName + "/images", element)
else: # 普通节点
if p["splitLine"] == 1:
text = extract_text_from_html(element.get_attribute('outerHTML'))
@ -1853,7 +1892,7 @@ class BrowserThread(Thread):
downloadPic = 0
if downloadPic == 1:
download_image(self, content, "Data/Task_" +
str(self.id) + "/" + self.saveName + "/", element)
str(self.id) + "/" + self.saveName + "/images", element)
else:
command = 'var arr = [];\
var content = arguments[0];\
@ -1965,6 +2004,8 @@ class BrowserThread(Thread):
content = element.get_attribute(attribute_name)
except:
content = ""
elif p["contentType"] == 15: # 常量值
content = p["JS"]
if content == None:
content = ""
return content
@ -2208,7 +2249,7 @@ if __name__ == '__main__':
"server_address": "http://localhost:8074",
"keyboard": True, # 是否监听键盘输入
"pause_key": "p", # 暂停键
"version": "0.6.0",
"version": "0.6.2",
}
c = Config(config)
print(c)
@ -2283,7 +2324,9 @@ if __name__ == '__main__':
options.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
# 阻止http -> https的重定向
options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process")
options.add_argument("--disable-web-security") # 禁用同源策略
options.add_argument('-ignore-certificate-errors')
options.add_argument('-ignore -ssl-errors')
@ -2370,8 +2413,8 @@ if __name__ == '__main__':
cloudflare = 0
if cloudflare == 0:
options.add_argument('log-level=3') # 隐藏日志
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id))
print("Data path:", path)
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id), "files")
print("文件下载路径|File Download path:", path)
options.add_experimental_option("prefs", {
# 设置文件下载路径
"download.default_directory": path,

+ 34
- 3
.temp_to_pub/EasySpider_windows_x64/Code/utils.py ファイルの表示

@ -59,7 +59,31 @@ def send_email(config):
smtp_server.quit()
except:
pass
def rename_downloaded_file(download_dir, stop_event, poll_interval=1):
    """Poll ``download_dir`` and give every newly appearing file a unique name.

    Files already present when monitoring starts are left untouched. Each
    later file is renamed to ``esfile_<name>_<uuid4>_<name>``. Files whose
    name ends in ``.crdownload``, ``.htm`` or ``.html``, or whose name already
    starts with ``esfile_``, are skipped on every pass.

    Args:
        download_dir: Directory to watch; it must exist when the function is
            called because it is listed immediately.
        stop_event: Object with ``is_set()`` and ``wait(timeout)`` (e.g. a
            ``threading.Event``). Monitoring ends once it is set.
        poll_interval: Seconds to wait between directory scans.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    skipped_suffixes = ('.crdownload', '.htm', '.html')
    # Names seen at start-up plus names produced by our own renames; these are
    # never renamed (again).
    known_files = set(os.listdir(download_dir))
    while not stop_event.is_set():
        for file in os.listdir(download_dir):
            if file in known_files:
                continue  # skip original files and files already renamed
            # The prefix must be tested on the bare file name: the joined path
            # starts with the directory, so a check on it can never match.
            if file.endswith(skipped_suffixes) or file.startswith('esfile_'):
                continue
            # os.listdir yields bare names, so no path splitting is required.
            new_name = "esfile_" + file + '_' + str(uuid.uuid4()) + '_' + file
            full_path = os.path.join(download_dir, file)
            new_path = os.path.join(download_dir, new_name)
            try:
                os.rename(full_path, new_path)
            except OSError:
                # Best effort: the file stays unknown, so the next scan
                # attempts the rename again.
                print("文件重命名失败|File rename failed")
            else:
                known_files.add(new_name)  # remember it to avoid renaming twice
                print(f"文件已重命名为|File has been renamed to: {new_path}")
        # Waiting on the event (instead of sleeping) ends the monitor as soon
        # as the caller sets it, rather than up to one interval later.
        stop_event.wait(poll_interval)
    print("下载文件重命名监控已停止。|Download file rename monitoring has stopped.")
def is_valid_url(url):
try:
@ -505,10 +529,17 @@ def write_to_excel(file_name, data, types, record):
for i in range(len(line)):
if record[i]:
to_write.append(line[i])
ws.append(to_write)
try:
ws.append(to_write)
except:
print("写入Excel文件失败,请检查数据类型是否正确。")
print("Failed to write to Excel file, please check if the data type is correct.")
# 保存工作簿
wb.save(file_name)
try:
wb.save(file_name)
except:
print("保存Excel文件失败,请检查文件是否被其他程序打开。")
print("Failed to save Excel file, please check if the file is opened by other programs.")
class Time:
def __init__(self, type1=""):

+ 18
- 1
.temp_to_pub/EasySpider_windows_x64/myCode.py ファイルの表示

@ -23,7 +23,7 @@ For more complex operations, please download the source code and compile it for
"""
# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
print(globals())
# 导包 | Import packages
from selenium.common.exceptions import ElementClickInterceptedException
@ -56,3 +56,20 @@ finally:
print("All parameters:", self.outputParameters)
print(test(3))
print("执行完毕|Execution completed")
import time
time.sleep(3)
def new_line(outputParameters, maxViewLength, record):
    """Collect the current parameter values into a row and echo it.

    Every value of ``outputParameters`` goes into the returned list, in
    iteration order. Values whose matching ``record`` flag is truthy are also
    printed, truncated to ``maxViewLength`` characters, on a single console
    line.
    """
    print("Use this function to print a new line in the console")
    row = []
    for position, value in enumerate(outputParameters.values()):
        row.append(value)
        if not record[position]:
            continue
        print(value[:maxViewLength], " ", end="")
    print("")
    return row
new_line(self.outputParameters, 10, [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True])

+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/112.json
ファイル差分が大きすぎるため省略します
ファイルの表示


+ 871
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/212.json
ファイル差分が大きすぎるため省略します
ファイルの表示


+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/228.json ファイルの表示

@ -1 +1 @@
{"id":228,"name":"[2312.02977] Exploring the nonclassical dynamics of the \"classical'' Schrödinger equation","url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","create_time":"12/7/2023, 2:44:58 AM","update_time":"12/7/2023, 2:56:47 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":1,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"desc":"https://arxiv.org/abs/2312.02977","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/abs/2312.02977","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://arxiv.org/abs/2312.02977"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, 
\"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}}]}
{"id":228,"name":"[2312.02977] Exploring the nonclassical dynamics of the \"classical'' Schrödinger equation","url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","create_time":"12/7/2023, 2:44:58 AM","update_time":"2024-01-05 22:08:46","version":"0.6.0","saveThreshold":10,"quitWaitTime":3,"environment":1,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"TTT","dataWriteMode":3,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://arxiv.org/abs/2312.02977","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/abs/2312.02977","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://arxiv.org/abs/2312.02977"},{"id":1,"name":"loopTimes_1","nodeId":5,"nodeName":"循环 - 单个元素","desc":"循环循环 - 单个元素执行的次数(0代表无限循环)","type":"int","exampleValue":10,"value":10}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":2,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, 
\"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":-1,"index":3,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":-1,"index":4,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button 
download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环 - 单个元素","sequence":[2],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"//body","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":10,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0}}]}

+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/229.json ファイルの表示

@ -1 +1 @@
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","wai
tElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"2023-12-27 20:05:50","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"知了个乎","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"},{"id":1,"name":"loopTimes_1","nodeId":4,"nodeName":"循环 - 单个元素","desc":"循环循环 - 单个元素执行的次数(0代表无限循环)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":2,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]
/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":4,"index":3,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":2,"index":4,"parentId":0,"type":1,"option":8,"title":"循环 - 
单个元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0}}]}

+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/295.json
ファイル差分が大きすぎるため省略します
ファイルの表示


+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/70.json ファイルの表示

@ -1 +1 @@
{"id":70,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}
{"id":-2,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}

+ 1
- 1
.temp_to_pub/EasySpider_windows_x64/tasks/95.json
ファイル差分が大きすぎるため省略します
ファイルの表示


+ 1
- 1
.temp_to_pub/compress.py ファイルの表示

@ -64,7 +64,7 @@ def compress_folder_to_7z_split(folder_path, output_file):
except:
subprocess.call(["7zz", "a", "-v95m", output_file, folder_path])
easyspider_version = "0.6.0"
easyspider_version = "0.6.2"
if __name__ == "__main__":

バイナリ
ElectronJS/EasySpider_en.crx ファイルの表示


バイナリ
ElectronJS/EasySpider_zh.crx ファイルの表示


+ 1
- 1
ElectronJS/change_version.py ファイルの表示

@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio
file.write(line)
version = "0.6.0"
version = "0.6.2"
# py html js

+ 17
- 1
ElectronJS/main.js ファイルの表示

@ -651,7 +651,11 @@ async function beginInvoke(msg, ws) {
if (parameters.xpath.includes("point(")) {
await click_element(element, point);
} else {
await click_element(element);
if (parameters.clickWay == 2){ //双击
await click_element(element, "double");
} else {
await click_element(element); //单击
}
}
let alertHandleType = parameters.alertHandleType;
if (alertHandleType == 1) {
@ -1002,6 +1006,14 @@ async function beginInvoke(msg, ws) {
"Attribute value obtained: " + result,
"success"
);
} else if(param.contentType == 15) {
//元素的属性值
let result = param.JS;
notify_browser(
"获取的常量值:" + result,
"Constant value obtained: " + result,
"success"
);
} else {
//其他暂不支持
notify_browser(
@ -1130,6 +1142,8 @@ async function click_element(element, type = "click") {
// await actions.click().perform();
let script = `document.elementFromPoint(${x}, ${y}).click();`;
await driver.executeScript(script);
} else if (type == "double") {
await driver.actions().doubleClick(element).perform();
} else {
await element.click();
}
@ -1341,6 +1355,8 @@ async function runBrowser(lang = "en", user_data_folder = "", mobile = false) {
let options = new chrome.Options();
options.addArguments("--disable-blink-features=AutomationControlled");
options.addArguments("--disable-infobars");
options.addArguments("--disable-web-security");
options.addArguments("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process");
// 添加实验性选项以排除'enable-automation'开关
options.set("excludeSwitches", ["enable-automation"]);
options.excludeSwitches("enable-automation");

+ 3
- 3
ElectronJS/package.json ファイルの表示

@ -1,7 +1,7 @@
{
"name": "easy-spider",
"productName": "EasySpider",
"version": "0.6.0",
"version": "0.6.2",
"icon": "./favicon",
"description": "NoCode Visual Web Crawler",
"main": "main.js",
@ -67,7 +67,7 @@
],
"packagerConfig": {
"icon": "./favicon",
"appVersion": "0.6.0",
"appVersion": "0.6.2",
"name": "EasySpider",
"executableName": "EasySpider",
"appCopyright": "Naibo Wang (naibowang@foxmail.com)",
@ -80,4 +80,4 @@
"publishers": []
}
}
}
}

+ 1
- 1
ElectronJS/src/index.html ファイルの表示

@ -40,7 +40,7 @@
<p><a @click="changeLang('en')" class="btn btn-outline-primary btn-lg"
style="margin-top: 15px; width: 300px;height:60px;padding-top:12px;">English</a></p>
<p style="font-size: 17px">当前版本/Current Version: <b>v0.6.0</b></p>
<p style="font-size: 17px">当前版本/Current Version: <b>v0.6.2</b></p>
<p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases"
target="_blank">Github</a>最新版本/Newest Version:<b>{{newest_version}}</b></p>
<!-- <p>如发现新版本更新,可从以下Github仓库下载最新版本使用/If a new version is found, you can download the latest version from the following Github repository:</p>-->

+ 8
- 2
ElectronJS/src/taskGrid/FlowChart.html ファイルの表示

@ -170,10 +170,11 @@
</div>
<label>Maximum wait time for page load after clicking (in seconds):</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['maxWaitTime']" type="number" required></input>
<label>Click Type:</label>
<label>Click Type (including double-click):</label>
<select v-model='nowNode["parameters"]["clickWay"]' class="form-control">
<option :value = 0>Selenium</option>
<option :value = 1>JavaScript</option>
<option :value = 2>Double-click</option>
</select>
<label>Open link in new tab:</label>
<select v-model='nowNode["parameters"]["newTab"]' class="form-control">
@ -271,6 +272,7 @@
<option :value = 4>Background Image Address</option>
<option :value = 5>Webpage URL</option>
<option :value = 6>Webpage Title</option>
<option :value = 15>Constant String</option>
<option :value = 7>Element Screenshot</option>
<option :value = 8>OCR Results</option>
<option :value = 14>Properties of elements</option>
@ -280,7 +282,11 @@
<option :value = 10>Selected value of the current select box</option>
<option :value = 11>Selected text of the current select box</option>
</select>
<div v-if='params.parameters[paraIndex]["contentType"] == 14'>
<div v-if='params.parameters[paraIndex]["contentType"] == 15'>
<label>Constant String:</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="This field type is usually used for remarks"></input>
</div>
<div v-else-if='params.parameters[paraIndex]["contentType"] == 14'>
<label>Attribute Name:</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="Attribute names, such as href to represent the href attribute of the current element, that is, the link address."></input>
</div>

+ 8
- 2
ElectronJS/src/taskGrid/FlowChart_CN.html ファイルの表示

@ -170,10 +170,11 @@
</div>
<label>点击后页面加载最长等待时间(秒):</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['maxWaitTime']" type="number" required></input>
<label>点击类型:</label>
<label>点击类型(如是否双击)</label>
<select v-model='nowNode["parameters"]["clickWay"]' class="form-control">
<option :value = 0>Selenium点击</option>
<option :value = 1>JavaScript点击</option>
<option :value = 2>双击</option>
</select>
<label>在新标签页打开超链接:</label>
<select v-model='nowNode["parameters"]["newTab"]' class="form-control">
@ -271,6 +272,7 @@
<option :value = 4>背景图片地址</option>
<option :value = 5>页面网址</option>
<option :value = 6>页面标题</option>
<option :value = 15>常量字符串</option>
<option :value = 7>元素截图</option>
<option :value = 8>OCR识别文字</option>
<option :value = 14>元素的属性值</option>
@ -280,7 +282,11 @@
<option :value = 10>当前选择框选中的选项值</option>
<option :value = 11>当前选择框选中的选项文本</option>
</select>
<div v-if='params.parameters[paraIndex]["contentType"] == 14'>
<div v-if='params.parameters[paraIndex]["contentType"] == 15'>
<label>常量字符串:</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="此字段类型通常作为备注使用"></input>
</div>
<div v-else-if='params.parameters[paraIndex]["contentType"] == 14'>
<label>属性名称:</label>
<input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="属性名称,如class表示当前元素的class属性值,即元素所拥有的类名。"></input>
</div>

+ 3
- 3
ElectronJS/src/taskGrid/logic.js ファイルの表示

@ -446,7 +446,7 @@ function modifyParameters(t, param) {
}
}
function showSuccess(msg, time = 4000) {
function showSuccess(msg, time = 1000) {
$("#tip").text(msg);
$("#tip").slideDown(); //提示框
let fadeout = setTimeout(function () {
@ -491,7 +491,7 @@ if (mobile == "true") {
}
let serviceInfo = {
"version": "0.6.0"
"version": "0.6.2"
};
function saveService(type) {
@ -625,7 +625,7 @@ function saveService(type) {
"links": links,
"create_time": $("#create_time").val(),
"update_time": formatDateTime(new Date()),
"version": "0.6.0",
"version": "0.6.2",
"saveThreshold": saveThreshold,
// "cloudflare": cloudflare,
"quitWaitTime": parseInt($("#quitWaitTime").val()),

+ 1
- 1
ElectronJS/src/taskGrid/newTask.html ファイルの表示

@ -33,7 +33,7 @@
<h4 style="text-align: center;">{{"New Task~新任务" | lang}}</h4>
<div class="form-group">
<label>{{"Please Input URL (http or https):~请输入网页网址(以http或https开头):" | lang}} </label>
<textarea class="form-control" id="links" placeholder="links" style="min-height: 100px;">{{"https://www.ebay.com~https://www.jd.com" | lang}}</textarea>
<textarea class="form-control" id="links" placeholder="links" style="min-height: 100px;">{{"https://www.ebay.com~https://www.baidu.com" | lang}}</textarea>
</div>
<button type="submit" id="send" class="btn btn-primary">{{"Start Design~开始设计" | lang}}</button>
<!-- <div class="form-group" style="margin-top: 10px">-->

+ 1
- 0
ElectronJS/tasks/318.json ファイルの表示

@ -0,0 +1 @@
{"id":318,"name":"京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"2024-04-22 05:08:03","update_time":"2024-04-22 05:19:48","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"电脑数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://prodev.jd.com/mall/active/31XPWPTonxJ9e5YoQ85HS7z8XNYQ/index.html?babelChannel=ttt40"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[4]/div[1]/div[4]/ul[1]/li/a[1]
","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":["/html/body/div[1]/div[4]/div[1]/div[4]/ul[1]/li[1]/a[1]","//a[contains(., '电脑数码')]","//A[@class='navitems-lk']","/html/body/div[last()-5]/div[last()-2]/div/div[last()-1]/ul/li[last()-8]/a"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":1,"contentType":15,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"电脑数码"}],"unique_index":"auwkv5g1krqlva0tsc4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"123","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://prodev.jd.com/mall/active/31XPWPTonxJ9e5YoQ85HS7z8XNYQ/index.html?babelChannel=ttt40"}],"unique_index":"auwkv5g1krqlva0tsc4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0}]}}]}

+ 1
- 0
ElectronJS/tasks/319.json ファイルの表示

@ -0,0 +1 @@
{"id":-2,"name":"百度一下,你就知道","url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","create_time":"2024-04-22 05:45:12","update_time":"2024-04-22 05:45:20","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com?id=1","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}}]}

+ 1
- 0
ElectronJS/tasks/320.json ファイルの表示

@ -0,0 +1 @@
{"id":320,"name":"百度一下,你就知道","url":"https://www.baidu.com","links":"https://www.baidu.com","create_time":"2024-04-22 05:53:18","update_time":"2024-04-22 05:53:28","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com","links":"https://www.baidu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环点击每个元素","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]/span[2]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":
"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":1,"maxWaitTime":10,"params":[],"alertHandleType":0,"downloadWaitTime":3600,"allXPaths":""}}]}

+ 1
- 1
ExecuteStage/.vscode/launch.json ファイルの表示

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[79]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"args": ["--ids", "[83]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"--read_type", "remote"]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}

+ 21
- 2
ExecuteStage/easyspider_executestage.py ファイルの表示

@ -1740,6 +1740,21 @@ class BrowserThread(Thread):
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
elif click_way == 2: # 双击
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.browser.execute_script("arguments[0].scrollIntoView();", element)
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.print_and_log(f"Selenium双击元素{path}失败,将尝试使用JavaScript双击")
self.print_and_log(f"Failed to double click element {path} with Selenium, will try to double click with JavaScript")
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
self.recordLog("点击元素|Click element: " + path)
except TimeoutException:
self.print_and_log(
@ -1989,6 +2004,8 @@ class BrowserThread(Thread):
content = element.get_attribute(attribute_name)
except:
content = ""
elif p["contentType"] == 15: # 常量值
content = p["JS"]
if content == None:
content = ""
return content
@ -2232,7 +2249,7 @@ if __name__ == '__main__':
"server_address": "http://localhost:8074",
"keyboard": True, # 是否监听键盘输入
"pause_key": "p", # 暂停键
"version": "0.6.0",
"version": "0.6.2",
}
c = Config(config)
print(c)
@ -2307,7 +2324,9 @@ if __name__ == '__main__':
options.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
# 阻止http -> https的重定向
options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process")
options.add_argument("--disable-web-security") # 禁用同源策略
options.add_argument('-ignore-certificate-errors')
options.add_argument('-ignore -ssl-errors')

+ 1
- 1
Extension/manifest_v3/package.json ファイルの表示

@ -1,6 +1,6 @@
{
"name": "EasySpider",
"version": "0.6.0",
"version": "0.6.2",
"type": "module",
"scripts": {
"build": "rollup -c",

+ 1
- 1
Extension/manifest_v3/src/manifest.json ファイルの表示

@ -1,6 +1,6 @@
{
"name": "EasySpider",
"version": "0.6.0",
"version": "0.6.2",
"description": "EasySpider's chrome extension",
"author": "Naibo Wang",
"manifest_version": 3,

読み込み中…
キャンセル
保存