|
|
@ -1,5 +1,6 @@ |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
# import atexit |
|
|
|
import undetected_chromedriver as uc |
|
|
|
from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel, write_to_json |
|
|
|
from myChrome import MyChrome |
|
|
|
from threading import Thread, Event |
|
|
@ -41,7 +42,6 @@ from urllib.parse import urljoin |
|
|
|
from lxml import etree |
|
|
|
import onnxruntime |
|
|
|
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志 |
|
|
|
import undetected_chromedriver as uc |
|
|
|
# import pandas as pd |
|
|
|
# import numpy |
|
|
|
# import pytesseract |
|
|
@ -157,7 +157,7 @@ class BrowserThread(Thread): |
|
|
|
self.OUTPUT.append([]) # 添加表头 |
|
|
|
self.writeMode = 0 |
|
|
|
elif self.outputFormat == "json": |
|
|
|
self.writeMode = 3 # JSON模式无需判断是否存在文件 |
|
|
|
self.writeMode = 3 # JSON模式无需判断是否存在文件 |
|
|
|
elif self.outputFormat == "mysql": |
|
|
|
self.mysql = myMySQL(config["mysql_config_path"]) |
|
|
|
self.mysql.create_table(self.saveName, service["outputParameters"]) |
|
|
@ -409,7 +409,8 @@ class BrowserThread(Thread): |
|
|
|
elif self.outputFormat == "json": |
|
|
|
file_name = "Data/Task_" + \ |
|
|
|
str(self.id) + "/" + self.saveName + '.json' |
|
|
|
write_to_json(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord, self.outputParameters.keys()) |
|
|
|
write_to_json(file_name, self.OUTPUT, self.outputParametersTypes, |
|
|
|
self.outputParametersRecord, self.outputParameters.keys()) |
|
|
|
elif self.outputFormat == "mysql": |
|
|
|
self.mysql.write_to_mysql( |
|
|
|
self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes) |
|
|
@ -647,7 +648,8 @@ class BrowserThread(Thread): |
|
|
|
optionValue = loopValue |
|
|
|
optionMode = 1 |
|
|
|
try: |
|
|
|
xpath = replace_field_values(para["xpath"], self.outputParameters, self) |
|
|
|
xpath = replace_field_values( |
|
|
|
para["xpath"], self.outputParameters, self) |
|
|
|
dropdown = Select(self.browser.find_element( |
|
|
|
By.XPATH, xpath, iframe=para["iframe"])) |
|
|
|
try: |
|
|
@ -678,7 +680,8 @@ class BrowserThread(Thread): |
|
|
|
def moveToElement(self, para, loopElement=None, loopPath="", index=0): |
|
|
|
time.sleep(0.1) # 移动之前等待0.1秒 |
|
|
|
loopPath = replace_field_values(loopPath, self.outputParameters, self) |
|
|
|
xpath = replace_field_values(para["xpath"], self.outputParameters, self) |
|
|
|
xpath = replace_field_values( |
|
|
|
para["xpath"], self.outputParameters, self) |
|
|
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath |
|
|
|
if xpath == "": |
|
|
|
path = loopPath |
|
|
@ -873,8 +876,11 @@ class BrowserThread(Thread): |
|
|
|
def loopExecute(self, node, loopValue, clickPath="", index=0): |
|
|
|
time.sleep(0.1) # Force a 0.1-second wait when the loop first starts executing |
|
|
|
thisHandle = self.browser.current_window_handle # 记录本次循环内的标签页的ID |
|
|
|
thisHistoryLength = self.browser.execute_script( |
|
|
|
'return history.length') # 记录本次循环内的history的length |
|
|
|
try: |
|
|
|
thisHistoryLength = self.browser.execute_script( |
|
|
|
'return history.length') # 记录本次循环内的history的length |
|
|
|
except: |
|
|
|
thisHistoryLength = 0 |
|
|
|
self.history["index"] = thisHistoryLength |
|
|
|
self.history["handle"] = thisHandle |
|
|
|
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环 |
|
|
@ -1009,7 +1015,7 @@ class BrowserThread(Thread): |
|
|
|
# else: |
|
|
|
# time.sleep(2) |
|
|
|
# 切换历史记录等待: |
|
|
|
self.recordLog("Change history back time or: ", |
|
|
|
self.recordLog("Change history back time or: ", |
|
|
|
node["parameters"]["historyWait"]) |
|
|
|
try: |
|
|
|
self.browser.execute_script('window.stop()') |
|
|
@ -1030,7 +1036,8 @@ class BrowserThread(Thread): |
|
|
|
# 千万不要忘了分割!! |
|
|
|
for path in node["parameters"]["pathList"].split("\n"): |
|
|
|
try: |
|
|
|
path = replace_field_values(path, self.outputParameters, self) |
|
|
|
path = replace_field_values( |
|
|
|
path, self.outputParameters, self) |
|
|
|
element = self.browser.find_element( |
|
|
|
By.XPATH, path, iframe=node["parameters"]["iframe"]) |
|
|
|
# self.recordLog("循环元素|Loop element:", path) |
|
|
@ -1224,13 +1231,17 @@ class BrowserThread(Thread): |
|
|
|
"return history.length") |
|
|
|
except: |
|
|
|
self.history["index"] = 0 |
|
|
|
except Exception as e: |
|
|
|
self.print_and_log("History Length Error") |
|
|
|
self.history["index"] = 0 |
|
|
|
self.scrollDown(para) # 控制屏幕向下滚动 |
|
|
|
|
|
|
|
# 键盘输入事件 |
|
|
|
def inputInfo(self, para, loopValue): |
|
|
|
time.sleep(0.1) # 输入之前等待0.1秒 |
|
|
|
try: |
|
|
|
xpath = replace_field_values(para["xpath"], self.outputParameters, self) |
|
|
|
xpath = replace_field_values( |
|
|
|
para["xpath"], self.outputParameters, self) |
|
|
|
textbox = self.browser.find_element( |
|
|
|
By.XPATH, xpath, iframe=para["iframe"]) |
|
|
|
# textbox.send_keys(Keys.CONTROL, 'a') |
|
|
@ -1289,8 +1300,10 @@ class BrowserThread(Thread): |
|
|
|
try: |
|
|
|
# element = self.browser.find_element( |
|
|
|
# By.XPATH, path, iframe=para["iframe"]) |
|
|
|
clickPath = replace_field_values(clickPath, self.outputParameters, self) |
|
|
|
xpath = replace_field_values(para["xpath"], self.outputParameters, self) |
|
|
|
clickPath = replace_field_values( |
|
|
|
clickPath, self.outputParameters, self) |
|
|
|
xpath = replace_field_values( |
|
|
|
para["xpath"], self.outputParameters, self) |
|
|
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath |
|
|
|
if xpath == "": |
|
|
|
path = clickPath |
|
|
@ -1375,6 +1388,9 @@ class BrowserThread(Thread): |
|
|
|
pass |
|
|
|
self.history["index"] = self.browser.execute_script( |
|
|
|
"return history.length") |
|
|
|
except Exception as e: |
|
|
|
self.print_and_log("History Length Error") |
|
|
|
self.history["index"] = 0 |
|
|
|
else: |
|
|
|
try: |
|
|
|
self.history["index"] = self.browser.execute_script( |
|
|
@ -1387,6 +1403,9 @@ class BrowserThread(Thread): |
|
|
|
self.history["index"] = self.browser.execute_script( |
|
|
|
"return history.length") |
|
|
|
# 如果打开了新窗口,切换到新窗口 |
|
|
|
except Exception as e: |
|
|
|
self.print_and_log("History Length Error") |
|
|
|
self.history["index"] = 0 |
|
|
|
self.scrollDown(para) # 根据参数配置向下滚动 |
|
|
|
# rt.end() |
|
|
|
|
|
|
@ -1556,7 +1575,8 @@ class BrowserThread(Thread): |
|
|
|
|
|
|
|
# 提取数据事件 |
|
|
|
def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0): |
|
|
|
parentPath = replace_field_values(parentPath, self.outputParameters, self) |
|
|
|
parentPath = replace_field_values( |
|
|
|
parentPath, self.outputParameters, self) |
|
|
|
if para["clear"] == 1: |
|
|
|
self.clearOutputParameters() |
|
|
|
try: |
|
|
@ -1762,8 +1782,8 @@ class BrowserThread(Thread): |
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
from multiprocessing import freeze_support |
|
|
|
freeze_support() # 防止无限死循环多开 |
|
|
|
|
|
|
|
freeze_support() # 防止无限死循环多开 |
|
|
|
|
|
|
|
# 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度 |
|
|
|
# If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed |
|
|
|
config = { |
|
|
@ -1959,13 +1979,17 @@ if __name__ == '__main__': |
|
|
|
elif cloudflare == 1: |
|
|
|
if sys.platform == "win32": |
|
|
|
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 |
|
|
|
# options.add_argument("--auto-open-devtools-for-tabs") |
|
|
|
# options.add_argument("--auto-open-devtools-for-tabs") |
|
|
|
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器 |
|
|
|
browser_t = MyUCChrome(options=options, driver_executable_path=driver_path) |
|
|
|
browser_t = MyUCChrome( |
|
|
|
options=options, driver_executable_path=driver_path) |
|
|
|
links = list(filter(isnotnull, service["links"].split("\n"))) |
|
|
|
browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab |
|
|
|
time.sleep(5) # wait until page has loaded |
|
|
|
browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab |
|
|
|
# open page in new tab |
|
|
|
browser_t.execute_script( |
|
|
|
'window.open("' + links[0] + '","_blank");') |
|
|
|
time.sleep(5) # wait until page has loaded |
|
|
|
browser_t.switch_to.window( |
|
|
|
browser_t.window_handles[1]) # switch to new tab |
|
|
|
# browser_t = uc.Chrome() |
|
|
|
else: |
|
|
|
print("Cloudflare模式只支持Windows x64平台。") |
|
|
|