Browse Source

MacOS Test

pull/254/head
Naibo_Mac_M2 9 months ago
parent
commit
5376aa37b0
12 changed files with 508 additions and 248 deletions
  1. +391
    -207
      .temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py
  2. +24
    -16
      .temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py
  3. +25
    -15
      .temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py
  4. +57
    -0
      .temp_to_pub/EasySpider_MacOS_all_arch/myCode.py
  5. BIN
      ElectronJS/EasySpider_en.crx
  6. BIN
      ElectronJS/EasySpider_zh.crx
  7. +1
    -1
      ElectronJS/config.json
  8. +4
    -6
      ElectronJS/package-lock.json
  9. +2
    -1
      ElectronJS/package.json
  10. +1
    -0
      ElectronJS/tasks/229.json
  11. +1
    -1
      ElectronJS/update_chrome.py
  12. +2
    -1
      ExecuteStage/.vscode/launch.json

+ 391
- 207
.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py
File diff suppressed because it is too large
View File


+ 24
- 16
.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py View File

@ -37,19 +37,21 @@ class MyChrome(webdriver.Chrome):
except Exception as e:
print(e)
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并查找里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
@ -68,14 +70,14 @@ class MyChrome(webdriver.Chrome):
# 获取所有的 iframe
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并找到里面的元素
for iframe in iframes:
# 切换到 iframe
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
@ -84,8 +86,10 @@ class MyChrome(webdriver.Chrome):
# super().switch_to.default_content()
if find_element:
return elements
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
if not find_element:
raise NoSuchElementException
else:
@ -117,19 +121,21 @@ if sys.platform != "darwin":
except Exception as e:
print(e)
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并找到里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
@ -148,14 +154,14 @@ if sys.platform != "darwin":
# 获取所有的 iframe
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并查找里面的元素
for iframe in iframes:
# 切换到 iframe
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
@ -164,8 +170,10 @@ if sys.platform != "darwin":
# super().switch_to.default_content()
if find_element:
return elements
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
if not find_element:
raise NoSuchElementException
else:

+ 25
- 15
.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py View File

@ -31,7 +31,7 @@ def lowercase_tags_in_xpath(xpath):
def on_press_creator(press_time, event):
def on_press(key):
try:
if key.char == 'p':
if key.char == press_time["pause_key"]:
if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间
press_time["duration"] = time.time()
press_time["is_pressed"] = True
@ -39,14 +39,14 @@ def on_press_creator(press_time, event):
duration = time.time() - press_time["duration"]
if duration > 2:
if event._flag == False:
print("任务执行中,长按p键暂停执行。")
print("Task is running, long press 'p' to pause.")
print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。")
print("Task is running, long press '" + press_time["pause_key"] + "' to pause.")
# 设置Event的值为True,使得线程b可以继续执行
event.set()
else:
# 设置Event的值为False,使得线程b暂停执行
print("任务已暂停,长按p键继续执行...")
print("Task paused, long press 'p' to continue...")
print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...")
print("Task paused, long press '" + press_time["pause_key"] + "' to continue...")
event.clear()
press_time["duration"] = time.time()
press_time["is_pressed"] = False
@ -176,26 +176,36 @@ def write_to_csv(file_name, data, record):
f_csv.writerow(to_write)
f.close()
def eval_repl(matchobj):
    """Evaluate the first capture group of a regex match and return it as text.

    The function takes a match object, so it can be passed as the
    replacement callback of ``re.sub``.

    Args:
        matchobj: A regex match object whose group 1 holds a Python
            expression.

    Returns:
        ``str()`` of the value the expression evaluates to.

    Raises:
        Any exception raised while evaluating the expression (for example
        ``SyntaxError`` or ``NameError``) propagates to the caller.
    """
    # Echo the expression before evaluating it so a failure can be traced.
    print(matchobj.group(1))
    # SECURITY: eval() runs arbitrary Python. Only use this on expressions
    # that come from a trusted task definition, never on scraped page text.
    return str(eval(matchobj.group(1), globals(), locals()))
def replace_field_values(orginal_text, outputParameters, browser=None):
pattern = r'Field\["([^"]+)"\]'
try:
replaced_text = re.sub(
pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text)
if replaced_text.find("EVAL") != -1: # 如果返回值中包含EVAL
if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL
replaced_text = replaced_text.replace("self.", "browser.")
replaced_text = re.sub(r'EVAL\("(.*?)"\)', lambda match: str(eval(match.group(1))), replaced_text)
except:
pattern = re.compile(r'(?i)eval\("(.+?)"\)')
# 循环替换所有匹配到的eval语句
while True:
match = pattern.search(replaced_text)
if not match:
break
# 执行eval并将其结果转换为字符串形式
eval_replaced_text = str(eval(match.group(1)))
# 替换eval语句
replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
except Exception as e:
print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.")
replaced_text = orginal_text
return replaced_text
def readCode(code):
    """Return the code to run, loading it from a file when asked to.

    Args:
        code: Either literal code text, or ``"outside:<path>"`` where
            ``<path>`` is resolved relative to the current working directory.

    Returns:
        The content of the referenced file when ``code`` starts with
        ``"outside:"``; otherwise ``code`` unchanged.

    Raises:
        OSError: If the referenced file cannot be opened.
    """
    prefix = "outside:"
    if code.startswith(prefix):
        # Strip the marker by its real length rather than a hard-coded 8.
        file_name = os.path.join(os.path.abspath("./"), code[len(prefix):])
        # utf-8-sig drops a leading BOM, which some editors add on save.
        with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
            code = file_obj.read()
    return code
def write_to_json(file_name, data, types, record, keys):
keys = list(keys)
# Prepare empty list for data

+ 57
- 0
.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py View File

@ -0,0 +1,57 @@
"""
这是一个示例代码片段文件,你可以在这里直接编写Python代码,然后在程序的exec操作中调用。如果此文件名为myCode.py,请将此文件放在EasySpider程序目录下(和Data/文件夹同级),然后在程序的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例:
1. self.browser表示当前操作的浏览器,可以直接使用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)可以滚动到页面底部。
2. 定义全局变量:self.myVar = 1
3. 操作上面定义的全局变量:self.myVar = self.myVar + 1
4. 打印上面定义的全局变量:print(self.myVar)
5. 将某个字段提取的值赋给自定义变量:self.myVar = self.outputParameters["字段名"]
6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值"
This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples:
1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page.
2. Define a global variable: self.myVar = 1
3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1
4. Print the above-defined global variable: print(self.myVar)
5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"]
6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value"
For more complex operations, please download the source code and compile it for execution.
"""
# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
# 导包 | Import packages
from selenium.common.exceptions import ElementClickInterceptedException
# 定义一个函数 | Define a function
def test(n = 0):
for i in range(0, n):
if i % 2 == 0:
print(i)
return "test"
# 异常捕获 | Exception capture
try:
# 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore the error reported by the IDE, because the code is embedded in the program, and the IDE cannot recognize that the self variable and By variable are normal
element.click()
print("点击成功|Click success")
except ElementClickInterceptedException:
# 如果元素被遮挡,点击失败
print("元素被遮挡,无法点击|The element is blocked and cannot be clicked")
except Exception as e:
# 打印其他异常
print("发生了一个异常|An exception occurred", e)
finally:
# 测试函数 | Test function
self.a = 1
print("a = ", self.a)
self.a = self.a + 1
print("a = ", self.a)
print("All parameters:", self.outputParameters)
print(test(3))
print("执行完毕|Execution completed")

BIN
ElectronJS/EasySpider_en.crx View File


BIN
ElectronJS/EasySpider_zh.crx View File


+ 1
- 1
ElectronJS/config.json View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"/Users/naibo/Documents/EasySpider/ElectronJS/user_data"}

+ 4
- 6
ElectronJS/package-lock.json View File

@ -15,6 +15,7 @@
"formidable": "^3.5.0",
"http": "^0.0.1-security",
"multer": "^1.4.5-lts.1",
"node-abi": "^3.52.0",
"node-window-manager": "^2.2.4",
"selenium-webdriver": "^4.16.0",
"ws": "^8.12.0",
@ -3914,9 +3915,9 @@
"license": "MIT"
},
"node_modules/node-abi": {
"version": "3.45.0",
"dev": true,
"license": "MIT",
"version": "3.52.0",
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.52.0.tgz",
"integrity": "sha512-JJ98b02z16ILv7859irtXn4oUaFWADtvkzy2c0IAatNVX2Mc9Yoh8z6hZInn3QwvMEYhHuQloYi+TTQy67SIdQ==",
"dependencies": {
"semver": "^7.3.5"
},
@ -4814,7 +4815,6 @@
},
"node_modules/semver": {
"version": "7.5.3",
"dev": true,
"license": "ISC",
"dependencies": {
"lru-cache": "^6.0.0"
@ -4834,7 +4834,6 @@
},
"node_modules/semver/node_modules/lru-cache": {
"version": "6.0.0",
"dev": true,
"license": "ISC",
"dependencies": {
"yallist": "^4.0.0"
@ -5665,7 +5664,6 @@
},
"node_modules/yallist": {
"version": "4.0.0",
"dev": true,
"license": "ISC"
},
"node_modules/yargs": {

+ 2
- 1
ElectronJS/package.json View File

@ -37,6 +37,7 @@
"formidable": "^3.5.0",
"http": "^0.0.1-security",
"multer": "^1.4.5-lts.1",
"node-abi": "^3.52.0",
"node-window-manager": "^2.2.4",
"selenium-webdriver": "^4.16.0",
"ws": "^8.12.0",
@ -79,4 +80,4 @@
"publishers": []
}
}
}
}

+ 1
- 0
ElectronJS/tasks/229.json View File

@ -0,0 +1 @@
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","wai
tElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}

+ 1
- 1
ElectronJS/update_chrome.py View File

@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder):
def get_chrome_version():
version = "115"
version = "120"
if sys.platform == "win32":
version_re = re.compile(r"^[1-9]\d*\.\d*.\d*")
try:

+ 2
- 1
ExecuteStage/.vscode/launch.json View File

@ -12,7 +12,8 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[52]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
// "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
"args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}
]
}

Loading…
Cancel
Save