Browse Source

version 0.3.0 Beta

pull/43/head
naibo 1 year ago
parent
commit
a76c1cd097
138 changed files with 2685 additions and 660 deletions
  1. +1
    -0
      ElectronJS/.gitignore
  2. BIN
      ElectronJS/EasySpider_en.crx
  3. BIN
      ElectronJS/EasySpider_zh.crx
  4. BIN
      ElectronJS/XPathHelper.crx
  5. +34
    -0
      ElectronJS/check_update.js
  6. +1
    -1
      ElectronJS/config.json
  7. +4
    -3
      ElectronJS/main.js
  8. +2
    -2
      ElectronJS/package.json
  9. +13
    -1
      ElectronJS/server.js
  10. BIN
      ElectronJS/src/img/XPather_helper.png
  11. +23
    -1
      ElectronJS/src/index.html
  12. +1
    -1
      ElectronJS/src/taskGrid/FlowChart.html
  13. +171
    -22
      ElectronJS/src/taskGrid/FlowChart_CN.html
  14. +8
    -0
      ElectronJS/src/taskGrid/FlowChart_CN.js
  15. +27
    -5
      ElectronJS/src/taskGrid/global.js
  16. +42
    -24
      ElectronJS/src/taskGrid/invokeTask.html
  17. +54
    -2
      ElectronJS/src/taskGrid/logic_CN.js
  18. +1
    -1
      ElectronJS/tasks/11.json
  19. +1
    -1
      ElectronJS/tasks/15.json
  20. +1
    -1
      ElectronJS/tasks/16.json
  21. +1
    -1
      ElectronJS/tasks/17.json
  22. +1
    -1
      ElectronJS/tasks/19.json
  23. +1
    -1
      ElectronJS/tasks/2.json
  24. +1
    -1
      ElectronJS/tasks/20.json
  25. +1
    -1
      ElectronJS/tasks/23.json
  26. +1
    -1
      ElectronJS/tasks/25.json
  27. +1
    -1
      ElectronJS/tasks/27.json
  28. +1
    -1
      ElectronJS/tasks/28.json
  29. +1
    -1
      ElectronJS/tasks/29.json
  30. +1
    -1
      ElectronJS/tasks/30.json
  31. +1
    -1
      ElectronJS/tasks/31.json
  32. +1
    -1
      ElectronJS/tasks/32.json
  33. +1
    -1
      ElectronJS/tasks/34.json
  34. +148
    -0
      ElectronJS/tasks/35.json
  35. +193
    -0
      ElectronJS/tasks/36.json
  36. +1
    -0
      ElectronJS/tasks/37.json
  37. +1
    -0
      ElectronJS/tasks/38.json
  38. +1
    -0
      ElectronJS/tasks/39.json
  39. +1
    -1
      ElectronJS/tasks/4.json
  40. +1
    -0
      ElectronJS/tasks/40.json
  41. +139
    -0
      ElectronJS/tasks/41.json
  42. +1
    -0
      ElectronJS/tasks/42.json
  43. +1
    -0
      ElectronJS/tasks/43.json
  44. +1
    -0
      ElectronJS/tasks/44.json
  45. +172
    -0
      ElectronJS/tasks/45.json
  46. +1
    -0
      ElectronJS/tasks/46.json
  47. +278
    -0
      ElectronJS/tasks/47.json
  48. +1
    -0
      ElectronJS/tasks/48.json
  49. +1
    -0
      ElectronJS/tasks/49.json
  50. +1
    -1
      ElectronJS/tasks/5.json
  51. +291
    -0
      ElectronJS/tasks/50.json
  52. +1
    -0
      ElectronJS/tasks/51.json
  53. +1
    -1
      ElectronJS/tasks/6.json
  54. +1
    -1
      ElectronJS/tasks/7.json
  55. +378
    -378
      sks/services.js
  56. +2
    -0
      ExecuteStage/.gitignore
  57. +18
    -0
      ExecuteStage/.vscode/launch.json
  58. +6
    -0
      ExecuteStage/config.json
  59. +272
    -63
      ExecuteStage/easyspider_executestage.py
  60. +2
    -0
      ExecuteStage/requirements.txt
  61. BIN
      Extension/manifest_v3/EasySpider_en.crx
  62. BIN
      Extension/manifest_v3/EasySpider_zh.crx
  63. +12
    -2
      Extension/manifest_v3/package.js
  64. +90
    -26
      Extension/manifest_v3/src/content-scripts/global.js
  65. +78
    -0
      Extension/manifest_v3/src/content-scripts/iframe.vue
  66. +49
    -14
      Extension/manifest_v3/src/content-scripts/main.js
  67. +7
    -1
      Extension/manifest_v3/src/content-scripts/messageInteraction.js
  68. +52
    -17
      Extension/manifest_v3/src/content-scripts/toolkit.vue
  69. +1
    -1
      Extension/manifest_v3/src/manifest.json
  70. +17
    -8
      Readme.md
  71. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/0.json
  72. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/1.json
  73. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/2.json
  74. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/3.json
  75. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/4.json
  76. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/11.json
  77. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/15.json
  78. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/16.json
  79. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/17.json
  80. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/19.json
  81. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/2.json
  82. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/20.json
  83. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/23.json
  84. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/25.json
  85. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/27.json
  86. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/28.json
  87. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/29.json
  88. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/30.json
  89. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/31.json
  90. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/32.json
  91. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/4.json
  92. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/5.json
  93. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/6.json
  94. +1
    -1
      Releases/EasySpider_linux_amd64_Ubuntu/tasks/7.json
  95. +1
    -1
      Releases/EasySpider_windows_386/execution_instances/0.json
  96. +1
    -1
      Releases/EasySpider_windows_386/tasks/11.json
  97. +1
    -1
      Releases/EasySpider_windows_386/tasks/15.json
  98. +1
    -1
      Releases/EasySpider_windows_386/tasks/16.json
  99. +1
    -1
      Releases/EasySpider_windows_386/tasks/17.json
  100. +1
    -1
      Releases/EasySpider_windows_386/tasks/19.json

+ 1
- 0
ElectronJS/.gitignore View File

@ -8,4 +8,5 @@ chromedriver_mac64
user-data/
user_data/
Data/
Chrome/
execution_instances/*

BIN
ElectronJS/EasySpider_en.crx View File


BIN
ElectronJS/EasySpider_zh.crx View File


BIN
ElectronJS/XPathHelper.crx View File


+ 34
- 0
ElectronJS/check_update.js View File

@ -0,0 +1,34 @@
const https = require('https');
const fs = require("fs");
const path = require("path");
// GitHub account and repository whose releases are checked.
const owner = 'NaiboWang';
const repo = 'EasySpider';
// The installed version is read from the package.json that sits next to this script.
const config = JSON.parse(fs.readFileSync(path.join(__dirname, `package.json`), 'utf8'));
const version = config.version;
console.log(`Current version is ${version}`);
// Ask the GitHub Releases API for the latest published release.
https.get(`https://api.github.com/repos/${owner}/${repo}/releases/latest`, {
    headers: {
        'User-Agent': 'Node.js'
    }
}, (res) => {
    let data = '';
    res.on('data', (chunk) => {
        data += chunk;
    });
    res.on('end', () => {
        // Error replies (for example when rate limited) carry no release payload,
        // so report them instead of failing on a missing tag_name below.
        if (res.statusCode !== 200) {
            console.error(`Error: GitHub API responded with status ${res.statusCode}`);
            return;
        }
        let release;
        try {
            release = JSON.parse(data);
        } catch (err) {
            console.error(`Error: could not parse the release response (${err.message})`);
            return;
        }
        if (!release || typeof release.tag_name !== 'string') {
            console.error('Error: the release response does not contain a tag name.');
            return;
        }
        // Strip only a leading "v" (as in "v0.3.0"); a "v" elsewhere in the tag is kept.
        const latestVersion = release.tag_name.replace(/^v/, '');
        console.log(`Latest version is ${latestVersion}`);
        if (version !== latestVersion) {
            console.log('There is a new version of EasySpider, you can download it from github repo: https://github.com/NaiboWang/EasySpider/releases');
        }
    });
}).on('error', (err) => {
    console.error(`Error: ${err.message}`);
});

+ 1
- 1
ElectronJS/config.json View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","absolute_user_data_folder":"/Users/naibowang/Documents/EasySpider/ElectronJS/user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}

+ 4
- 3
ElectronJS/main.js View File

@ -1,7 +1,6 @@
// Modules to control application life and create native browser window
const {app, BrowserWindow, dialog, ipcMain, screen} = require('electron');
app.commandLine.appendSwitch("--disable-http-cache");
const {Builder, By, Key, until} = require("selenium-webdriver");
const chrome = require('selenium-webdriver/chrome');
const {ServiceBuilder} = require('selenium-webdriver/chrome');
@ -123,6 +122,7 @@ async function beginInvoke(msg, ws) {
}
flowchart_window.show();
// flowchart_window.openDevTools();
} else if (msg.type == 2) {
//keyboard
// const robot = require("@jitsi/robotjs");
@ -215,13 +215,13 @@ async function beginInvoke(msg, ws) {
// });
let spawn = require("child_process").spawn;
if (process.platform != "darwin") {
if (process.platform != "darwin" && msg.message.execute_type == 1) {
let child_process = spawn(execute_path, parameters);
child_process.stdout.on('data', function (data) {
console.log(data.toString());
});
} else {
ws.send(task_server.getDir() + "/");
ws.send(JSON.stringify({"config_folder": task_server.getDir() + "/", "easyspider_location": task_server.getEasySpiderLocation()}));
}
}
}
@ -284,6 +284,7 @@ async function runBrowser(lang = "en", user_data_folder = '') {
} else if (lang == "zh") {
options.addExtensions(path.join(__dirname, "EasySpider_zh.crx"));
}
options.addExtensions(path.join(__dirname, "XPathHelper.crx"));
options.setChromeBinaryPath(chromeBinaryPath);
if (user_data_folder != "") {
let dir = path.join(task_server.getDir(), user_data_folder);

+ 2
- 2
ElectronJS/package.json View File

@ -1,7 +1,7 @@
{
"name": "easy-spider",
"productName": "EasySpider",
"version": "0.2.0",
"version": "0.3.0",
"icon": "./favicon",
"description": "NoCode Visual Web Crawler",
"main": "main.js",
@ -60,7 +60,7 @@
],
"packagerConfig": {
"icon": "./favicon",
"appVersion": "0.2.0",
"appVersion": "0.3.0",
"name": "EasySpider",
"executableName": "EasySpider",
"appCopyright": "Naibo Wang (naibowang@foxmail.com)",

+ 13
- 1
ElectronJS/server.js View File

@ -29,7 +29,18 @@ function getDir(){
} else {
return path.join(__dirname,"../../..");
}
} else{
} else {
return __dirname;
}
}
/**
 * Return the folder treated as the EasySpider location.
 *
 * When this module's directory path contains both "app" and "sources"
 * (presumably a packaged build, e.g. a `resources/app` folder - confirm),
 * the result is three levels above `__dirname`, with a trailing separator.
 * Otherwise `__dirname` itself is returned.
 *
 * @returns {string} The resolved folder path.
 */
function getEasySpiderLocation(){
    const looksPackaged = __dirname.includes("app") && __dirname.includes("sources");
    if (!looksPackaged) {
        return __dirname;
    }
    // macOS and every other platform resolve to the same relative location here.
    return path.join(__dirname, "../../../");
}
@ -44,6 +55,7 @@ if(!fs.existsSync(path.join(getDir(), "config.json"))){
}
exports.getDir = getDir;
exports.getEasySpiderLocation = getEasySpiderLocation;
FileMimes = JSON.parse(fs.readFileSync(path.join(__dirname,'mime.json')).toString());
exports.start = function(port = 8074) {
http.createServer(function(req, res) {

BIN
ElectronJS/src/img/XPather_helper.png View File

Before After
Width: 664  |  Height: 549  |  Size: 36 KiB

+ 23
- 1
ElectronJS/src/index.html View File

@ -24,6 +24,10 @@
<p><a @click="changeLang('en')" class="btn btn-outline-primary btn-lg"
style="margin-top: 15px; width: 300px;height:60px;padding-top:12px;">English</a></p>
<p><a href="https://github.com/NaiboWang/EasySpider/Releases" target="_blank">Github</a>最新版本/Newest Version:{{newest_version}}</p>
<!-- <p>如发现新版本更新,可从以下Github仓库下载最新版本使用/If a new version is found, you can download the latest version from the following Github repository:</p>-->
<!-- <p></p>-->
</div>
<div style="padding: 10px; text-align: center;vertical-align: middle;" v-else>
@ -144,15 +148,33 @@
return "";
}
createApp({
var global = createApp({
data() {
return {
init: true,
lang: 'zh',
user_data_folder: getUrlParam("user_data_folder"),
step: 0,
newest_version: '-', // 最新版本号
}
},
mounted() {
// 发送GET请求获取GitHub的Release API响应
const request = new XMLHttpRequest();
request.open('GET', `https://api.github.com/repos/NaiboWang/EasySpider/releases/latest`);
request.setRequestHeader('User-Agent', 'JavaScript');
request.onload = function() {
// 解析响应JSON并输出最新版本号
const release = JSON.parse(request.responseText);
const latestVersion = release.tag_name;
global.$data.newest_version = latestVersion;
// alert(`Latest version is ${latestVersion}`);
};
request.onerror = function() {
console.error('Error: failed to get latest version.');
};
request.send();
},
methods: {
changeLang(lang = 'zh') {
this.init = false;

+ 1
- 1
ElectronJS/src/taskGrid/FlowChart.html View File

@ -139,7 +139,7 @@
<label>Current parameter name: <strong>{{paras.parameters[paraIndex]["name"]}}</strong></label>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["relative"]'></input>Use relative XPATH</p>
<label>XPATH: <span style="font-size: 30px!important;" title="Relative XPATH writing: start with /, e.g. the loop item XPATH is /html/body/div[1], your input is /*[@id='tab-customer'], then the final addressed xpath is: /html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXpath"]'></textarea>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXPath"]'></textarea>
<label>Extraction Type</label>
<select v-model='paras.parameters[paraIndex]["contentType"]' class="form-control">
<option :value = 0>Text (include child element)</option>

+ 171
- 22
ElectronJS/src/taskGrid/FlowChart_CN.html View File

@ -18,6 +18,9 @@
<div id="tip" class="alert alert-success alert-dismissible fade show" style="position: fixed; width:100%;display: none;">
<button type="button" class="close" data-dismiss="alert">&times;</button> 提示:保存成功!
</div>
<div id="tip2" class="alert alert-danger alert-dismissible fade show" style="position: fixed; width:100%;display: none;">
<button type="button" class="close" data-dismiss="alert">&times;</button> 提示:保存失败,如果有多个自定义操作,则他们的选项名称必须设置成不同名称!
</div>
<div id="navigator">
<nav aria-label="breadcrumb" v-if="type==1">
<ol class="breadcrumb" style="padding-left:23px;padding-bottom: 0;margin-bottom:0;background-color: white">
@ -30,7 +33,7 @@
</nav>
</div>
<div style="display:flex">
<div style="width: 200px;float:left">
<div style="width: 200px;float:left;overflow: auto">
<div class="toolbox" style="text-align:center;margin: 20px;border-radius:10px;border:navy solid;background-color:rgb(242,243,245);z-index: 9999;">
<div style="padding: 10px;border-radius:10px;font-size: 20px;">工具箱</div>
<button type="button" id="save" data-toggle="modal" data-target="#myModal" onmousedown="$('#myModal').modal('show');" class="btn btn-primary">保存任务</button>
@ -64,8 +67,28 @@
<input style="float:left;margin-left:10px;margin-top:10px;margin-bottom:10px" type="button" class="btn-primary" value="保存任务"></input>
</div> -->
<div class="Modify" id="app">
<div class="modal fade" id="myModal_XPath" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title">等价XPath</h4>
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">&times;</button>
</div>
<div class="modal-body">
<label>以下提供除默认生成的XPath外其余等价的XPath,都能定位到同一个元素(但不完全准确,可能可以定位到除该元素外的其他元素,因此只是提供在这里作为参考)。 </label>
<label>每行一个XPath(可使用预装的XPath Helper扩展调试):</label>
<textarea id="otherXPaths" onkeydown="inputDelete(event)" class="form-control" rows="4">{{XPaths}}</textarea>
<div>
<img src="../img/XPather_helper.png" style="width:50%;height:50%; margin: 10px auto"></div>
</div>
</div>
<!-- /.modal-content -->
</div>
<!-- /.modal -->
</div>
<div>
<label>选项名称:</label>
<label>选项名称(鼠标移到笑脸可查看提示)<span style="font-size: 30px!important;" title="修改名称后点击下方“确定”按钮刷新流程图"></span></label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='list.nl[index.nowNodeIndex]["title"]'></input>
</div>
<!-- 下面是10种不同类型操作选项的不同的配置页面 -->
@ -75,9 +98,9 @@
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='useLoop'></input>使用循环内的链接</p>
</div>
<div v-if='!useLoop'>
<label>url:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='nowNode["parameters"]["url"]'></input>
<label>填入的全部链接:</label>
<!-- <label>url:</label>-->
<!-- <input onkeydown="inputDelete(event)" class="form-control" v-model='nowNode["parameters"]["url"]'></input>-->
<label>链接(每行一个链接,有多少行链接整个任务流程就会被执行多少次):</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["links"]'></textarea>
</div>
<label>执行完是否向下滚动:</label>
@ -98,7 +121,9 @@
<div v-if='!useLoop'>
<label>XPath: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode['parameters']['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
</div>
<label>执行完是否向下滚动:</label>
<select v-model='nowNode["parameters"]["scrollType"]' class="form-control">
<option value = 0>不滚动</option>
@ -107,6 +132,26 @@
</select>
<label>滚动次数:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollCount']" type="number" required></input>
<p style="margin-top: 10px">
<a class="btn btn-primary" data-toggle="collapse" href="#collapseExample" role="button" aria-expanded="false" aria-controls="collapseExample">
点此展开/折叠高级操作
</a>
</p>
<div :class="{collapse: true, 'show': nowNode['parameters']['beforeJS'].length!=0 || nowNode['parameters']['afterJS'].length!=0}" id="collapseExample">
<div>
<label>点击该元素<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].innerText = arguments[0].innerText("上海","Shanghai")即实现了将元素文字中所有的“上海”替换成”Shanghai“的功能,然后后续如提取数据时就会提取到替换后的值。' v-model='nowNode["parameters"]["beforeJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["beforeJSWaitTime"]'></input>
<label>点击该元素<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].click()即点击此元素' v-model='nowNode["parameters"]["afterJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["afterJSWaitTime"]'></input>
</div>
</div>
</div>
@ -135,17 +180,50 @@
</table>
</div>
<div style="font-size: 13px;" v-if="paraIndex<paras.parameters.length">
<label>当前编辑参数名: <strong>{{paras.parameters[paraIndex]["name"]}}</strong></label>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["relative"]'></input>使用相对循环内的XPATH</p>
<label>XPATH: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXpath"]'></textarea>
<p>当前编辑字段参数名(点击字段的“修改”选项切换参数): </p>
<label><strong>{{paras.parameters[paraIndex]["name"]}}</strong></label>
<p v-if="nowNode['isInLoop']"><input onkeydown="inputDelete(event)" type="checkbox" v-model='paras.parameters[paraIndex]["relative"]'></input>使用相对循环内的XPATH</p>
<p>XPATH: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></p>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["relativeXPath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(paras.parameters[paraIndex]['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
<p style="margin-top: 10px">
<a class="btn btn-primary" data-toggle="collapse" href="#elementAdvanced" role="button" aria-expanded="false" aria-controls="collapseExample">
点此展开/折叠高级操作
</a>
</p>
<div :class="{collapse: true, 'show': paras.parameters[paraIndex]['beforeJS'].length!=0 || paras.parameters[paraIndex]['afterJS'].length!=0}" id="elementAdvanced">
<div>
<label>提取该元素数据<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].innerText = arguments[0].innerText("上海","Shanghai")即实现了将元素文字中所有的“上海”替换成”Shanghai“的功能,然后后续如提取数据时就会提取到替换后的值。' v-model='paras.parameters[paraIndex]["beforeJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["beforeJSWaitTime"]'></input>
<label>提取该元素数据<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].click()即点击此元素' v-model='paras.parameters[paraIndex]["afterJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["afterJSWaitTime"]'></input>
</div>
</div>
<label>采集内容类型</label>
<select v-model='paras.parameters[paraIndex]["contentType"]' class="form-control">
<option :value = 0>文本(包括子元素)</option>
<option :value = 1>文本(不包括子元素)</option>
<option :value = 2>innerHTML</option>
<option :value = 3>outerHTML</option>
<option :value = 4>背景图片地址</option>
<option :value = 5>页面URL</option>
<option :value = 6>页面标题</option>
<option :value = 7>元素截图</option>
<option :value = 8>OCR识别文字</option>
<option :value = 9>针对该元素的JavaScript代码返回值</option>
</select>
<div v-if='paras.parameters[paraIndex]["contentType"] == 9'>
<label>JavaScript代码: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='该元素用arguments[0]来表示,示例:return arguments[0].innerText + "美元",即实现了提取该元素innerText并后面加“美元”的功能。' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["JSWaitTime"]'></input>
</div>
<label>节点类型</label>
<select v-model='paras.parameters[paraIndex]["nodeType"]' class="form-control">
<option :value = 0>普通节点</option>
@ -154,10 +232,16 @@
<option :value = 3>表单值</option>
<option :value = 4>图片地址</option>
</select>
<!-- <label>提取方式</label>-->
<!-- <select v-model='paras.parameters[paraIndex]["extractType"]' class="form-control">-->
<!-- <option :value = 0>普通提取</option>-->
<!-- <option :value = 1>OCR提取</option>-->
<!-- </select>-->
<label>参数描述:</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='paras.parameters[paraIndex]["desc"]'></textarea>
<textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 60px" v-model='paras.parameters[paraIndex]["desc"]'></textarea>
<label>元素找不到时的值:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["default"]'></textarea>
</div>
</div>
@ -166,7 +250,7 @@
<div class="elements" v-if="nodeType==4">
<div v-if="nowNode['isInLoop']">
<!-- 如果在循环内且循环内是固定文本才显示此行元素 -->
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='useLoop'></input>使用循环内的文本</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='useLoop'></input>使用循环内的文本(不勾选则每次输入的文本为下方“输入值”文本框内的文本,勾选后会使用所在循环内设置的文本)</p>
</div>
<div v-if='!useLoop'>
<label>输入值:</label>
@ -175,9 +259,45 @@
<label>XPath: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
<p style="margin-top: 10px">
<a class="btn btn-primary" data-toggle="collapse" href="#inputAdvanced" role="button" aria-expanded="false" aria-controls="collapseExample">
点此展开/折叠高级操作
</a>
</p>
<div :class="{collapse: true, 'show': nowNode['parameters']['beforeJS'].length!=0 || nowNode['parameters']['afterJS'].length!=0}" id="inputAdvanced">
<div>
<label>对该元素输入文字<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].innerText = arguments[0].innerText("上海","Shanghai")即实现了将元素文字中所有的“上海”替换成”Shanghai“的功能,然后后续如提取数据时就会提取到替换后的值。' v-model='nowNode["parameters"]["beforeJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["beforeJSWaitTime"]'></input>
<label>对该元素输入文字<strong></strong>针对该元素执行一段JavaScript脚本: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" placeholder='该元素用arguments[0]来表示,示例JS代码:arguments[0].click()即点击此元素' v-model='nowNode["parameters"]["afterJS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["afterJSWaitTime"]'></input>
</div>
</div>
</div>
<div class="elements" v-if="nodeType==5">
<label>自定义执行模式</label>
<select v-model='nowNode["parameters"]["codeMode"]' class="form-control">
<option value = 0>执行一段JavaScript脚本</option>
<option value = 1>执行一段操作系统级别命令</option>
</select>
<div>
<label>代码/脚本内容: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="输入JS或系统命令,如:document.body.click() 或 python D:/test.py,分别为JS命令和系统命令示例。"></textarea>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["recordASField"]'></input>将执行后的输出/返回值作为字段记录</p>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
</div>
@ -193,26 +313,35 @@
<!-- 循环选项 -->
<label>循环类型:</label>
<select v-model='loopType' class="form-control">
<option value = 0>单个元素</option>
<option value = 0>单个元素(多用于循环点击下一页)</option>
<option value = 1>不固定元素列表</option>
<option value = 2>固定元素列表</option>
<option value = 3>文本列表</option>
<option value = 4>网址列表</option>
<option value = 3>文本列表(多用于循环在文本框输入文本)</option>
<option value = 4>网址列表(多用于循环打开网页)</option>
<option value = 5>JavaScript命令返回值</option>
<option value = 6>系统命令返回值</option>
</select>
<div v-if='parseInt(loopType) < 2'>
<label>XPath: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">(测试功能)点此查看其他可能的XPath写法</button></p>
</div>
<div v-if='parseInt(loopType) == 2'>
<div v-else-if='parseInt(loopType) == 2'>
<label>XPath列表:</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["pathList"]'></textarea>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个XPath" v-model='nowNode["parameters"]["pathList"]'></textarea>
</div>
<div v-if='parseInt(loopType) &gt; 2'>
<div v-else-if='parseInt(loopType) &lt; 5'>
<label>内容列表:</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["textList"]'></textarea>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个文本/网址" v-model='nowNode["parameters"]["textList"]'></textarea>
</div>
<div v-else-if='parseInt(loopType) < 7'>
<label>代码/脚本内容: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="命令返回值大于0或为真则继续循环,否则停止循环。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。"></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
<!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
<label v-if='parseInt(loopType) == 0'>最多执行循环次数(0代表不设置):</label>
<label v-if='parseInt(loopType) == 0'>最多执行循环次数(0代表无限循环直到找不到元素为止):</label>
<input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
<label><b>历史记录回退后</b>等待秒数:</label>
<input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["historyWait"]'></input>
@ -239,9 +368,26 @@
<option value = 2>当前页面包括元素</option>
<option v-if="nowNode['isInLoop']" value = 3>当前循环项包括文本</option>
<option v-if="nowNode['isInLoop']" value = 4>当前循环项包括元素</option>
<option value = 5>JavaScript命令返回值</option>
<option value = 6>系统命令返回值</option>
<option v-if="nowNode['isInLoop']" value = 7>针对当前循环项的JavaScript命令返回值</option>
</select>
<label v-if='TClass'>包含的文字/元素XPATH: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" required placeholder="如果是当前循环包含元素,则输入相对元素的xpath。" v-if='TClass' class="form-control" rows="3" v-model='nowNode["parameters"]["value"]'></textarea>
<div v-if='TClass>0 && TClass <5'>
<label>包含的文字/元素XPATH: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']"></span></label>
<textarea onkeydown="inputDelete(event)" required placeholder="如果是当前循环包含元素,则输入相对元素的xpath。" class="form-control" rows="3" v-model='nowNode["parameters"]["value"]'></textarea>
</div>
<div v-else-if='TClass > 0 && TClass < 7'>
<label>代码/脚本内容: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="命令返回值大于0或为真则执行此分支内操作,否则不执行。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。"></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
<div v-else-if='TClass == 7'>
<label>代码/脚本内容: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="输入针对该循环项的JS命令,该循环项用arguments[0]表示,返回值大于0或为真则执行此分支内操作,否则不执行。如:return arguments[0].innerText.indexOf('123') >=0 即判断当前循环项的文本是否包含123,注意要配合循环类型为元素相关(如不固定元素列表)使用。"></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
</div>
<div style="margin-top:5px">
<label><b>执行后</b>等待秒数:</label>
@ -280,6 +426,8 @@
</div>
<!-- /.modal -->
</div>
</body>
<script src="FlowChart_CN.js"></script>
<script src="logic_CN.js"></script>
@ -309,9 +457,10 @@
url = "taskInfo.html?id="+getUrlParam("id")+"&lang=en&type="+getUrlParam("type")+"&wsport="+getUrlParam("wsport")+"&backEndAddressServiceWrapper=" + this.backEndAddressServiceWrapper
}
window.location.href = url;
},
}
}
});
</script>
</html>

+ 8
- 0
ElectronJS/src/taskGrid/FlowChart_CN.js View File

@ -40,6 +40,7 @@ var app = new Vue({
paras: { "parameters": [] }, //提取数据的参数列表
TClass: -1, //条件分支的条件类别
paraIndex: 0, //当前参数的index
XPaths: "", //xpath列表
},
watch: {
nowArrow: { //变量发生变化的时候进行一些操作
@ -86,6 +87,13 @@ var app = new Vue({
},
},
methods: {
changeXPaths: function (XPaths){
let result = "";
for (var i = 0; i < XPaths.length; i++) {
result += XPaths[i] + "\n";
}
this.XPaths = result;
},
modifyParas: function(i) { //修改第i个参数
this.paraIndex = i;
},

+ 27
- 5
ElectronJS/src/taskGrid/global.js View File

@ -1,13 +1,35 @@
/**
 * Read a single query-string parameter from the current page URL.
 *
 * @param {string} name - Parameter name; it is inserted into a regular
 *   expression as-is, so it should not contain regex metacharacters.
 * @returns {string} The unescaped parameter value, or "" when the
 *   parameter is absent.
 */
function getUrlParam(name) {
    // Match "name=value" at the start of the query or after "&".
    const reg = new RegExp("(^|&)" + name + "=([^&]*)(&|$)");
    // Drop the leading "?" before matching.
    const r = window.location.search.slice(1).match(reg);
    if (r !== null) {
        // NOTE(review): unescape() is a legacy API; it is kept so values
        // keep decoding exactly as before.
        return unescape(r[2]);
    }
    return "";
}
/**
 * Report whether the browser's user-agent string mentions
 * "macintosh" or "mac os x" (case-insensitive).
 *
 * @returns {boolean} true when either marker is present.
 */
function isMac() {
    const macPattern = /macintosh|mac os x/i;
    return macPattern.test(navigator.userAgent);
};
/**
 * Classify the host operating system from navigator.platform and
 * navigator.userAgent.
 *
 * @returns {{version: string, bit: (number|string)}} version is "win",
 *   "macOS", "linux" or "" when the platform is not recognised; bit is
 *   32 or 64, or "" when it could not be determined.
 */
function getOperatingSystemInfo() {
    const platformName = navigator.platform;
    const userAgent = navigator.userAgent.toLowerCase();
    const mentions = (token) => userAgent.indexOf(token) >= 0;
    const info = { version: "", bit: "" };

    if (platformName.startsWith("Win")) {
        info.version = "win";
        // The 64-bit check runs last, so it wins when both markers appear.
        if (mentions("win32") || mentions("wow32")) {
            info.bit = 32;
        }
        if (mentions("win64") || mentions("wow64")) {
            info.bit = 64;
        }
        return info;
    }
    if (platformName.startsWith("Mac")) {
        info.version = "macOS";
        info.bit = 64;
        return info;
    }
    if (platformName.startsWith("Linux")) {
        info.version = "linux";
        info.bit = 64;
    }
    return info;
}
Vue.filter('lang', function (value) {
if (getUrlParam("lang") == "zh") {

+ 42
- 24
ElectronJS/src/taskGrid/invokeTask.html View File

@ -40,7 +40,7 @@
<div class="col-md-6" id="taskInfo" style="margin:0 auto" v-if="show">
<div class="modal fade" id="myModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="myModalLabel">{{"Task Invocation Instruction~执行任务说明" | lang}}</h4>
@ -49,8 +49,10 @@
<div class="modal-body">
<input onkeydown="inputDelete(event)" id="serviceId" type="hidden" name="serviceId" value="-1"></input>
<input onkeydown="inputDelete(event)" id="url" type="hidden" name="url" value="about:blank"></input>
<label>{{"For MacOS, please open a terminal, go to EasySpider's folder, and then copy (command + c) the following command to run the task (EasySpider cannot quit when executing command):~对于MacOS系统,请在EasySpider目录下打开命令行工具Terminal,然后复制(command + c)和运行以下命令以执行任务(执行命令时不能退出EasySpider):" | lang}}</label>
<textarea class="form-control" style="height:150px">{{command}} --config_folder "{{config_folder}}"</textarea>
<label>{{`Please open a terminal, go to EasySpider's folder, and then copy (Command/Ctrl + c) the following command to run the task (EasySpider cannot quit when executing command, unless --read_type is set to "local"):~请在EasySpider目录下打开命令行工具Terminal,然后复制(Command/Ctrl + c)和运行以下命令以执行任务(执行命令时不能退出EasySpider,除非将--read_type设置为local):` | lang}}</label>
<label><a href="https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction" target="_blank">{{`Click Here~点击这里` | lang}}</a> {{`to see the argument instructions.~查看参数配置说明。` | lang}}</label>
<textarea class="form-control" style="height:150px">cd {{easyspider_location}}
{{command}} --config_folder "{{config_folder}}" --headless 0 --read_type remote --config_name config.json --saved_file_name </textarea>
</div>
<!-- <div class="modal-footer">
<button type="button" id="saveAsButton" class="btn btn-outline-primary">另存为</button>
@ -116,13 +118,13 @@
<input class="form-control" v-model="ID"></input>
<p></p>
<!-- <p>提示:点击下方按钮获得任务ID,然后根据此ID进行服务执行;也可自己POST调用接口得到ID,具体参照POST调用文档。</p> -->
<p>{{"Hint: Click the following button to get the Execution ID (EID) of current running task.~提示:点击下方按钮获得任务执行ID,然后根据此执行ID运行任务。" | lang}}</p>
<p>{{"Hint: Click the \"Get Execution ID\" button at the bottom to get the task ID, and click the \"Execute task by commandline\" button at the back to get the prompt command on how to run this task using the command line.~提示:点击下方“获得任务执行ID”按钮得到任务ID,点击后面的“使用命令行执行任务”按钮获得如何使用命令行运行任务的提示命令。" | lang}}</p>
<button class="btn btn-primary" href="javascript:void(0)" v-on:click="invokeTask">{{"Get Execution ID~获得任务执行ID" |
lang}}</button>
<button class="btn btn-primary" style="margin-left: 8px;" v-on:click="localExecute(false)">{{"Run Locally (Clean Mode)~本地执行(纯净模式)"
<button class="btn btn-primary" style="margin-left: 8px;" v-on:click="localExecute(false)">{{"Execute task by commandline (Clean Mode)~使用命令行执行任务(纯净模式)"
| lang}}
</button>
<button class="btn btn-primary" style="margin-left: 8px;" v-on:click="localExecute(true)">{{"Run Locally (Data Mode)~本地执行(带用户信息模式)"
<button class="btn btn-primary" style="margin-left: 8px;" v-on:click="localExecute(true)">{{"Execute task by commandline (Data Mode)~使用命令行执行任务(带用户信息模式)"
| lang}}
</button>
<!-- <button v-on:click="remoteExcute" style="margin-left: 8px;" class="btn btn-primary">Run remotely</button></div> -->
@ -154,6 +156,7 @@
backEndAddressServiceWrapper: getUrlParam("backEndAddressServiceWrapper"),
command: "./easyspider_executestage ",
config_folder: "",
easyspider_location: "",
}, mounted() {
$.get(this.backEndAddressServiceWrapper + "/getConfig", function (result) {
app.$data.user_data_folder = result.user_data_folder;
@ -182,7 +185,7 @@
if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
text = "Are you sure to get the Execution ID (EID) of current running task?";
} else {
text = "确定要获得当前运行任务的执行吗?";
text = "确定要获得当前运行任务的执行ID吗?";
}
if (confirm(text)) {
var para = {};
@ -203,31 +206,30 @@
if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
alert("Please get EID first!");
} else {
alert("请先获得执行!");
alert("请先获得执行ID!");
}
return;
}
this.with_user_data = with_user_data;
let text = "";
if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
text = "Are you sure to run this task locally?";
} else {
text = "确定要在本地运行此任务吗?";
}
if (confirm(text)) {
// if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
// text = "Are you sure to run this task locally?";
// } else {
// text = "确定要在本地运行此任务吗?";
// }
// if (confirm(text)) {
let message = { //显示flowchart
type: 5, //消息类型,调用执行程序
message: {
"id": app.$data.ID,
"user_data_folder": app.$data.with_user_data ? app.$data.user_data_folder : "",
"execute_type": 0,
}
};
ws.send(JSON.stringify(message));
if(isMac()){
app.$data.command = "./easyspider_executestage --id " + app.$data.ID.toString() + " --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
$('#myModal').modal('show');
}
}
changeCommand();
$('#myModal').modal('show');
// }
},
remoteExecute: function () {
@ -256,21 +258,35 @@
message: {
"id": result,
"user_data_folder": app.$data.with_user_data ? app.$data.user_data_folder : "",
"execute_type": 1,
}
};
ws.send(JSON.stringify(message));
if(isMac()){
app.$data.command = "./easyspider_executestage --id " + result.toString() + " --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
if(getOperatingSystemInfo().version == 'macOS'){
// app.$data.command = "./easyspider_executestage --id " + result.toString() + " --user_data " + (app.$data.with_user_data ? "1" : "0") + " --server_address " + app.$data.backEndAddressServiceWrapper;
changeCommand();
$('#myModal').modal('show');
}
});
}
},
remoteExcuteInstant: function () {
remoteExecuteInstant: function () {
},
}
});
/**
 * Rebuild app.$data.command, the command line shown to the user for
 * running the current task with the execute-stage binary.
 *
 * The executable path depends on the platform reported by
 * getOperatingSystemInfo(); the arguments (--id, --user_data,
 * --server_address) are the same on every platform. When the platform
 * is not recognised the existing command is left untouched.
 */
function changeCommand() {
    const OSInfo = getOperatingSystemInfo();
    const bit = Number(OSInfo.bit);
    const data = app.$data;

    let executable = "";
    if (OSInfo.version === 'win' && bit === 64) {
        executable = "./EasySpider/resources/app/chrome_win64/easyspider_executestage.exe";
    } else if (OSInfo.version === 'win' && bit === 32) {
        executable = "./EasySpider/resources/app/chrome_win32/easyspider_executestage.exe";
    } else if (OSInfo.version === 'linux') {
        executable = "./EasySpider/resources/app/chrome_linux64/easyspider_executestage";
    } else if (OSInfo.version === 'macOS' || OSInfo.version === 'mac') {
        // getOperatingSystemInfo() reports "macOS"; the previous check for
        // "mac" alone never matched, so no command was generated on macOS.
        executable = "./easyspider_executestage";
    }
    if (executable === "") {
        return;
    }

    const userDataFlag = data.with_user_data ? "1" : "0";
    data.command = `${executable} --id ${data.ID.toString()} --user_data ${userDataFlag} --server_address ${data.backEndAddressServiceWrapper}`;
}
$.get(app.$data.backEndAddressServiceWrapper + "/queryTask?id=" + sId, function (result) {
app.$data.task = result;
app.$data.show = true;
@ -289,7 +305,9 @@
this.send(JSON.stringify(message));
};
ws.onmessage = function(message){
    // message.data is a JSON string carrying config_folder and
    // easyspider_location; parse it once into a local instead of first
    // storing the raw string and then reassigning the handler's parameter.
    const payload = JSON.parse(message.data);
    app.$data.config_folder = payload.config_folder;
    app.$data.easyspider_location = payload.easyspider_location;
}
ws.onclose = function () {
// 关闭 websocket

+ 54
- 2
ElectronJS/src/taskGrid/logic_CN.js View File

@ -110,6 +110,10 @@ function addParameters(t) {
useLoop: false, //是否使用循环中的元素
xpath: "", //xpath
wait: 0, //执行后等待
beforeJS: "", //执行前执行的js
beforeJSWaitTime: 0, //执行前js等待时间
afterJS: "", //执行后执行的js
afterJSWaitTime: 0, //执行后js等待时间
}; //公共参数处理
if (t.option == 1) {
t["parameters"]["url"] = "about:blank";
@ -120,10 +124,23 @@ function addParameters(t) {
t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部
t["parameters"]["scrollCount"] = 0; //滚动次数
t["parameters"]["paras"] = []; //默认参数列表
t["parameters"]["beforeJS"] = ""; //执行前执行的js
t["parameters"]["beforeJSWaitTime"] = 0; //执行前js等待时间
t["parameters"]["afterJS"] = ""; //执行后执行的js
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if (t.option == 3) { //提取数据
t["parameters"]["paras"] = []; //默认参数列表
} else if (t.option == 4) { //输入文字
t["parameters"]["value"] = "";
t["parameters"]["beforeJS"] = ""; //执行前执行的js
t["parameters"]["beforeJSWaitTime"] = 0; //执行前js等待时间
t["parameters"]["afterJS"] = ""; //执行后执行的js
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if(t.option == 5) { //自定义操作
t["parameters"]["codeMode"] = 0; //代码模式,0代表JS, 2代表系统级别
t["parameters"]["code"] = "";
t["parameters"]["waitTime"] = 0; //最长等待时间
t["parameters"]["recordASField"] = 0; //是否记录脚本输出
} else if (t.option == 8) { //循环
t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部
t["parameters"]["scrollCount"] = 0; //滚动次数
@ -131,6 +148,8 @@ function addParameters(t) {
t["parameters"]["xpath"] = "";
t["parameters"]["pathList"] = "";
t["parameters"]["textList"] = "";
t["parameters"]["code"] = ""; //执行的代码
t["parameters"]["waitTime"] = 0; //最长等待时间
t["parameters"]["exitCount"] = 0; //执行多少次后退出循环,0代表不设置此条件
t["parameters"]["historyWait"] = 2; //历史记录回退时间,用于循环点击每个链接的情况下点击链接后不打开新标签页的情况
} else if (t.option == 9) { //条件
@ -138,6 +157,8 @@ function addParameters(t) {
} else if (t.option == 10) { //条件分支
t["parameters"]["class"] = 0; //0代表什么条件都没有,1代表当前页面包括文本,2代表当前页面包括元素,3代表当前循环包括文本,4代表当前循环包括元素
t["parameters"]["value"] = ""; //相关值
t["parameters"]["code"] = ""; //code
t["parameters"]["waitTime"] = 0; //最长等待时间
}
}
@ -153,12 +174,15 @@ function modifyParameters(t, para) {
} else if (t.option == 4) { //输入文字事件
t["parameters"]["value"] = para["value"];
t["parameters"]["xpath"] = para["xpath"];
t["parameters"]["allXPaths"] = para["allXPaths"];
} else if (t.option == 2) { //鼠标点击事件
t["parameters"]["xpath"] = para["xpath"];
t["parameters"]["useLoop"] = para["useLoop"];
t["parameters"]["allXPaths"] = para["allXPaths"];
} else if (t.option == 8) { //循环事件
t["parameters"]["loopType"] = para["loopType"];
t["parameters"]["xpath"] = para["xpath"];
t["parameters"]["allXPaths"] = para["allXPaths"];
if (para["nextPage"]) { //循环点击下一页的情况下
t["title"] = "循环点击下一页"
} else {
@ -171,6 +195,12 @@ function modifyParameters(t, para) {
} else if (t.option == 3) { //采集数据
for (let i = 0; i < para["parameters"].length; i++) {
para["parameters"][i]["default"] = ""; //找不到元素时候的默认值
para["parameters"][i]["beforeJS"] = ""; //执行前执行的js
para["parameters"][i]["beforeJSWaitTime"] = 0; //执行前js等待时间
para["parameters"][i]["JS"] = ""; //如果是JS,需要执行的js
para["parameters"][i]["JSWaitTime"] = 0; //JS等待时间
para["parameters"][i]["afterJS"] = ""; //执行后执行的js
para["parameters"][i]["afterJSWaitTime"] = 0; //执行后js等待时间
}
t["parameters"]["paras"] = para["parameters"];
}
@ -233,7 +263,7 @@ function saveService(type) {
nodeId: i, //记录操作位于的节点位置,重要!!!
nodeName: nodeList[i]["title"],
value: nodeList[i]["parameters"]["links"],
desc: "要采集的网址列表,多行以\\n分开",
desc: "要采集的网址列表多行以\\n分开",
type: "string",
exampleValue: nodeList[i]["parameters"]["links"]
});
@ -256,7 +286,7 @@ function saveService(type) {
}
} else if (nodeList[i]["option"] == 8) //循环操作
{
if (parseInt(nodeList[i]["parameters"]["loopType"]) > 2) { //循环中的循环输入文本或循环输入网址
if (parseInt(nodeList[i]["parameters"]["loopType"]) > 2 && parseInt(nodeList[i]["parameters"]["loopType"]) < 5) { //循环中的循环输入文本或循环输入网址
inputParameters.push({
id: inputIndex,
name: "loopText_" + inputIndex++,
@ -293,6 +323,28 @@ function saveService(type) {
});
}
}
} else if (nodeList[i]["option"] == 5) //自定义操作
{
if (nodeList[i]["parameters"]["recordASField"]) {
let id = outputIndex++;
let title = nodeList[i]["title"];
if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加
$('#myModal').modal('hide');
$("#tip2").slideDown(); //提示框
fadeout = setTimeout(function() {
$("#tip2").slideUp();
}, 5000);
return;
}
outputNames.push(title);
outputParameters.push({
id: id,
name: title,
desc: "自定义操作返回的数据",
type: "string",
exampleValue: "",
});
}
} else if (nodeList[i]["option"] == 9) //条件判断
{
containJudge = true;

+ 1
- 1
ElectronJS/tasks/11.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/15.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/16.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/17.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/19.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/2.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/20.json View File

@ -1 +1 @@
{"id": 20, "name": "Bilibili\u7c89\u4e1d", "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "containJudge": false, "desc": "https://space.bilibili.com/291929894/fans/fans", "inputParameters": [{"id": 0, "name": "urlList_0", "nodeId": 1, "nodeName": "Open Page", "value": "https://space.bilibili.com/291929894/fans/fans", "desc": "List of URLs to be collected, separated by \\n for multiple lines", "type": "string", "exampleValue": "https://space.bilibili.com/291929894/fans/fans"}, {"id": 1, "name": "loopTimes_Loop_1", "nodeId": 2, "nodeName": "Loop", "desc": "Number of loop executions, 0 means unlimited loops (until element not found)", "type": "int", "exampleValue": 0, "value": 0}], "outputParameters": [{"id": 0, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "type": "string", "exampleValue": "\u5bf9\u65b9\u7b54\u590d5"}], "graph": [{"index": 0, "id": 0, "parentId": 0, "type": -1, "option": 0, "title": "root", "sequence": [1, 2], "parameters": {"history": 1, "tabIndex": 0, "useLoop": false, "xpath": "", "wait": 0}, "isInLoop": false}, {"id": 1, "index": 1, "parentId": 0, "type": 0, "option": 1, "title": "Open Page", "sequence": [], "isInLoop": false, "position": 0, "parameters": {"useLoop": false, "xpath": "", "wait": 0, "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "scrollType": 0, "scrollCount": 0}}, {"id": 2, "index": 2, "parentId": 0, "type": 1, "option": 8, "title": "Loop", "sequence": [4], "isInLoop": false, "position": 1, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "//a[contains(text(),\"\u4e0b\u4e00\u9875\")]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 0, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": -1, "index": 3, "parentId": 2, "type": 0, "option": 2, "title": "Click Element", "sequence": [], "isInLoop": true, "position": 1, "parameters": 
{"history": 4, "tabIndex": -1, "useLoop": true, "xpath": "//*[@id=\"page-follows\"]/div[1]/div[2]/div[2]/div[2]/ul[2]/li[7]", "wait": 1, "scrollType": 0, "scrollCount": 0, "paras": [], "loopType": 0}}, {"id": 3, "index": 4, "parentId": 2, "type": 1, "option": 8, "title": "Loop", "sequence": [5], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "/html/body/div[2]/div[4]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/ul[1]/li/div[2]/a[1]/span[1]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 1, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": 4, "index": 5, "parentId": 3, "type": 0, "option": 3, "title": "Extract Data", "sequence": [], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "", "wait": 0, "paras": [{"nodeType": 0, "contentType": 0, "relative": true, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "relativeXpath": "", "exampleValues": [{"num": 0, "value": "\u5bf9\u65b9\u7b54\u590d5"}], "default": ""}], "loopType": 1}}]}
{"id": 20, "name": "Bilibili\u7c89\u4e1d", "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "containJudge": false, "desc": "https://space.bilibili.com/291929894/fans/fans", "inputParameters": [{"id": 0, "name": "urlList_0", "nodeId": 1, "nodeName": "Open Page", "value": "https://space.bilibili.com/291929894/fans/fans", "desc": "List of URLs to be collected, separated by \\n for multiple lines", "type": "string", "exampleValue": "https://space.bilibili.com/291929894/fans/fans"}, {"id": 1, "name": "loopTimes_Loop_1", "nodeId": 2, "nodeName": "Loop", "desc": "Number of loop executions, 0 means unlimited loops (until element not found)", "type": "int", "exampleValue": 0, "value": 0}], "outputParameters": [{"id": 0, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "type": "string", "exampleValue": "\u5bf9\u65b9\u7b54\u590d5"}], "graph": [{"index": 0, "id": 0, "parentId": 0, "type": -1, "option": 0, "title": "root", "sequence": [1, 2], "parameters": {"history": 1, "tabIndex": 0, "useLoop": false, "xpath": "", "wait": 0}, "isInLoop": false}, {"id": 1, "index": 1, "parentId": 0, "type": 0, "option": 1, "title": "Open Page", "sequence": [], "isInLoop": false, "position": 0, "parameters": {"useLoop": false, "xpath": "", "wait": 0, "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "scrollType": 0, "scrollCount": 0}}, {"id": 2, "index": 2, "parentId": 0, "type": 1, "option": 8, "title": "Loop", "sequence": [4], "isInLoop": false, "position": 1, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "//a[contains(text(),\"\u4e0b\u4e00\u9875\")]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 0, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": -1, "index": 3, "parentId": 2, "type": 0, "option": 2, "title": "Click Element", "sequence": [], "isInLoop": true, "position": 1, "parameters": 
{"history": 4, "tabIndex": -1, "useLoop": true, "xpath": "//*[@id=\"page-follows\"]/div[1]/div[2]/div[2]/div[2]/ul[2]/li[7]", "wait": 1, "scrollType": 0, "scrollCount": 0, "paras": [], "loopType": 0}}, {"id": 3, "index": 4, "parentId": 2, "type": 1, "option": 8, "title": "Loop", "sequence": [5], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "/html/body/div[2]/div[4]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/ul[1]/li/div[2]/a[1]/span[1]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 1, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": 4, "index": 5, "parentId": 3, "type": 0, "option": 3, "title": "Extract Data", "sequence": [], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "", "wait": 0, "paras": [{"nodeType": 0, "contentType": 0, "relative": true, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "relativeXPath": "", "exampleValues": [{"num": 0, "value": "\u5bf9\u65b9\u7b54\u590d5"}], "default": ""}], "loopType": 1}}]}

+ 1
- 1
ElectronJS/tasks/23.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/25.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/27.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/28.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/29.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/30.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/31.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/32.json View File

@ -1 +1 @@
{"id":32,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXpath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}
{"id":32,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}

+ 1
- 1
ElectronJS/tasks/34.json View File

@ -1 +1 @@
{"id":34,"name":"新web采adsf","url":"https://www.jd.com","links":"https://www.jd.com","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"提取数据","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"string","exampleValue":"赛跟","value":"赛跟"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.jd.com","links":"https://www.jd.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","wait":0,"value":"赛跟"}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"打开网页","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-btn\"]/i[1]","wait":0,"scrollType":0,"scrollCount":0,"paras":[]}}]}
{"id":34,"name":"新web采adsf","url":"https://www.jd.com","links":"https://www.jd.com","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.jd.com","links":"https://www.jd.com","scrollType":0,"scrollCount":0}},{"id":-1,"index":2,"parentId":0,"type":0,"option":4,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","wait":0,"value":"赛跟"}},{"id":2,"index":3,"parentId":0,"type":0,"option":2,"title":"打开网页","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-btn\"]/i[1]","wait":0,"scrollType":0,"scrollCount":0,"paras":[]}}]}

+ 148
- 0
ElectronJS/tasks/35.json View File

@ -0,0 +1,148 @@
{
"id": 35,
"name": "测试",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数1_背景图片地址",
"desc": "",
"type": "string",
"exampleValue": "url(\"https://i.ebayimg.com/images/g/jyoAAOSwVl1kUFr-/s-l200.webp\")"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
4,
2,
3
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 3,
"index": 2,
"parentId": 0,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": false,
"position": 2,
"parameters": {
"history": 5,
"tabIndex": -1,
"useLoop": false,
"xpath": "",
"wait": 0,
"paras": [
{
"nodeType": 0,
"contentType": 4,
"relative": false,
"name": "参数1_背景图片地址",
"desc": "",
"relativeXPath": "/html/body/div[6]/div[5]/ul[1]/li[1]/a[1]/div[1]/div[1]",
"exampleValues": [
{
"num": 0,
"value": "url(\"https://i.ebayimg.com/images/g/jyoAAOSwVl1kUFr-/s-l200.webp\")"
}
],
"default": ""
}
]
}
},
{
"id": 4,
"index": 3,
"parentId": 0,
"type": 0,
"option": 5,
"title": "自定义操作",
"sequence": [],
"isInLoop": false,
"position": 3,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"codeMode": 0,
"code": "alert(\"My name is naibo\")"
}
},
{
"id": 2,
"index": 4,
"parentId": 0,
"type": 0,
"option": 5,
"title": "自定义操作",
"sequence": [],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"codeMode": "1",
"code": "ping www.baidu.com"
}
}
]
}

+ 193
- 0
ElectronJS/tasks/36.json View File

@ -0,0 +1,193 @@
{
"id": 36,
"name": "JDJD",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数1_页面标题",
"desc": "",
"type": "string",
"exampleValue": "京东全球版-专业的综合网上购物商城"
},
{
"id": 1,
"name": "参数2_页面URL",
"desc": "",
"type": "string",
"exampleValue": "https://global.jd.com/"
},
{
"id": 2,
"name": "参数3_背景图片地址",
"desc": "",
"type": "string",
"exampleValue": ""
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
3,
2,
4
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 3,
"index": 2,
"parentId": 0,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": false,
"position": 2,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"paras": [
{
"nodeType": 0,
"contentType": 6,
"relative": false,
"name": "参数1_页面标题",
"desc": "",
"extractType": 0,
"relativeXPath": "/html/body/div[4]",
"exampleValues": [
{
"num": 0,
"value": "京东全球版-专业的综合网上购物商城"
}
],
"default": ""
},
{
"nodeType": 0,
"contentType": 5,
"relative": false,
"name": "参数2_页面URL",
"desc": "",
"extractType": 0,
"relativeXPath": "/html/body/div[4]",
"exampleValues": [
{
"num": 0,
"value": "https://global.jd.com/"
}
]
},
{
"nodeType": 0,
"contentType": 4,
"relative": false,
"name": "参数3_背景图片地址",
"desc": "",
"extractType": 0,
"relativeXPath": "/html/body/div[4]/div[1]",
"exampleValues": [
{
"num": 0,
"value": ""
}
]
}
]
}
},
{
"id": 2,
"index": 3,
"parentId": 0,
"type": 0,
"option": 5,
"title": "自定义操作",
"sequence": [],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"codeMode": 0,
"code": "alert(\"My name is Naibo Wang\")"
}
},
{
"id": 4,
"index": 4,
"parentId": 0,
"type": 0,
"option": 5,
"title": "自定义操作",
"sequence": [],
"isInLoop": false,
"position": 3,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"codeMode": "1",
"code": "ping www.baidu.com"
}
}
]
}

+ 1
- 0
ElectronJS/tasks/37.json
File diff suppressed because it is too large
View File


+ 1
- 0
ElectronJS/tasks/38.json
File diff suppressed because it is too large
View File


+ 1
- 0
ElectronJS/tasks/39.json View File

@ -0,0 +1 @@
{"id":39,"name":"百度文库","url":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","links":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","containJudge":false,"desc":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":10,"url":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","links":"https://wenku.baidu.com/view/98593fe25ff7ba0d4a7302768e9951e79b896989.html?fr=hp_Database&_wkts_=1684099456020","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//canvas","wait":0,"scrollType":0,"scrollCount":0,"loopType":1,"pathList":"","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentTyp
e":8,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","exampleValues":[{"num":0,"value":""},{"num":1,"value":""}],"default":""}],"loopType":1}}]}

+ 1
- 1
ElectronJS/tasks/4.json
File diff suppressed because it is too large
View File


+ 1
- 0
ElectronJS/tasks/40.json
File diff suppressed because it is too large
View File


+ 139
- 0
ElectronJS/tasks/41.json View File

@ -0,0 +1,139 @@
{
"id": 41,
"name": "新web采集任务",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数1_图片地址",
"desc": "",
"type": "string",
"exampleValue": "//m.360buyimg.com/babel/s580x740_jfs/t1/142264/15/32716/14415/63b40389F4a38dbcf/47dcbd8c9f404498.jpg!q70.dpg"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
2
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 2,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 2,
"index": 2,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
3
],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "/html/body/div[5]/div[1]/div[4]/div[1]/div[1]/div[1]/a/img[1]",
"wait": 0,
"scrollType": 0,
"scrollCount": 0,
"loopType": 1,
"pathList": "",
"textList": "",
"exitCount": 0,
"historyWait": 2
}
},
{
"id": 3,
"index": 3,
"parentId": 2,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "",
"wait": 0,
"paras": [
{
"nodeType": 4,
"contentType": 7,
"relative": true,
"name": "参数1_图片地址",
"desc": "",
"extractType": 0,
"relativeXPath": "",
"exampleValues": [
{
"num": 0,
"value": "//m.360buyimg.com/babel/s580x740_jfs/t1/142264/15/32716/14415/63b40389F4a38dbcf/47dcbd8c9f404498.jpg!q70.dpg"
},
{
"num": 1,
"value": "//m.360buyimg.com/babel/s580x740_jfs/t1/42759/37/22689/30450/63b40382F7411d238/d4622671799c75bd.jpg!q70.dpg"
}
],
"default": ""
}
],
"loopType": 1
}
}
]
}

+ 1
- 0
ElectronJS/tasks/42.json View File

@ -0,0 +1 @@
{"id":42,"name":"ebay截图","url":"https://www.ebay.com","links":"https://www.ebay.com","containJudge":false,"desc":"https://www.ebay.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.ebay.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.ebay.com"}],"outputParameters":[{"id":0,"name":"参数1_背景图片地址","desc":"","type":"string","exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.ebay.com","links":"https://www.ebay.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":7,"relative":false,"name":"参数1_背景图片地址","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., '')]","//BODY[@class='desktop gh-flex']"],"exampleValues":[{"num":0,"value":""}],"default":""}]}}]}

+ 1
- 0
ElectronJS/tasks/43.json
File diff suppressed because it is too large
View File


+ 1
- 0
ElectronJS/tasks/44.json View File

@ -0,0 +1 @@
{"id":44,"name":"ebay自定义","url":"https://www.ebay.com","links":"https://www.ebay.com","containJudge":false,"desc":"https://www.ebay.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.ebay.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.ebay.com"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"string","exampleValue":""},{"id":1,"name":"自定义操作2","desc":"自定义操作返回的数据","type":"string","exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.ebay.com","links":"https://www.ebay.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"codeMode":0,"code":"return document.querySelectorAll(\"#mainContent > div.hl-cat-nav > ul > li.hl-cat-nav__active\")[0].tagName","waitTime":0,"recordASField":true}},{"id":3,"index":3,"parentId":0,"type":0,"option":5,"title":"自定义操作2","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"codeMode":"1","code":"python D:/tes123t.py --test 123","waitTime":0,"recordASField":true}}]}

+ 172
- 0
ElectronJS/tasks/45.json View File

@ -0,0 +1,172 @@
{
"id": 45,
"name": "定义JS操作",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
},
{
"id": 1,
"name": "loopTimes_循环_1",
"nodeId": 3,
"nodeName": "循环",
"desc": "循环循环执行的次数(0代表无限循环)",
"type": "int",
"exampleValue": 0,
"value": 0
},
{
"id": 2,
"name": "inputText_2",
"nodeName": "输入文字",
"nodeId": 4,
"desc": "要输入的文本,如京东搜索框输入:电脑",
"type": "string",
"exampleValue": "",
"value": ""
}
],
"outputParameters": [],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
3
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 4,
"index": 2,
"parentId": 2,
"type": 0,
"option": 2,
"title": "点击元素",
"sequence": [],
"isInLoop": true,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": true,
"xpath": "//*[contains(@ceM\")]/div[1]",
"wait": 0,
"beforeJS": "arguments[0].click()",
"beforeJSWaitTime": 4,
"afterJS": "arguments[0].innerText = \"test\"",
"afterJSWaitTime": 5,
"scrollType": 0,
"scrollCount": 0,
"paras": [],
"allXPaths": [
"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]",
"//div[contains(., '/手机/数码')]",
"//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"
]
}
},
{
"id": 2,
"index": 3,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
4,
2
],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 0,
"loopType": 0,
"pathList": "",
"textList": "",
"exitCount": 0,
"historyWait": 2
}
},
{
"id": 3,
"index": 4,
"parentId": 2,
"type": 0,
"option": 4,
"title": "输入文字",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "adsf",
"beforeJSWaitTime": 1,
"afterJS": "qwe",
"afterJSWaitTime": 2,
"value": ""
}
}
]
}

+ 1
- 0
ElectronJS/tasks/46.json View File

@ -0,0 +1 @@
{"id":46,"name":"新web采集任务","url":"https://www.jd.com","links":"https://www.jd.com","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","exampleValues":[{"num":0,"value":"/手机/数码"}],"default":"","beforeJS":"asdf","beforeJSWaitTime":1,"afterJS":"asdfasdf","afterJSWaitTime":2}]}}]}

+ 278
- 0
ElectronJS/tasks/47.json View File

@ -0,0 +1,278 @@
{
"id": 47,
"name": "新web采集任务",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
},
{
"id": 1,
"name": "inputText_1",
"nodeName": "输入文字",
"nodeId": 3,
"desc": "要输入的文本,如京东搜索框输入:电脑",
"type": "string",
"exampleValue": "123",
"value": "123"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数1_文本",
"desc": "",
"type": "string",
"exampleValue": "/手机/数码"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
2,
3,
4
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 2,
"index": 2,
"parentId": 0,
"type": 0,
"option": 2,
"title": "点击元素",
"sequence": [],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": false,
"xpath": "//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]",
"wait": 0,
"beforeJS": "4",
"beforeJSWaitTime": 0,
"afterJS": "3",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 0,
"paras": [],
"allXPaths": [
"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]",
"//div[contains(., '/手机/数码')]",
"//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"
]
}
},
{
"id": 3,
"index": 3,
"parentId": 0,
"type": 0,
"option": 4,
"title": "输入文字",
"sequence": [],
"isInLoop": false,
"position": 2,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": false,
"xpath": "//*[@id=\"key\"]",
"wait": 0,
"beforeJS": "1",
"beforeJSWaitTime": 0,
"afterJS": "2",
"afterJSWaitTime": 0,
"value": "123",
"allXPaths": [
"/html/body/div[4]/div[1]/div[2]/div[1]/input[1]",
"//input[contains(., '')]",
"id(\"key\")",
"//INPUT[@class='text defcolor']"
]
}
},
{
"id": 4,
"index": 4,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
5
],
"isInLoop": false,
"position": 3,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": false,
"xpath": "/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 0,
"loopType": 1,
"pathList": "",
"textList": "",
"exitCount": 0,
"historyWait": 2,
"allXPaths": [
"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]",
"//div[contains(., '/手机/数码')]",
"//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"
]
}
},
{
"id": 5,
"index": 5,
"parentId": 4,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 4,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"paras": [
{
"nodeType": 0,
"contentType": 9,
"relative": true,
"name": "参数1_文本",
"desc": "",
"extractType": 0,
"relativeXPath": "",
"allXPaths": "",
"exampleValues": [
{
"num": 0,
"value": "/手机/数码"
},
{
"num": 1,
"value": "/家用电器"
},
{
"num": 2,
"value": "/电脑/办公"
},
{
"num": 3,
"value": "/家纺/家居/厨具"
},
{
"num": 4,
"value": "/家具/家装/灯具/工业品"
},
{
"num": 5,
"value": "/内衣/男装/女装/童装"
},
{
"num": 6,
"value": "/箱包/钟表/珠宝/女鞋"
},
{
"num": 7,
"value": "/运动/户外/男鞋"
},
{
"num": 8,
"value": "/汽车用品/车载电器"
},
{
"num": 9,
"value": "/母婴/洗护喂养"
},
{
"num": 10,
"value": "/玩具乐器/宠物生活"
},
{
"num": 11,
"value": "/家庭清洁/个人护理/计生情趣"
},
{
"num": 12,
"value": "/图书/童书/文学"
}
],
"default": "",
"beforeJS": "5",
"beforeJSWaitTime": 0,
"JS": "7",
"JSWaitTime": 0,
"afterJS": "6",
"afterJSWaitTime": 0
}
],
"loopType": 1
}
}
]
}

+ 1
- 0
ElectronJS/tasks/48.json
File diff suppressed because it is too large
View File


+ 1
- 0
ElectronJS/tasks/49.json View File

@ -0,0 +1 @@
{"id":49,"name":"任意代码条件判断示例","url":"https://www.jd.com","links":"https://www.jd.com","containJudge":true,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":3,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":0,"loopType":"6","pathList":"","textList":"","code":"python D:/test.py --test 
1","waitTime":4,"exitCount":0,"historyWait":2}},{"id":-1,"index":3,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","exampleValues":[{"num":0,"value":"/手机/数码"},{"num":1,"value":"/家用电器"},{"num":2,"value":"/电脑/办公"},{"num":3,"value":"/家纺/家居/厨具"},{"num":4,"value":"/家具/家装/灯具/工业品"},{"num":5,"value":"/内衣/男装/女装/童装"},{"num":6,"value":"/箱包/钟表/珠宝/女鞋"},{"num":7,"value":"/运动/户外/男鞋"},{"num":8,"value":"/汽车用品/车载电器"},{"num":9,"value":"/母婴/洗护喂养"},{"num":10,"value":"/玩具乐器/宠物生活"},{"num":11,"value":"/家庭清洁/个人护理/计生情趣"},{"num":12,"value":"/图书/童书/文学"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0}],"loopType":1}},{"id":3,"index":4,"parentId":2,"type":2,"option":9,"title":"判断条件","sequence":[5,6],"isInLoop":true,"position":0,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0}},{"id":4,"parentId":3,"index":5,"type":3,"option":10,"title":"条件分支","sequence":[7],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":"6","value":"","code":"python 
D:/test.py","waitTime":5},"position":0},{"id":5,"parentId":3,"index":6,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":1},{"id":6,"index":7,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']"],"exampleValues":[{"num":0,"value":"/手机/数码"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0}]}},{"index":8,"id":-1,"parentId":3,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":0},{"index":9,"id":-1,"parentId":3,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":0}]}

+ 1
- 1
ElectronJS/tasks/5.json
File diff suppressed because it is too large
View File


+ 291
- 0
ElectronJS/tasks/50.json View File

@ -0,0 +1,291 @@
{
"id": 50,
"name": "任意代码条件判",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"containJudge": true,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "string",
"exampleValue": "https://www.jd.com"
},
{
"id": 1,
"name": "loopText_1",
"nodeId": 2,
"nodeName": "循环",
"desc": "要输入的文本/网址,多行以\\n分开",
"type": "string",
"exampleValue": "",
"value": ""
}
],
"outputParameters": [
{
"id": 0,
"name": "参数1_文本",
"desc": "",
"type": "string",
"exampleValue": "/手机/数码"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
2
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"scrollType": 0,
"scrollCount": 0
}
},
{
"id": 2,
"index": 2,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
4
],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div",
"wait": 3,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 0,
"loopType": "5",
"pathList": "",
"textList": "",
"code": "return document.body.scrollWidth > 1000",
"waitTime": 4,
"exitCount": 0,
"historyWait": 2
}
},
{
"id": 5,
"index": 3,
"parentId": 4,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"paras": [
{
"nodeType": 0,
"contentType": 0,
"relative": true,
"name": "参数1_文本",
"desc": "",
"extractType": 0,
"relativeXpath": "",
"exampleValues": [
{
"num": 0,
"value": "/手机/数码"
},
{
"num": 1,
"value": "/家用电器"
},
{
"num": 2,
"value": "/电脑/办公"
},
{
"num": 3,
"value": "/家纺/家居/厨具"
},
{
"num": 4,
"value": "/家具/家装/灯具/工业品"
},
{
"num": 5,
"value": "/内衣/男装/女装/童装"
},
{
"num": 6,
"value": "/箱包/钟表/珠宝/女鞋"
},
{
"num": 7,
"value": "/运动/户外/男鞋"
},
{
"num": 8,
"value": "/汽车用品/车载电器"
},
{
"num": 9,
"value": "/母婴/洗护喂养"
},
{
"num": 10,
"value": "/玩具乐器/宠物生活"
},
{
"num": 11,
"value": "/家庭清洁/个人护理/计生情趣"
},
{
"num": 12,
"value": "/图书/童书/文学"
}
],
"default": "",
"beforeJS": "",
"beforeJSWaitTime": 0,
"JS": "",
"JSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0
}
],
"loopType": 1
}
},
{
"id": 3,
"index": 4,
"parentId": 2,
"type": 2,
"option": 9,
"title": "判断条件",
"sequence": [
5
],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0
}
},
{
"id": 4,
"parentId": 3,
"index": 5,
"type": 3,
"option": 10,
"title": "条件分支",
"sequence": [
3
],
"isInLoop": true,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"class": "5",
"value": "",
"code": "return document.body.scrollheight > 500",
"waitTime": 5
},
"position": 0
},
{
"id": -1,
"parentId": 3,
"index": 6,
"type": 3,
"option": 10,
"title": "条件分支",
"sequence": [],
"isInLoop": true,
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"class": 0,
"value": "",
"code": "",
"waitTime": 0
},
"position": 1
}
]
}

+ 1
- 0
ElectronJS/tasks/51.json View File

@ -0,0 +1 @@
{"id":51,"name":"循环内条件判断","url":"https://www.jd.com","links":"https://www.jd.com","containJudge":true,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","scrollType":0,"scrollCount":0}},{"id":-1,"index":2,"parentId":0,"type":2,"option":9,"title":"判断条件","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0}},{"id":-1,"parentId":2,"index":3,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":false,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":"5","value":"","code":"","waitTime":0},"position":0},{"id":-1,"parentId":2,"index":4,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":false,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":1},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[7],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div
[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":0,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}},{"id":6,"index":6,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"},{"num":1,"value":"/家用电器"},{"num":2,"value":"/电脑/办公"},{"num":3,"value":"/家纺/家居/厨具"},{"num":4,"value":"/家具/家装/灯具/工业品"},{"num":5,"value":"/内衣/男装/女装/童装"},{"num":6,"value":"/箱包/钟表/珠宝/女鞋"},{"num":7,"value":"/运动/户外/男鞋"},{"num":8,"value":"/汽车用品/车载电器"},{"num":9,"value":"/母婴/洗护喂养"},{"num":10,"value":"/玩具乐器/宠物生活"},{"num":11,"value":"/家庭清洁/个人护理/计生情趣"},{"num":12,"value":"/图书/童书/文学"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0}],"loopType":1}},{"id":3,"index":7,"parentId":2,"type":2,"option":9,"title":"判断条件","sequence":[8,9],"isInLoop":true,"position":0,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0}},{"id":4,"parentId":3,"index":8,"type":3,"option":10,"title":"条件分支","sequence":[6],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":"7","value":"","code":"return arguments[0].innerText.split(\"/\").length > 
2","waitTime":0},"position":0},{"id":5,"parentId":3,"index":9,"type":3,"option":10,"title":"条件分支","sequence":[],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":1}]}

+ 1
- 1
ElectronJS/tasks/6.json
File diff suppressed because it is too large
View File


+ 1
- 1
ElectronJS/tasks/7.json
File diff suppressed because it is too large
View File


sks/services.js
File diff suppressed because it is too large
View File


+ 2
- 0
ExecuteStage/.gitignore View File

@ -10,3 +10,5 @@ Chrome/
Data/
tasks/
Application/
.history
execution_instances/

+ 18
- 0
ExecuteStage/.vscode/launch.json View File

@ -0,0 +1,18 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: EasySpider",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true,
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
"args": ["--id", "78", "--headless", "0"]
}
]
}

+ 6
- 0
ExecuteStage/config.json View File

@ -0,0 +1,6 @@
{
"webserver_address": "http://localhost",
"webserver_port": 8074,
"user_data_folder": "./user_data",
"absolute_user_data_folder": "/Users/naibowang/Documents/EasySpider/ElectronJS/user_data"
}

+ 272
- 63
ExecuteStage/easyspider_executestage.py View File

@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
import atexit # 遇到错误退出时应执行的代码
import atexit
import io # 遇到错误退出时应执行的代码
import json
from lib2to3.pgen2 import driver
import re
import subprocess
import sys
from urllib import parse
import base64
@ -26,6 +28,8 @@ import csv
import os
from selenium.webdriver.common.by import By
from commandline_config import Config
import pytesseract
from PIL import Image
saveName, log, OUTPUT, browser, SAVED = None, "", "", None, False
@ -88,9 +92,69 @@ def scrollDown(para, rt=""):
if rt != "":
rt.end()
def execute_code(codeMode, code, max_wait_time, element=None):
    """Run a user-supplied snippet and return its result as a string.

    Args:
        codeMode: Selects how `code` is run (anything int() accepts):
            0 - JavaScript via browser.execute_script(code);
            2 - JavaScript via browser.execute_script(code, element);
            1 - system command via subprocess.run, capturing stdout.
        code: The snippet or command. An empty string is a no-op.
        max_wait_time: Timeout in seconds, as a number or numeric string.
            0 is replaced by 999999 seconds (effectively no limit).
        element: Extra argument handed to the script when codeMode is 2.

    Returns:
        str() of the script's return value or of the command's stdout.
        "" when `code` is empty, when execution fails or times out, or when
        codeMode is none of 0, 1, 2.
    """
    if code == "":
        return ""
    # Callers pass the wait time straight from the task JSON, where it may be
    # a string; normalise it so the "0 means unlimited" check and the
    # subprocess timeout both see a real number.
    try:
        max_wait_time = float(max_wait_time)
    except (TypeError, ValueError):
        recordLog("Invalid wait time, no timeout applied: " + str(max_wait_time))
        max_wait_time = 0
    if max_wait_time == 0:
        max_wait_time = 999999
    mode = int(codeMode)
    output = ""
    # print(codeMode, code)
    if mode == 0 or mode == 2:
        if mode == 0:
            recordLog("Execute JavaScript:" + code)
            recordLog("执行JavaScript:" + code)
            script_args = ()
        else:
            recordLog("Execute JavaScript for element:" + code)
            recordLog("对元素执行JavaScript:" + code)
            script_args = (element,)
        browser.set_script_timeout(max_wait_time)
        try:
            output = browser.execute_script(code, *script_args)
        except Exception:
            # Best effort: a failing script yields an empty result instead of
            # aborting the task, but Ctrl-C / SystemExit still propagate.
            output = ""
            recordLog("JavaScript execution failed")
    elif mode == 1:
        recordLog("Execute System Call:" + code)
        recordLog("执行系统命令:" + code)
        # Run the system command; it is killed after max_wait_time seconds.
        try:
            completed = subprocess.run(code, capture_output=True, text=True,
                                       timeout=max_wait_time, encoding="utf-8")
            # The command's standard output is the result.
            output = completed.stdout
        except subprocess.TimeoutExpired:
            # The command ran longer than max_wait_time seconds.
            recordLog("Command timed out")
            recordLog("命令执行超时")
        except Exception:
            recordLog("Command execution failed")
            recordLog("命令执行失败")
    return str(output)
def customOperation(node, loopValue):
    """Execute a custom-operation node and optionally record its output.

    The node's "codeMode", "code" and "waitTime" parameters are forwarded to
    execute_code. When "recordASField" is truthy the result is stored in
    outputParameters under the node's title, and a snapshot of all current
    output values is printed (first 15 characters of each) and appended to
    OUTPUT as one row.

    Args:
        node: Graph node dict with "title" and "parameters" keys.
        loopValue: Accepted for signature parity with other node handlers;
            not used here.
    """
    global OUTPUT, outputParameters
    settings = node["parameters"]
    result = execute_code(settings["codeMode"], settings["code"],
                          int(settings["waitTime"]))
    if not settings["recordASField"]:
        return
    outputParameters[node["title"]] = result
    row = list(outputParameters.values())
    for cell in row:
        print(cell[:15], " ", end="")
    print("")
    OUTPUT.append(row)
# 执行节点关键函数部分
def excuteNode(nodeId, loopValue="", clickPath="", index=0):
def executeNode(nodeId, loopValue="", clickPath="", index=0):
node = procedure[nodeId]
WebDriverWait(browser, 10).until
# 等待元素出现才进行操作,10秒内未出现则报错
@ -99,7 +163,7 @@ def excuteNode(nodeId, loopValue="", clickPath="", index=0):
# 根据不同选项执行不同操作
if node["option"] == 0 or node["option"] == 10: # root操作,条件分支操作
for i in node["sequence"]: # 从根节点开始向下读取
excuteNode(i, loopValue, clickPath, index)
executeNode(i, loopValue, clickPath, index)
elif node["option"] == 1: # 打开网页操作
recordLog("openPage")
openPage(node["parameters"], loopValue)
@ -113,6 +177,8 @@ def excuteNode(nodeId, loopValue="", clickPath="", index=0):
saveData()
elif node["option"] == 4: # 输入文字
inputInfo(node["parameters"], loopValue)
elif node["option"] == 5: # 自定义操作
customOperation(node, loopValue)
elif node["option"] == 8: # 循环
recordLog("loop")
loopExcute(node, loopValue, clickPath, index) # 执行循环
@ -133,43 +199,61 @@ def excuteNode(nodeId, loopValue="", clickPath="", index=0):
def judgeExcute(node, loopElement, clickPath="", index=0):
rt = Time("IF Condition")
global bodyText # 引入bodyText
excuteBranchId = 0 # 要执行的BranchId
executeBranchId = 0 # 要执行的BranchId
for i in node["sequence"]:
cnode = procedure[i] # 获得条件分支
tType = int(cnode["parameters"]["class"]) # 获得判断条件类型
if tType == 0: # 什么条件都没有
excuteBranchId = i
executeBranchId = i
break
elif tType == 1: # 当前页面包含文本
try:
if bodyText.find(cnode["parameters"]["value"]) >= 0:
excuteBranchId = i
executeBranchId = i
break
except: # 找不到元素下一个条件
continue
elif tType == 2: # 当前页面包含元素
try:
if browser.find_element(By.XPATH, cnode["parameters"]["value"]):
excuteBranchId = i
executeBranchId = i
break
except: # 找不到元素或者xpath写错了,下一个条件
continue
elif tType == 3: # 当前循环元素包括文本
try:
if loopElement.text.find(cnode["parameters"]["value"]) >= 0:
excuteBranchId = i
executeBranchId = i
break
except: # 找不到元素或者xpath写错了,下一个条件
continue
elif tType == 4: # 当前循环元素包括元素
try:
if loopElement.find_element(By.XPATH, cnode["parameters"]["value"][1:]):
excuteBranchId = i
executeBranchId = i
break
except: # 找不到元素或者xpath写错了,下一个条件
continue
elif tType <= 7: # JS命令返回值
if tType == 5: # JS命令返回值等于
output = execute_code(0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
elif tType == 6: # System
output = execute_code(1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
elif tType == 7: # 针对当前循环项的JS命令返回值
output = execute_code(2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement)
try:
if output.find("rue") != -1: # 如果返回值中包含true
code = 1
else:
code = int(output)
except:
code = 0
if code > 0:
executeBranchId = i
break
rt.end()
excuteNode(excuteBranchId, loopElement, clickPath, index)
if executeBranchId != 0:
executeNode(executeBranchId, loopElement, clickPath, index)
# 对循环的处理
@ -193,7 +277,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
element = browser.find_element(
By.XPATH, node["parameters"]["xpath"])
for i in node["sequence"]: # 挨个执行操作
excuteNode(i, element, node["parameters"]["xpath"], 0)
executeNode(i, element, node["parameters"]["xpath"], 0)
finished = True
Log("click: ", node["parameters"]["xpath"])
recordLog("click:" + node["parameters"]["xpath"])
@ -204,7 +288,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
recordLog("clickNotFound:" + node["parameters"]["xpath"])
for i in node["sequence"]: # 不带点击元素的把剩余的如提取数据的操作执行一遍
if node["option"] != 2:
excuteNode(i, None, node["parameters"]["xpath"], 0)
executeNode(i, None, node["parameters"]["xpath"], 0)
finished = True
break # 如果找不到元素,退出循环
finally:
@ -215,7 +299,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
recordLog("clickNotFound:" + node["parameters"]["xpath"])
for i in node["sequence"]: # 不带点击元素的把剩余的如提取数据的操作执行一遍
if node["option"] != 2:
excuteNode(i, None, node["parameters"]["xpath"], 0)
executeNode(i, None, node["parameters"]["xpath"], 0)
break # 如果找不到元素,退出循环
count = count + 1
@ -230,7 +314,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
node["parameters"]["xpath"])
for index in range(len(elements)):
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
excuteNode(i, elements[index],
executeNode(i, elements[index],
node["parameters"]["xpath"], index)
if browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化
while True: # 一直关闭窗口直到当前标签页
@ -263,7 +347,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
try:
element = browser.find_element(By.XPATH, path)
for i in node["sequence"]: # 挨个执行操作
excuteNode(i, element, path, 0)
executeNode(i, element, path, 0)
if browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化
while True: # 一直关闭窗口直到当前标签页
browser.close() # 关闭使用完的标签页
@ -294,7 +378,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
for text in textList:
recordLog("input: " + text)
for i in node["sequence"]: # 挨个执行操作
excuteNode(i, text, "", 0)
executeNode(i, text, "", 0)
elif int(node["parameters"]["loopType"]) == 4: # 固定网址列表
# tempList = node["parameters"]["textList"].split("\r\n")
urlList = list(
@ -306,7 +390,24 @@ def loopExcute(node, loopValue, clickPath="", index=0):
for url in urlList:
recordLog("input: " + url)
for i in node["sequence"]:
excuteNode(i, url, "", 0)
executeNode(i, url, "", 0)
elif int(node["parameters"]["loopType"]) <= 6: # 命令返回值
while True: # do while循环
if int(node["parameters"]["loopType"]) == 5: # JS
output = execute_code(0, node["parameters"]["code"], node["parameters"]["waitTime"])
elif int(node["parameters"]["loopType"]) == 6: # System
output = execute_code(1, node["parameters"]["code"], node["parameters"]["waitTime"])
try:
if output.find("rue") != -1: # 如果返回值中包含true
code = 1
else:
code = int(output)
except:
code = 0
if code <= 0:
break
for i in node["sequence"]: # 挨个执行操作
executeNode(i, code, node["parameters"]["xpath"], 0)
history["index"] = thisHistoryLength
history["handle"] = browser.current_window_handle
scrollDown(node["parameters"])
@ -393,9 +494,9 @@ def inputInfo(para, loopValue):
para["xpath"] + "Please try to set the wait time before executing this operation")
recordLog("Cannot find input box element:" +
para["xpath"] + "Please try to set the wait time before executing this operation")
exit()
# textbox.send_keys(Keys.CONTROL, 'a')
# textbox.send_keys(Keys.BACKSPACE)
execute_code(2, para["beforeJS"], para["beforeJSWaitTime"], textbox) # 执行前置JS
# Send the HOME key
textbox.send_keys(Keys.HOME)
# Send the SHIFT + END key combination
@ -406,6 +507,7 @@ def inputInfo(para, loopValue):
textbox.send_keys(loopValue)
else:
textbox.send_keys(para["value"])
execute_code(2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
global bodyText # 每次执行点击,输入元素和打开网页操作后,需要更新bodyText
bodyText = browser.find_element(By.CSS_SELECTOR, "body").text
rt.end()
@ -421,6 +523,16 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
path = clickPath
else:
path = para["xpath"] # 不然使用元素定义的xpath
# 点击前对该元素执行一段JavaScript代码
try:
if para["beforeJS"] != "":
element = browser.find_element(By.XPATH, path)
execute_code(2, para["beforeJS"], para["beforeJSWaitTime"], element)
except:
Log("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
recordLog("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
tempHandleNum = len(browser.window_handles) # 记录之前的窗口位置
try:
script = 'var result = document.evaluate(`' + path + \
@ -437,7 +549,17 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
recordLog(str(e))
time.sleep(0.5) # 点击之后等半秒
Log("Wait 0.5 second after clicking element")
time.sleep(random.uniform(1, 10)) # 生成一个a到b的小数等待时间
time.sleep(random.uniform(1, 3)) # 生成一个a到b的小数等待时间
# After the click, run a snippet of JavaScript on this element
try:
if para["afterJS"] != "":
element = browser.find_element(By.XPATH, path)
execute_code(2, para["afterJS"], para["afterJSWaitTime"], element)
except:
Log("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
recordLog("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
if tempHandleNum != len(browser.window_handles): # 如果有新标签页的行为发生
browser.switch_to.window(browser.window_handles[-1]) # 跳转到新的标签页
history["handle"] = browser.current_window_handle
@ -483,49 +605,83 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
rt = Time("Extract Data")
for p in para["paras"]:
content = ""
try:
if p["relative"]: # 是否相对xpath
if p["relativeXpath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
element = loopElement
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL,去找元素
try:
if p["relative"]: # 是否相对xpath
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
element = loopElement
else:
if p["relativeXPath"].find("//") >= 0: # 如果字串里有//即子孙查找,则不动语句
full_path = "(" + parentPath + \
p["relativeXPath"] + ")" + \
"[" + str(index + 1) + "]"
element = browser.find_element(By.XPATH, full_path)
else:
element = loopElement.find_element(By.XPATH,
p["relativeXPath"][1:])
else:
if p["relativeXpath"].find("//") >= 0: # 如果字串里有//即子孙查找,则不动语句
full_path = "(" + parentPath + \
p["relativeXpath"] + ")" + \
"[" + str(index + 1) + "]"
element = browser.find_element(By.XPATH, full_path)
element = browser.find_element(By.XPATH, p["relativeXPath"])
except NoSuchElementException: # 找不到元素的时候,使用默认值
# print(p)
try:
content = p["default"]
except Exception as e:
content = ""
outputParameters[p["name"]] = content
Log('Element %s not found, use default' % p["relativeXPath"])
recordLog('Element %s not found, use default' % p["relativeXPath"])
continue
except TimeoutException: # 超时的时候设置超时值
Log('time out after 10 seconds when getting data')
recordLog('time out after 10 seconds when getting data')
browser.execute_script('window.stop()')
if p["relative"]: # 是否相对xpath
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
element = loopElement
else:
element = loopElement.find_element(By.XPATH,
p["relativeXpath"][1:])
else:
element = browser.find_element(By.XPATH, p["relativeXpath"])
except NoSuchElementException: # 找不到元素的时候,使用默认值
# print(p)
try:
content = p["default"]
except Exception as e:
content = ""
outputParameters[p["name"]] = content
Log('Element %s not found,use default' % p["relativeXpath"])
recordLog('Element %s not found, use default' % p["relativeXpath"])
continue
except TimeoutException: # 超时的时候设置超时值
Log('time out after 10 seconds when getting data')
recordLog('time out after 10 seconds when getting data')
browser.execute_script('window.stop()')
if p["relative"]: # 是否相对xpath
if p["relativeXpath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
element = loopElement
p["relativeXPath"][1:])
else:
element = loopElement.find_element(By.XPATH,
p["relativeXpath"][1:])
else:
element = browser.find_element(By.XPATH, p["relativeXpath"])
rt.end()
element = browser.find_element(By.XPATH, p["relativeXPath"])
rt.end()
try:
execute_code(2, p["beforeJS"], p["beforeJSWaitTime"], element) # 执行前置js
if p["contentType"] == 2:
content = element.get_attribute('innerHTML')
elif p["contentType"] == 3:
content = element.get_attribute('outerHTML')
elif p["contentType"] == 4:
# 获取元素的背景图片地址
bg_url = element.value_of_css_property('background-image')
# 清除背景图片地址中的多余字符
bg_url = bg_url.replace('url("', '').replace('")', '')
content = bg_url
elif p["contentType"] == 5:
content = browser.current_url
elif p["contentType"] == 6:
content = browser.title
elif p["contentType"] == 7:
# 获取整个网页的高度和宽度
height = browser.execute_script("return document.body.scrollHeight");
width = browser.execute_script("return document.body.scrollWidth");
# 调整浏览器窗口的大小
browser.set_window_size(width, height)
element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
elif p["contentType"] == 8:
try:
screenshot = element.screenshot_as_png
screenshot_stream = io.BytesIO(screenshot)
# 使用Pillow库打开截图,并转换为灰度图像
image = Image.open(screenshot_stream).convert('L')
# 使用Tesseract OCR引擎识别图像中的文本
text = pytesseract.image_to_string(image, lang='chi_sim+eng')
content = text
except Exception as e:
content = "OCR失败"
print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中:https://blog.csdn.net/u010454030/article/details/80515501")
elif p["contentType"] == 9:
content = execute_code(2, p["JS"], p["JSWaitTime"], element)
elif p["contentType"] == 1: # 只采集当期元素下的文本,不包括子元素
command = 'var arr = [];\
var content = arguments[0];\
@ -571,26 +727,58 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
else:
content = ""
except StaleElementReferenceException: # 发生找不到元素的异常后,等待几秒重新查找
recordLog('StaleElementReferenceException:'+p["relativeXpath"])
recordLog('StaleElementReferenceException:'+p["relativeXPath"])
time.sleep(3)
try:
if p["relative"]: # 是否相对xpath
if p["relativeXpath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
element = loopElement
recordLog('StaleElementReferenceException:loopElement')
else:
element = loopElement.find_element(By.XPATH,
p["relativeXpath"][1:])
p["relativeXPath"][1:])
recordLog(
'StaleElementReferenceException:loopElement+relativeXPath')
else:
element = browser.find_element(
By.XPATH, p["relativeXpath"])
recordLog('StaleElementReferenceException:relativeXpath')
By.XPATH, p["relativeXPath"])
recordLog('StaleElementReferenceException:relativeXPath')
if p["contentType"] == 2:
content = element.get_attribute('innerHTML')
elif p["contentType"] == 3:
content = element.get_attribute('outerHTML')
elif p["contentType"] == 4:
# 获取元素的背景图片地址
bg_url = element.value_of_css_property('background-image')
# 清除背景图片地址中的多余字符
bg_url = bg_url.replace('url("', '').replace('")', '')
content = bg_url
elif p["contentType"] == 5:
content = browser.current_url
elif p["contentType"] == 6:
content = browser.title
elif p["contentType"] == 7:
# 获取整个网页的高度和宽度
height = browser.execute_script("return document.body.scrollHeight");
width = browser.execute_script("return document.body.scrollWidth");
# 调整浏览器窗口的大小
browser.set_window_size(width, height)
element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
elif p["contentType"] == 8:
try:
screenshot = element.screenshot_as_png
screenshot_stream = io.BytesIO(screenshot)
# 使用Pillow库打开截图,并转换为灰度图像
image = Image.open(screenshot_stream).convert('L')
# 使用Tesseract OCR引擎识别图像中的文本
text = pytesseract.image_to_string(image, lang='chi_sim+eng')
content = text
except Exception as e:
content = "OCR失败"
print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable path: https://tesseract-ocr.github.io/tessdoc/Installation.html")
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量path中:")
elif p["contentType"] == 9:
content = execute_code(2, p["JS"], p["JSWaitTime"], element)
elif p["contentType"] == 1: # 只采集当期元素下的文本,不包括子元素
command = 'var arr = [];\
var content = arguments[0];\
@ -636,9 +824,10 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
else:
content = ""
except StaleElementReferenceException:
recordLog('StaleElementReferenceException:'+p["relativeXpath"])
recordLog('StaleElementReferenceException:'+p["relativeXPath"])
continue # 再出现类似问题直接跳过
outputParameters[p["name"]] = content
execute_code(2, p["afterJS"], p["afterJSWaitTime"], element) # 执行后置JS
global OUTPUT
line = []
for value in outputParameters.values():
@ -667,6 +856,7 @@ def saveData(exit=False):
f.close()
OUTPUT = []
log = ""
@atexit.register
def clean():
@ -683,7 +873,9 @@ if __name__ == '__main__':
"saved_file_name": "",
"read_type": "remote",
"user_data": False,
"config_folder": ""
"config_folder": "",
"config_name": "config.json",
"headless": False,
}
c = Config(config)
print(c)
@ -760,12 +952,18 @@ if __name__ == '__main__':
# 3. 就算User Profile相同,chrome版本不同所存储的cookie信息也不同,也不能爬
# 4. TMALL如果一直弹出验证码,而且无法通过验证,那么需要在其他浏览器上用
if c.user_data:
with open(c.config_folder + "config.json","r") as f:
with open(c.config_folder + c.config_name,"r", encoding='utf-8') as f:
config = json.load(f)
absolute_user_data_folder = config["absolute_user_data_folder"]
print("\nAbsolute_user_data_folder:",absolute_user_data_folder,"\n")
option.add_argument(f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
option.add_argument("--profile-directory=Default")
if c.headless:
print("Headless mode")
print("无头模式")
option.add_argument("--headless")
options.add_argument("--headless")
# options.add_argument(
# '--user-data-dir=C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\User Data') # TMALL 反扒
option.add_argument(
@ -791,15 +989,18 @@ if __name__ == '__main__':
else:
saveName = "task_" + str(id) + "_" + \
str(random.randint(0, 999999999)) # 保存文件的名字
print("saveName: ", saveName)
os.mkdir("Data/" + saveName) # 创建保存文件夹用来保存截图
backEndAddress = c.server_address
if c.read_type == "remote":
print("remote")
content = requests.get(backEndAddress + "/queryExecutionInstance?id=" + str(id))
service = json.loads(content.text) # 加载服务信息
else:
print("local")
with open("execution_instances/" + str(id) + ".json", 'r', encoding='utf-8') as f:
content = f.read()
service = json.loads(content.text) # 加载服务信息
service = json.loads(content) # 加载服务信息
print("name: ", service["name"])
procedure = service["graph"] # 程序执行流程
links = list(filter(isnull, service["links"].split("\n"))) # 要执行的link的列表
@ -818,9 +1019,17 @@ if __name__ == '__main__':
# 挨个执行程序
urlId = 0 # 全局记录变量
for i in range(len(links)):
excuteNode(0)
executeNode(0)
urlId = urlId + 1
files = os.listdir("Data/" + saveName)
# 如果目录为空,则删除该目录
if not files:
os.rmdir("Data/" + saveName)
print("Done!")
print("执行完成!")
recordLog("Done!")
# dataPath = os.path.abspath(os.path.join(os.getcwd(), "../Data"))
# with open("Data/"+saveName + '_log.txt', 'a', encoding='utf-8-sig') as file_obj:

+ 2
- 0
ExecuteStage/requirements.txt View File

@ -5,3 +5,5 @@ pymongo==4.2.0
requests==2.28.1
selenium==4.5.0
pyinstaller
Pillow
pytesseract

BIN
Extension/manifest_v3/EasySpider_en.crx View File


BIN
Extension/manifest_v3/EasySpider_zh.crx View File


+ 12
- 2
Extension/manifest_v3/package.js View File

@ -11,7 +11,12 @@ let config = fs.readFileSync(path.join(__dirname, `src/content-scripts/config.js
config = JSON.parse(config);
// 生成英文插件
removeDir(path.join(__dirname, `EasySpider_en`));
try{
removeDir(path.join(__dirname, `EasySpider_en`));
} catch (e) {
}
config.language = "en";
let data = JSON.stringify(config);
// write JSON string to a file
@ -47,7 +52,12 @@ fs.copyFileSync(path.join(__dirname, './EasySpider_en.crx'), path.join(__dirname
// 生成中文插件
removeDir(path.join(__dirname, `EasySpider_zh`));
try{
removeDir(path.join(__dirname, `EasySpider_zh`));
} catch (e) {
}
config.language = "zh";
data = JSON.stringify(config);
// write JSON string to a file

+ 90
- 26
Extension/manifest_v3/src/content-scripts/global.js View File

@ -17,7 +17,7 @@ export var global = {
defaultbgColor: 'rgba(221,221,255,0.8)',
boxShadowColor: "blue 0px 0px 5px",
lang: config.language,
id: "C" + Math.floor(Math.random() * (99999999)).toString(),
id: "C" + Math.floor(Math.random() * (99999999)).toString(), //处理不同标签页的handles,生成的id
ws: null,
};
@ -46,6 +46,27 @@ export function getOS () {
}
}
/**
 * Render a value as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape character, so a literal is wrapped in whichever
 * quote it does not contain; if it contains both kinds it is assembled
 * with concat().
 *
 * @param {*} value - Value to embed; coerced with String().
 * @param {string} [quote="'"] - Quote character to prefer when it is safe.
 * @returns {string} An XPath expression evaluating to the given text.
 */
function toXPathLiteral(value, quote = "'") {
    const text = String(value);
    const otherQuote = quote === "'" ? '"' : "'";
    if (!text.includes(quote)) {
        return quote + text + quote;
    }
    if (!text.includes(otherQuote)) {
        return otherQuote + text + otherQuote;
    }
    const pieces = text.split("'").map((piece) => "'" + piece + "'");
    return "concat(" + pieces.join(", \"'\", ") + ")";
}

/**
 * Build a list of candidate XPaths for an element.
 *
 * The list always starts with the path from readXPath and a text-based
 * `contains(., ...)` path built from the first 10 characters of the
 * element's text (trimmed). Paths based on id, class, name and alt are
 * appended only when the element has a truthy value for them.
 *
 * @param {Element} element - Element to describe.
 * @param {Node} [parentElement=document.body] - Passed to readXPath as the
 *     node the first path is computed against.
 * @returns {string[]} Candidate XPath expressions, in the order above.
 */
export function getElementXPaths(element, parentElement = document.body) {
    const tag = element.tagName;
    // SVG elements expose className as an SVGAnimatedString, not a string;
    // its text lives in baseVal.
    const className = typeof element.className === "string"
        ? element.className
        : (element.className && element.className.baseVal);
    const paths = [];
    paths.push(readXPath(element, 1, parentElement));
    paths.push("//" + tag.toLowerCase() + "[contains(., " + toXPathLiteral(element.textContent.slice(0, 10).trim()) + ")]");
    if (element.id) {
        // Keep the historical id("...") form whenever the id allows it.
        paths.push("id(" + toXPathLiteral(element.id, '"') + ")");
    }
    if (className) {
        paths.push("//" + tag + "[@class=" + toXPathLiteral(className) + "]");
    }
    if (element.name) {
        paths.push("//" + tag + "[@name=" + toXPathLiteral(element.name) + "]");
    }
    if (element.alt) {
        paths.push("//" + tag + "[@alt=" + toXPathLiteral(element.alt) + "]");
    }
    console.log("ALL PATHS: " + paths);
    return paths;
}
//返回element相对node节点的xpath,默认的node节点是: /
export function readXPath(element, type = 1, node = document.body) {
try {
@ -129,7 +150,7 @@ export function addEl() {
} else //不然只添加一个元素
{
clearReady(); //readylist清零重新算
global.nodeList.push({ node: global.NowNode, "step": global.step, bgColor: global.style, "boxShadow": global.NowNode.style.boxShadow == "" || global.boxShadowColor ? "none" : global.NowNode.style.boxShadow, xpath: readXPath(global.NowNode, 1) });
global.nodeList.push({ node: global.NowNode, "step": global.step, bgColor: global.style, "boxShadow": global.NowNode.style.boxShadow == "" || global.boxShadowColor ? "none" : global.NowNode.style.boxShadow, xpath: readXPath(global.NowNode, 1), "allXPaths": getElementXPaths(global.NowNode) });
global.NowNode.style.backgroundColor = global.selectedColor;
}
handleElement(); //处理新状态
@ -235,6 +256,12 @@ function parameterName(value){
case "参数": return "para";
case "_图片": return "_image";
case "_图片地址": return "_image_address";
case "背景图片地址": return "background_image_address";
case "_背景图片": return "_background_image";
case "页面URL": return "page_url";
case "页面标题": return "page_title";
case "_页面URL": return "_page_url";
case "_页面标题": return "_page_title";
default: return "";
}
}
@ -242,13 +269,12 @@ function parameterName(value){
//根据nodelist列表内的元素生成参数列表
//适合:nodelist中的元素为同类型元素
//type:0为全部文本 1为节点内直接的文字 2为innerhtml 3为outerhtml
//type:0为全部文本 1为节点内直接的文字 2为innerhtml 3为outerhtml 4为backgroundImg 5为当前页面URL 6为当前页面标题 7为元素截图 8为OCR识别
//nodetype:0,对应全type0123
//nodetype:1 链接,对应type0123
//nodetype:2 链接地址 对应type0
//nodetype:3 按钮和输入文本框 对应type
//nodetype:4 按钮和输入文本框 对应type
export function generateParameters(type, linktext = true, linkhref = true) {
clearParameters(false);
let n = 1;
@ -256,9 +282,11 @@ export function generateParameters(type, linktext = true, linkhref = true) {
let at = parseInt(new Date().getTime());
n = items.parameterNum;
let ndPath = "";
let ndAllXPaths = [];
for (let num = 0; num < global.nodeList.length; num++) {
let nd = global.nodeList[num]["node"];
ndPath = global.nodeList[num]["xpath"];
ndAllXPaths = global.nodeList[num]["allXPaths"];
global.outputParameterNodes.push({ "node": nd, "boxShadow": nd.style.boxShadow == "" || global.boxShadowColor ? "none" : nd.style.boxShadow });
nd.style.boxShadow = global.boxShadowColor;
let pname = parameterName("文本");
@ -298,6 +326,15 @@ export function generateParameters(type, linktext = true, linkhref = true) {
// ndText = $(nd).prop("outerHTML");
ndText = nd.outerHTML;
pname = "outerHTML";
} else if(type == 4){
ndText = nd.style.backgroundImage.replace(/url\((['"])?(.*?)\1\)/gi, '$2').split(',')[0];
pname = parameterName("背景图片地址");
} else if(type == 5){
ndText = window.location.href;
pname = parameterName("页面URL");
} else if(type == 6){
ndText = document.title;
pname = parameterName("页面标题");
}
if (num == 0) { //第一个节点新建,后面的增加即可
if (nd.tagName == "IMG") { //如果元素是图片
@ -307,7 +344,9 @@ export function generateParameters(type, linktext = true, linkhref = true) {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_图片") + pname,
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? "" : ndPath,
"extractType": 0, //提取方式 0 普通 1 OCR
"relativeXPath": global.nodeList.length > 1 ? "" : ndPath,
"allXPaths": global.nodeList.length > 1 ? "" : ndAllXPaths,
"exampleValues": [{ "num": num, "value": ndText }]
});
} else if (nd.tagName == "A") { //如果元素是超链接
@ -318,7 +357,9 @@ export function generateParameters(type, linktext = true, linkhref = true) {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接") + pname,
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? "" : ndPath,
"extractType": 0, //提取方式 0 普通 1 OCR
"relativeXPath": global.nodeList.length > 1 ? "" : ndPath,
"allXPaths": global.nodeList.length > 1 ? "" : ndAllXPaths,
"exampleValues": [{ "num": num, "value": ndText }]
});
}
@ -329,7 +370,8 @@ export function generateParameters(type, linktext = true, linkhref = true) {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接地址"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? "" : ndPath,
"relativeXPath": global.nodeList.length > 1 ? "" : ndPath,
"allXPaths": global.nodeList.length > 1 ? "" : ndAllXPaths,
"exampleValues": [{ "num": num, "value": nd.getAttribute("href") == null ? "" : nd.getAttribute("href") }]
});
}
@ -340,7 +382,9 @@ export function generateParameters(type, linktext = true, linkhref = true) {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + "_" + pname,
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? "" : ndPath,
"extractType": 0, //提取方式 0 普通 1 OCR
"relativeXPath": global.nodeList.length > 1 ? "" : ndPath,
"allXPaths": global.nodeList.length > 1 ? "" : ndAllXPaths,
"exampleValues": [{ "num": num, "value": ndText }]
});
} else { //其他所有情况
@ -350,7 +394,9 @@ export function generateParameters(type, linktext = true, linkhref = true) {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + "_" + pname,
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? "" : ndPath,
"extractType": 0, //提取方式 0 普通 1 OCR
"relativeXPath": global.nodeList.length > 1 ? "" : ndPath,
"allXPaths": global.nodeList.length > 1 ? "" : ndAllXPaths,
"exampleValues": [{ "num": num, "value": ndText }]
});
}
@ -384,10 +430,11 @@ export function generateMultiParameters() {
chrome.storage.local.get({ parameterNum: 1 }, function(items) {
let at = parseInt(new Date().getTime());
n = items.parameterNum;
let nd, ndText, ndPath, pname;
let nd, ndText, ndPath, pname, ndAllXPaths;
for (let num = 0; num < global.nodeList.length; num++) {
let nd = global.nodeList[num]["node"];
ndPath = global.nodeList[num]["xpath"];
ndAllXPaths = global.nodeList[num]["allXPaths"];
global.outputParameterNodes.push({ "node": nd, "boxShadow": nd.style.boxShadow == "" || global.boxShadowColor ? "none" : nd.style.boxShadow });
nd.style.boxShadow = global.boxShadowColor;
// ndText = $(nd).text();
@ -399,7 +446,8 @@ export function generateMultiParameters() {
"relative": false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_图片地址"),
"desc": "", //参数描述
"relativeXpath": ndPath,
"relativeXPath": ndPath,
"allXPaths": ndAllXPaths,
"exampleValues": [{ "num": 0, "value": nd.getAttribute("src") == null ? "" : nd.getAttribute("src") }]
});
} else if (nd.tagName == "A") { //如果元素是超链接
@ -409,7 +457,8 @@ export function generateMultiParameters() {
"relative": false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接文本"),
"desc": "", //参数描述
"relativeXpath": ndPath,
"relativeXPath": ndPath,
"allXPaths": ndAllXPaths,
"exampleValues": [{ "num": 0, "value": ndText }]
});
global.outputParameters.push({
@ -418,7 +467,8 @@ export function generateMultiParameters() {
"relative": false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接地址"),
"desc": "", //参数描述
"relativeXpath": ndPath,
"relativeXPath": ndPath,
"allXPaths": ndAllXPaths,
"exampleValues": [{ "num": 0, "value": nd.getAttribute("href") == null ? "" : nd.getAttribute("href") }]
});
} else if (nd.tagName == "INPUT") { //如果元素是输入项
@ -428,7 +478,8 @@ export function generateMultiParameters() {
"relative": false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_文本"),
"desc": "", //参数描述
"relativeXpath": ndPath,
"relativeXPath": ndPath,
"allXPaths": ndAllXPaths,
"exampleValues": [{ "num": 0, "value": nd.getAttribute("value") == null ? "" : nd.getAttribute("value") }]
});
} else { //其他所有情况
@ -438,7 +489,8 @@ export function generateMultiParameters() {
"relative": false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_文本"),
"desc": "", //参数描述
"relativeXpath": ndPath,
"relativeXPath": ndPath,
"allXPaths": ndAllXPaths,
"exampleValues": [{ "num": 0, "value": ndText }]
});
}
@ -459,7 +511,7 @@ export function handleDescendents() {
n = items.parameterNum;
clearParameters(); //清除原来的参数列表
global.app._data.selectedDescendents = true;
let nd, ndText, ndPath, pname;
let nd, ndText, ndPath, pname, ndAllPaths;
for (let num = 0; num < global.nodeList.length; num++) {
let tnode = global.nodeList[num]["node"];
let stack = new Array(); //深度优先搜索遍历元素
@ -470,9 +522,10 @@ export function handleDescendents() {
continue; //对A标签内的SPAN元素不进行处理,剪枝,此时子元素根本不加入stack,即实现了此功能
}
ndPath = readXPath(nd, 1, tnode);
ndAllPaths = getElementXPaths(nd, tnode);
let index = -1;
for (let i = 0; i < global.outputParameters.length; i++) {
if (global.outputParameters[i]["relativeXpath"] == ndPath) {
if (global.outputParameters[i]["relativeXPath"] == ndPath) {
index = i;
break;
}
@ -503,7 +556,8 @@ export function handleDescendents() {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径,注意当只选择了子元素没有选中全部的时候,需要判断
"name": parameterName("参数") + (n++) + parameterName("_图片地址"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? ndPath : readXPath(nd), //同理需要判断
"relativeXPath": global.nodeList.length > 1 ? ndPath : readXPath(nd), //同理需要判断
"allXPaths": global.nodeList.length > 1 ? ndAllPaths : getElementXPaths(nd),
"exampleValues": [{
"num": num,
"value": nd.getAttribute("src") == null ? "" : nd.getAttribute("src")
@ -516,7 +570,8 @@ export function handleDescendents() {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接文本"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"relativeXPath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"allXPaths": global.nodeList.length > 1 ? ndAllPaths : getElementXPaths(nd),
"exampleValues": [{ "num": num, "value": nd.textContent }] //注意这里的ndtext是整个a的文字!!!
});
global.outputParameters.push({
@ -525,7 +580,8 @@ export function handleDescendents() {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_链接地址"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"relativeXPath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"allXPaths": global.nodeList.length > 1 ? ndAllPaths : getElementXPaths(nd),
"exampleValues": [{
"num": num,
"value": nd.getAttribute("href") == null ? "" : nd.getAttribute("href")
@ -538,7 +594,8 @@ export function handleDescendents() {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_文本"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"relativeXPath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"allXPaths": global.nodeList.length > 1 ? ndAllPaths : getElementXPaths(nd),
"exampleValues": [{
"num": num,
"value": nd.getAttribute("value") == null ? "" : nd.getAttribute("value")
@ -551,7 +608,8 @@ export function handleDescendents() {
"relative": global.nodeList.length > 1 ? true : false, //是否为相对xpath路径
"name": parameterName("参数") + (n++) + parameterName("_文本"),
"desc": "", //参数描述
"relativeXpath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"relativeXPath": global.nodeList.length > 1 ? ndPath : readXPath(nd),
"allXPaths": global.nodeList.length > 1 ? ndAllPaths : getElementXPaths(nd),
"exampleValues": [{ "num": num, "value": ndText }]
});
}
@ -630,7 +688,7 @@ export function findRelated() {
nodeIndexList.push(-1);
}
}
var tempPath = "";
let tempPath = "";
for (let i = nodeIndexList.length - 1; i >= 0; i--) {
if (nodeIndexList[i] == -1) { //没有索引值直接跳过
continue;
@ -638,10 +696,13 @@ export function findRelated() {
let tempIndexList = [...nodeIndexList]; //复刻一个index数组
tempIndexList[i] = -1; //删除索引值
tempPath = combineXpath(nodeNameList, tempIndexList); //生成新的xpath
var result = document.evaluate(tempPath, document, null, XPathResult.ANY_TYPE, null);
let result = document.evaluate(tempPath, document, null, XPathResult.ANY_TYPE, null);
result.iterateNext(); //枚举第一个元素
if (result.iterateNext() != null) { //如果能枚举到第二个元素,说明存在同类元素,选中同类元素,结束循环
global.app.$data.nowPath = tempPath; //标记此元素xpath
let element = document.evaluate(tempPath, document, null, XPathResult.ANY_TYPE, null).iterateNext();
console.log("tempPath:", tempPath, "element:", element);
global.app.$data.nowAllPaths = getElementXPaths(element); //标记此元素xpath
pushToReadyList(tempPath);
break;
}
@ -674,7 +735,7 @@ export function pushToReadyList(path) {
//将readyList中的元素放入选中节点中
export function readyToList(step, dealparameters = true) {
for (let o of global.readyList) {
global.nodeList.push({ node: o["node"], "step": global.step, bgColor: o["bgColor"], "boxShadow": o["boxShadow"], xpath: readXPath(o["node"], 1) });
global.nodeList.push({ node: o["node"], "step": global.step, bgColor: o["bgColor"], "boxShadow": o["boxShadow"], xpath: readXPath(o["node"], 1), "allXPaths": getElementXPaths(o["node"]) });
o["node"].style.backgroundColor = global.selectedColor;
}
clearReady();
@ -738,7 +799,10 @@ export function relatedTest() {
}
}
}
var finalPath = combineXpath(testpath, indexList);
let finalPath = combineXpath(testpath, indexList);
let element = document.evaluate(finalPath, document, null, XPathResult.ANY_TYPE, null).iterateNext();
global.app.$data.nowAllPaths = getElementXPaths(element); //标记此元素xpath
console.log("finalPath:", finalPath, "element:", element);
global.app.$data.nowPath = finalPath; //标记此元素xpath
pushToReadyList(finalPath);
let at2 = parseInt(new Date().getTime());

+ 78
- 0
Extension/manifest_v3/src/content-scripts/iframe.vue View File

@ -0,0 +1,78 @@
<template>
<div id="realcontent">
<div v-if="lang == 'zh'">
<div class="toolcannotdrag">提示</div>
<div class="realcontent">
<p style="font-size: 15px">检测到此页面在iframe中如想提取此页面数据请重新设计任务将网页URL改为下面显示的此iframe的URL地址</p>
<textarea style="font-size: 15px;width: 100%">{{url}}</textarea>
</div>
</div>
<div v-else-if="lang=='en'">
<div class="toolcannotdrag">Hint</div>
<div class="realcontent">
<p style="font-size: 15px">Detected that this page is in an iframe. If you want to extract data from this page, please redesign the task and change the URL of the webpage to the URL address of this iframe displayed below:</p>
<textarea style="font-size: 15px;width: 100%">{{url}}</textarea>
</div>
</div>
</div>
</template>
<script>
import {
global,
getOS,
readXPath,
addEl,
clearEl,
clearReady,
handleElement,
clearParameters,
generateParameters,
generateMultiParameters,
handleDescendents,
generateValTable,
findRelated,
pushToReadyList,
readyToList,
combineXpath,
relatedTest
} from "./global.js";
import {
input,
sendSingleClick,
collectSingle,
collectMultiNoPattern,
collectMultiWithPattern,
sendLoopClickSingle,
sendLoopClickEvery,
detectAllSelected
} from "./messageInteraction.js";
import $ from "jquery";
// Vue options for the hint panel that main.js instantiates (via `new Vue(iframe)`)
// when the content script finds itself running inside a frame.
export default {
  // State read by the template above: `lang` selects the zh/en branch and
  // `url` is the address shown in the textarea.
  data: {
    lang: global.lang,
    url: window.location.href,
  },
  // Mount point: the element whose id is "realcontent".
  el: "#realcontent",
}
</script>
<style>
#wrapperToolkitIframe{
position: absolute;
top:0;
}
.toolcannotdrag{
background-color: navy;
width: 100%;
text-align: center;
font-size: 13px;
height: 26px !important;
padding-top: 8px !important;
color: white;
}
</style>

+ 49
- 14
Extension/manifest_v3/src/content-scripts/main.js View File

@ -2,7 +2,15 @@ import $ from "jquery";
import Vue from "vue";
import {global, getOS, readXPath, addEl, clearEl, clearReady, handleElement, clearParameters, generateParameters, generateMultiParameters, handleDescendents, generateValTable, findRelated, pushToReadyList, readyToList, combineXpath, relatedTest} from "./global.js";
import ToolKit from "./toolkit.vue";
import iframe from "./iframe.vue";
/**
 * Tell whether this script is running in a framed (non-top-level) window.
 *
 * @returns {boolean} true when `window.self` is not the same object as
 *   `window.parent`, and also true when evaluating that comparison throws.
 */
function isInIframe() {
  let embedded;
  try {
    embedded = window.self !== window.parent;
  } catch (accessError) {
    // If the comparison itself cannot be evaluated, treat the page as framed.
    embedded = true;
  }
  return embedded;
}
//表现逻辑层的处理
@ -32,10 +40,15 @@ global.tdiv.style.pointerEvents = "none";
var mousemovebind = false; //如果出现元素默认绑定了mousemove事件导致匹配不到元素的时候,开启第二种模式获得元素
var toolkit = document.createElement("div")
// @ts-ignore
var toolkit = document.createElement("div");
toolkit.classList = "tooltips"; //添加样式
toolkit.setAttribute("id", "wrapperToolkit");
// @ts-ignore
if(isInIframe()){
toolkit.setAttribute("id", "wrapperToolkitIframe");
} else {
toolkit.setAttribute("id", "wrapperToolkit");
}
var tooltips = false; //标记鼠标是否在提示框上
@ -114,15 +127,31 @@ document.addEventListener("mousemove", function() {
});
window.addEventListener("beforeunload", function(event) {
event.preventDefault();
let message = {
type: 10,
message: {
id: global.id, //socket id
}
};
global.ws.send(JSON.stringify(message));
// window.addEventListener("beforeunload", function(event) {
// event.preventDefault();
// let message = {
// type: 10,
// message: {
// id: global.id, //socket id
// }
// };
// global.ws.send(JSON.stringify(message));
// // Remove the confirmation message
// event.returnValue = '';
// });
// Once the DOM is ready, clear this window's `onbeforeunload` property and register
// a `beforeunload` listener of our own.
// NOTE(review): the original inline comments say this listener removes the
// leave-page confirmation. In current browsers calling preventDefault() on
// `beforeunload` may instead *request* that prompt — confirm the intended effect.
window.addEventListener('DOMContentLoaded', function onDomReady() {
  // Drop whatever handler is currently assigned to the onbeforeunload property.
  window.onbeforeunload = null;

  window.addEventListener('beforeunload', function onBeforeUnload(event) {
    // Cancel the event's default action, then blank the legacy returnValue message.
    event.preventDefault();
    event.returnValue = '';
  });
});
//点击没反应时候的替代方案
@ -153,12 +182,18 @@ document.body.append(toolkit);
var timer;
//生成Toolkit
function generateToolkit() {
$(".tooltips").html(`
<div id="realcontent"></div>
`);
global.app = new Vue(ToolKit);
if(isInIframe()){
global.app = new Vue(iframe);
} else{
global.app = new Vue(ToolKit);
}
let h = $(".tooldrag").height();
let difference = 26 - h; //获得高度值差
if (difference > 0) {
@ -200,7 +235,7 @@ function generateToolkit() {
});
});
timer = setInterval(function() { //时刻监测相应元素是否存在(防止出现如百度一样元素消失重写body的情况),如果不存在,添加进来
if (document.body != null && document.getElementById("wrapperToolkit") == null) {
if (document.body != null && document.getElementsByClassName("tooltips").length == 0) {
this.clearInterval(); //先取消原来的计时器,再设置新的计时器
document.body.append(global.div); //默认如果toolkit不存在则div和tdiv也不存在
document.body.append(global.tdiv);

+ 7
- 1
Extension/manifest_v3/src/content-scripts/messageInteraction.js View File

@ -1,6 +1,6 @@
//实现与后台和流程图部分的交互
import {global, readXPath} from "./global.js";
import {getElementXPaths, global, readXPath} from "./global.js";
var startMsg = { "type": 0, msg: ""};
@ -21,6 +21,7 @@ export function input(value) {
"history": history.length, //记录history的长度
"tabIndex": -1,
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
"value": value,
};
let msg = { type: 3, msg: message };
@ -38,6 +39,7 @@ export function sendSingleClick() {
"tabIndex": -1,
"useLoop": false, //是否使用循环内元素
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
@ -76,6 +78,7 @@ export function collectMultiWithPattern() {
"tabIndex": -1,
"loopType": 1,
"xpath": "", //默认值设置为空
"allXPaths": "",
"isDescendents": global.app._data.selectedDescendents, //标记是否采集的是子元素
"parameters": global.outputParameters,
};
@ -85,6 +88,7 @@ export function collectMultiWithPattern() {
}
if (message.loopType == 1) {
message["xpath"] = global.app._data.nowPath;
message["allXPaths"] = global.app._data.nowAllPaths;
} else { //固定元素列表
message["pathList"] = [];
for (let i = 0; i < global.nodeList.length; i++) {
@ -103,6 +107,7 @@ export function sendLoopClickSingle(name) {
"tabIndex": -1,
"useLoop": true, //是否使用循环内元素
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
"loopType": 0, //循环类型,0为单个元素
"nextPage": false, //是否循环点击下一页
};
@ -120,6 +125,7 @@ export function sendLoopClickEvery() {
"history": history.length, //记录history的长度
"tabIndex": -1,
"xpath": "", //默认值设置为空
"allXPaths": "",
"useLoop": true, //是否使用循环内元素
"loopType": 1, //循环类型,1为不固定元素列表
};

+ 52
- 17
Extension/manifest_v3/src/content-scripts/toolkit.vue View File

@ -4,22 +4,27 @@
<div class="tooldrag">操作提示框可点此拖动</div>
<div class="realcontent">
<div v-if="page==0">
<input style="width:15px;height:15px;vertical-align:middle;" type="checkbox"
v-on:mousedown="specialSelect"/>
<p style="margin-bottom:10px;display:inline-block">特殊点选模式</p>
<div v-if="list.nl.length==0" :style="{overflow: 'auto', maxHeight: winHeight * 0.4 + 'px'}">
<input style="width:15px;height:15px;vertical-align:middle;" type="checkbox"
v-on:mousedown="specialSelect"/>
<p style="margin-bottom:10px;display:inline-block">特殊点选模式</p>
<div class="innercontent" v-if = "list.nl.length==0">
<div><a v-on:mousedown="getCurrentTitle">采集当前页面的标题</a><span title=""></span></div>
<div><a v-on:mousedown="getCurrentURL">采集当前页面的URL地址</a><span title=""></span></div>
</div>
<p style="color:black; margin-top: 10px"> 鼠标移动到元素上后<strong>右键</strong>点击或者按<strong>F7</strong>键选中页面元素
</p>
<p style="color:black; margin-top: 10px"> 通过鼠标左键进行点击时页面也会有反应但此点击操作不会被记录在任务流程中
<p style="color:black; margin-top: 10px"> 操作完成后如点击确认采集后任务流程图内没有提取数据操作被添加<strong>重试一次</strong>即可</p>
<p style="color:black; margin-top: 10px"> 通过鼠标左键进行点击时页面也会有反应但左键点击发生的操作不会被记录在任务流程中同理如果想输入文本框但并不想将动作记录可以鼠标移动到文本框并按键盘的<strong>F9</strong>进行输入
</p>
<p style="color:black; margin-top: 10px"> 同理如果想输入文本框但并不想将动作记录如想要在数据模式输入密码仅此一次的操作下次加载页面已经是已登录状态可以鼠标移动到文本框并按键盘的<strong>F9</strong>进行输入</p>
<p style="color:black; margin-top: 10px"> 如果不小心左键点选了元素导致页面跳转直接后退或者切换回标签页即可</p>
{{ initial() }}
</div>
<div v-if="list.nl.length==1">
<div v-if="tname()!='null'">
已选中{{ numOfList() }}{{ tname() }}<span
v-if="numOfReady()>0&&tname()!='下一页元素'">同时发现{{ numOfReady() }}个同类元素</span>您可以:
v-if="numOfReady()>0&&tname()!='下一页元素'">同时发现{{ numOfReady() }}个同类元素如果不全或不准请继续手动选择其余您认为的同类元素</span>您可以:
<div class="innercontent">
<div v-if="numOfReady()>0 && !selectStatus"><a v-on:mousedown="selectAll">选中全部</a> <span
title=""></span></div>
@ -37,13 +42,15 @@
v-on:mousedown="clickElement">点击该{{ tname() }}</a><span title=""></span></div>
<div v-if="tname()!='选择框' && tname()!='文本框'"><a
v-on:mousedown="loopClickSingleElement">循环点击该{{ tname() }}</a><span title=""></span></div>
<div><a v-on:mousedown="getBackgroundPic">采集该{{ tname() }}的背景图片地址</a><span title=""></span></div>
<div v-if="tname()=='链接'||tname()=='元素'"><a v-on:mousedown="getInnerHtml">采集该{{
tname()
}}的Inner
Html</a><span title=""></span></div>
<div><a v-on:mousedown="getOuterHtml">采集该{{ tname() }}的Outer Html</a><span title=""></span></div>
<div><a href="#">鼠标移动到该{{ tname() }}</a><span title=""></span></div>
<div v-if="tname()=='文本框'"><a>识别验证码</a><span title=""></span></div>
<!-- <div><a href="#">鼠标移动到该{{ tname() }}</a><span title=""></span></div>-->
<!-- <div v-if="tname()=='文本框'"><a>识别验证码</a><span title=""></span></div>-->
</div>
<div v-if="selectedDescendents" id="Single">
<div><a v-on:mousedown="confirmCollectSingle">采集数据</a><span title=""></span></div>
@ -67,7 +74,7 @@
<div v-if="option!=100">
已选择了{{ numOfList() }}个同类元素<span
v-if="numOfReady()>0">另外发现{{ numOfReady() }}个同类元素</span>您可以
v-if="numOfReady()>0">另外发现{{ numOfReady() }}个同类元素如果不全或不准请继续手动选择其余您认为的同类元素</span>您可以
<div class="innercontent">
<div v-if="numOfReady()>0"><a v-on:mousedown="selectAll">选中全部</a><span title=""></span></div>
<div v-if="existDescendents()&&(tname()=='元素' || tname()=='链接')"><a
@ -125,21 +132,25 @@
<div class="tooldrag">Operation Toolbox (Can drag)</div>
<div class="realcontent">
<div v-if="page==0">
<input style="width:15px;height:15px;vertical-align:middle;" type="checkbox"
v-on:mousedown="specialSelect"> </input>
<p style="margin-bottom:10px;display:inline-block">Special click mode</p>
<div v-if="list.nl.length==0" :style="{overflow: 'auto', maxHeight: winHeight * 0.4 + 'px'}">
<input style="width:15px;height:15px;vertical-align:middle;" type="checkbox"
v-on:mousedown="specialSelect"> </input>
<p style="margin-bottom:10px;display:inline-block">Special click mode</p>
<div class="innercontent" v-if = "list.nl.length==0">
<div><a v-on:mousedown="getCurrentTitle">Collect Title of current page</a><span title=""></span></div>
<div><a v-on:mousedown="getCurrentURL">Collect URL of current page</a><span title=""></span></div>
</div>
<p style="color:black"> When your mouse moves to the element, please <strong>right-click</strong> your
mouse button or press <strong>F7</strong> on the keyboard to select it.</p>
<p style="color:black; margin-top: 10px"> When clicked with the left mouse button, the page will also respond, but this click operation will not be recorded in the task flow.</p>
<p style="color:black; margin-top: 10px"> Similarly, if you want to input in a text box but do not want the action to be recorded (such as wanting to input a password in data mode, this operation is only performed once, and the next time the page is loaded, it is already logged in), you can move the mouse to the text box and press <strong>F9</strong> on the keyboard to input.</p>
<p style="color:black; margin-top: 10px"> After the operation is completed, such as if no "Collect Data" operation is added in the task flowchart after clicking "Confirm Collect", just <strong> retry </strong> again.</p>
<p style="color:black; margin-top: 10px"> When clicked with the left mouse button, the page will also respond, but this click operation will not be recorded in the task flow. Similarly, if you want to input in a text box but do not want the action to be recorded , you can move the mouse to the text box and press <strong>F9</strong> on the keyboard to input.</p>
<p style="color:black; margin-top: 10px"> If you accidentally left-click on an element and cause the page to jump, simply go back or switch back to the tab.</p>
{{ initial() }}
</div>
<div v-if="list.nl.length==1">
<div v-if="tname()!='null'">
Already selected {{ numOfList() }} {{ tname() | toEng }}, <span
v-if="numOfReady()>0&&tname()!='下一页元素'"> meanwhile we find {{ numOfReady() }} element with the same type, </span>you
v-if="numOfReady()>0&&tname()!='下一页元素'"> meanwhile we find {{ numOfReady() }} element with the same type (If unsatisfied with auto-detected similar elements, you can continue to manually select the rest of the elements that you think are similar), </span>you
can:
<div class="innercontent">
<div v-if="numOfReady()>0 && !selectStatus"><a v-on:mousedown="selectAll">Select All</a><span
@ -162,10 +173,12 @@
v-on:mousedown="loopClickSingleElement">Loop-click this {{ tname() | toEng }}</a><span
title=""></span>
</div>
<div><a v-on:mousedown="getBackgroundPic">Collect background image URL</a><span title=""></span></div>
<div v-if="tname()=='链接'||tname()=='元素'"><a v-on:mousedown="getInnerHtml">Collect Inner Html of
this {{ tname() | toEng }}</a><span title=""></span></div>
<div><a v-on:mousedown="getOuterHtml">Collect Outer Html of this element</a><span title=""></span>
</div>
<!-- <div> <a href="#">鼠标移动到该元素上----{{tname()}}-</a><span title=""></span></div> -->
<!-- <div v-if="tname()=='text box'"> <a>识别验证码</a><span title=""></span></div> -->
</div>
@ -191,7 +204,7 @@
<div v-if="option!=100">
Already selected {{ numOfList() }} similar elements, <span
v-if="numOfReady()>0">and we find other{{ numOfReady() }} similar elements, </span>you can:
v-if="numOfReady()>0">and we find other{{ numOfReady() }} similar elements (If unsatisfied with auto-detected similar elements, you can continue to manually select the rest of the elements that you think are similar), </span>you can:
<div class="innercontent">
<div v-if="numOfReady()>0"><a v-on:mousedown="selectAll">Select All</a><span title=""></span></div>
<div v-if="existDescendents()&&(tname()=='元素' || tname()=='链接')"><a
@ -271,7 +284,7 @@ import {
pushToReadyList,
readyToList,
combineXpath,
relatedTest
relatedTest, getElementXPaths
} from "./global.js";
import {
input,
@ -299,6 +312,7 @@ export default {
text: "", //
tNodeName: "", //
nowPath: "", //xpath
nowAllPaths: [], //xpath
winHeight: window.outerHeight,
},
mounted(){
@ -421,6 +435,21 @@ export default {
$(".tooltips").css("width", width);
return "";
},
getCurrentURL: function () { //URL
addEl(); //
generateParameters(5, true, false);
this.selectStatus = true;
clearReady();
},
getCurrentTitle: function () { //Title
//
// const elements = document.querySelectorAll('*');
// global.nodeList.push(elements[0]); //
addEl(); //
generateParameters(6, true, false);
this.selectStatus = true;
clearReady();
},
getText: function () { //
generateParameters(0, true, false);
this.selectStatus = true;
@ -441,6 +470,11 @@ export default {
this.selectStatus = true;
clearReady();
},
getBackgroundPic: function () { //
generateParameters(4, true, false);
this.selectStatus = true;
clearReady();
},
tname: function () {
let tag = global.nodeList.length == 0 ? "" : global.nodeList[0]["node"].tagName;
let inputType = global.nodeList.length == 0 ? "" : global.nodeList[0]["node"].getAttribute("type");
@ -520,6 +554,7 @@ export default {
global.nodeList[global.nodeList.length - 1]["node"] = tNode;
global.nodeList[global.nodeList.length - 1]["bgColor"] = sty;
global.nodeList[global.nodeList.length - 1]["xpath"] = readXPath(tNode, 1);
global.nodeList[global.nodeList.length - 1]["allXPaths"] = getElementXPaths(tNode);
//
var pos = tNode.getBoundingClientRect();
global.div.style.display = "block";

+ 1
- 1
Extension/manifest_v3/src/manifest.json View File

@ -30,7 +30,7 @@
"css": ["style/toolkit.css"],
"js": ["content-scripts/main.js"],
"run_at": "document_end",
"all_frames": false
"all_frames": true
}
],
"web_accessible_resources": [

+ 17
- 8
Readme.md View File

@ -22,6 +22,22 @@ A visual code-free/no-code web crawler/spider, just select the content you want
Refer to the [Releases Page](https://github.com/NaiboWang/EasySpider/releases) to download the latest version of EasySpider.
## 声明/Declaration
本软件仅供学习交流使用,**严禁使用软件进行任何违法违规的操作,如爬取不允许爬取的政府军事机关网站等**。使用本软件所造成的一切后果由使用者自负,作者不负任何责任。同时,软件受到专利权保护,如要用于商业用途,请联系**浙江大学天道专利事务所**进行付费等操作。
This software is for learning and communication purposes only. **It is strictly forbidden to use the software for any illegal activity, such as crawling government or military websites that do not permit crawling.** Users bear all consequences arising from their use of this software, and the author assumes no responsibility. The software is also protected by patent; if you want to use it for commercial purposes, please contact **Zhejiang University Tiandao Patent Office** to arrange payment and related matters.
对于政府和军事机关等网站的爬虫操作,**作者将不会进行任何答疑**,以免触碰国家相关法律法规和政策。
For crawler operations on government and military websites, **the author will not answer any questions**, so as not to run afoul of relevant national laws, regulations, and policies.
## 文档/Documentation
请点此进入[教程文档](https://github.com/NaiboWang/EasySpider/wiki),如有英文可暂时翻译一下,或看作者的[硕士毕业论文](Docs/%E9%9D%A2%E5%90%91WEB%E5%BA%94%E7%94%A8%E7%9A%84%E6%99%BA%E8%83%BD%E5%8C%96%E6%9C%8D%E5%8A%A1%E5%B0%81%E8%A3%85%E7%B3%BB%E7%BB%9F%E8%AE%BE%E8%AE%A1%E4%B8%8E%E5%AE%9E%E7%8E%B0.pdf)(主要看第三章和第五章)。
Documentation is available on the [GitHub Wiki](https://github.com/NaiboWang/EasySpider/wiki).
## 视频教程/Video Tutorials
@ -41,16 +57,9 @@ Bilibili/B站视频教程:
See the [YouTube playlist](https://youtube.com/playlist?list=PL0kEFEkWrT7mt9MUlEBV2DTo1QsaanUTp) for EasySpider video tutorials.
## 文档/Documentation
请点此进入[教程文档](https://github.com/NaiboWang/EasySpider/wiki),如有英文可暂时翻译一下,或看作者的[硕士毕业论文](Docs/%E9%9D%A2%E5%90%91WEB%E5%BA%94%E7%94%A8%E7%9A%84%E6%99%BA%E8%83%BD%E5%8C%96%E6%9C%8D%E5%8A%A1%E5%B0%81%E8%A3%85%E7%B3%BB%E7%BB%9F%E8%AE%BE%E8%AE%A1%E4%B8%8E%E5%AE%9E%E7%8E%B0.pdf)(主要看第三章和第五章)。
Documentation is available on the [GitHub Wiki](https://github.com/NaiboWang/EasySpider/wiki).
## 出版物/Publications
- This software has been accepted by The Web Conference (WWW) 2023: [EasySpider: A No-Code Visual System for Crawling the Web](https://dl.acm.org/doi/abs/10.1145/3543873.3587345), March 2023.
- This software has been accepted by The Web Conference (WWW) 2023 (中国计算机学会顶级会议 CCF A): [EasySpider: A No-Code Visual System for Crawling the Web](https://dl.acm.org/doi/abs/10.1145/3543873.3587345), March 2023.
- 中国国家知识产权局发明专利,[一种自定义提取流程的服务封装系统](media/patent.png), 2022年5月。

+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/0.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/1.json View File

@ -1 +1 @@
{"id":1,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXpath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}
{"id":1,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}

+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/2.json View File

@ -1 +1 @@
{"id":2,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXpath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}
{"id":2,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}

+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/3.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/execution_instances/4.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/11.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/15.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/16.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/17.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/19.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/2.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/20.json View File

@ -1 +1 @@
{"id": 20, "name": "Bilibili\u7c89\u4e1d", "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "containJudge": false, "desc": "https://space.bilibili.com/291929894/fans/fans", "inputParameters": [{"id": 0, "name": "urlList_0", "nodeId": 1, "nodeName": "Open Page", "value": "https://space.bilibili.com/291929894/fans/fans", "desc": "List of URLs to be collected, separated by \\n for multiple lines", "type": "string", "exampleValue": "https://space.bilibili.com/291929894/fans/fans"}, {"id": 1, "name": "loopTimes_Loop_1", "nodeId": 2, "nodeName": "Loop", "desc": "Number of loop executions, 0 means unlimited loops (until element not found)", "type": "int", "exampleValue": 0, "value": 0}], "outputParameters": [{"id": 0, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "type": "string", "exampleValue": "\u5bf9\u65b9\u7b54\u590d5"}], "graph": [{"index": 0, "id": 0, "parentId": 0, "type": -1, "option": 0, "title": "root", "sequence": [1, 2], "parameters": {"history": 1, "tabIndex": 0, "useLoop": false, "xpath": "", "wait": 0}, "isInLoop": false}, {"id": 1, "index": 1, "parentId": 0, "type": 0, "option": 1, "title": "Open Page", "sequence": [], "isInLoop": false, "position": 0, "parameters": {"useLoop": false, "xpath": "", "wait": 0, "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "scrollType": 0, "scrollCount": 0}}, {"id": 2, "index": 2, "parentId": 0, "type": 1, "option": 8, "title": "Loop", "sequence": [4], "isInLoop": false, "position": 1, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "//a[contains(text(),\"\u4e0b\u4e00\u9875\")]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 0, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": -1, "index": 3, "parentId": 2, "type": 0, "option": 2, "title": "Click Element", "sequence": [], "isInLoop": true, "position": 1, "parameters": 
{"history": 4, "tabIndex": -1, "useLoop": true, "xpath": "//*[@id=\"page-follows\"]/div[1]/div[2]/div[2]/div[2]/ul[2]/li[7]", "wait": 1, "scrollType": 0, "scrollCount": 0, "paras": [], "loopType": 0}}, {"id": 3, "index": 4, "parentId": 2, "type": 1, "option": 8, "title": "Loop", "sequence": [5], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "/html/body/div[2]/div[4]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/ul[1]/li/div[2]/a[1]/span[1]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 1, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": 4, "index": 5, "parentId": 3, "type": 0, "option": 3, "title": "Extract Data", "sequence": [], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "", "wait": 0, "paras": [{"nodeType": 0, "contentType": 0, "relative": true, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "relativeXpath": "", "exampleValues": [{"num": 0, "value": "\u5bf9\u65b9\u7b54\u590d5"}], "default": ""}], "loopType": 1}}]}
{"id": 20, "name": "Bilibili\u7c89\u4e1d", "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "containJudge": false, "desc": "https://space.bilibili.com/291929894/fans/fans", "inputParameters": [{"id": 0, "name": "urlList_0", "nodeId": 1, "nodeName": "Open Page", "value": "https://space.bilibili.com/291929894/fans/fans", "desc": "List of URLs to be collected, separated by \\n for multiple lines", "type": "string", "exampleValue": "https://space.bilibili.com/291929894/fans/fans"}, {"id": 1, "name": "loopTimes_Loop_1", "nodeId": 2, "nodeName": "Loop", "desc": "Number of loop executions, 0 means unlimited loops (until element not found)", "type": "int", "exampleValue": 0, "value": 0}], "outputParameters": [{"id": 0, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "type": "string", "exampleValue": "\u5bf9\u65b9\u7b54\u590d5"}], "graph": [{"index": 0, "id": 0, "parentId": 0, "type": -1, "option": 0, "title": "root", "sequence": [1, 2], "parameters": {"history": 1, "tabIndex": 0, "useLoop": false, "xpath": "", "wait": 0}, "isInLoop": false}, {"id": 1, "index": 1, "parentId": 0, "type": 0, "option": 1, "title": "Open Page", "sequence": [], "isInLoop": false, "position": 0, "parameters": {"useLoop": false, "xpath": "", "wait": 0, "url": "https://space.bilibili.com/291929894/fans/fans", "links": "https://space.bilibili.com/291929894/fans/fans", "scrollType": 0, "scrollCount": 0}}, {"id": 2, "index": 2, "parentId": 0, "type": 1, "option": 8, "title": "Loop", "sequence": [4], "isInLoop": false, "position": 1, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "//a[contains(text(),\"\u4e0b\u4e00\u9875\")]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 0, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": -1, "index": 3, "parentId": 2, "type": 0, "option": 2, "title": "Click Element", "sequence": [], "isInLoop": true, "position": 1, "parameters": 
{"history": 4, "tabIndex": -1, "useLoop": true, "xpath": "//*[@id=\"page-follows\"]/div[1]/div[2]/div[2]/div[2]/ul[2]/li[7]", "wait": 1, "scrollType": 0, "scrollCount": 0, "paras": [], "loopType": 0}}, {"id": 3, "index": 4, "parentId": 2, "type": 1, "option": 8, "title": "Loop", "sequence": [5], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "/html/body/div[2]/div[4]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/ul[1]/li/div[2]/a[1]/span[1]", "wait": 0, "scrollType": 0, "scrollCount": 0, "loopType": 1, "pathList": "", "textList": "", "exitCount": 0, "historyWait": 2}}, {"id": 4, "index": 5, "parentId": 3, "type": 0, "option": 3, "title": "Extract Data", "sequence": [], "isInLoop": true, "position": 0, "parameters": {"history": 4, "tabIndex": -1, "useLoop": false, "xpath": "", "wait": 0, "paras": [{"nodeType": 0, "contentType": 0, "relative": true, "name": "\u53c2\u65701_\u6587\u672c", "desc": "", "relativeXPath": "", "exampleValues": [{"num": 0, "value": "\u5bf9\u65b9\u7b54\u590d5"}], "default": ""}], "loopType": 1}}]}

+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/23.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/25.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/27.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/28.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/29.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/30.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/31.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/32.json View File

@ -1 +1 @@
{"id":32,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXpath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}
{"id":32,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}

+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/4.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/5.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/6.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_linux_amd64_Ubuntu/tasks/7.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_windows_386/execution_instances/0.json View File

@ -1 +1 @@
{"id":0,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXpath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}
{"id":0,"name":"知乎_登录后采集","url":"https://www.zhihu.com","links":"https://www.zhihu.com","containJudge":false,"desc":"https://www.zhihu.com\n使用带用户配置的浏览器模式来先手工登录后保存信息,再接着执行。","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"历史上有哪些通过“正当手段”干出不正当事的人物?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"scrollType":0,"scrollCount":0,"loopType":2,"pathList":"//*[contains(@class, \"css-0\")]/div[2]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[3]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[4]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[5]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[6]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[7]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[8]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[9]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[10]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[11]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, 
\"css-0\")]/div[12]/div[1]/div[1]/div[1]/h2[1]/div[1]\n//*[contains(@class, \"css-0\")]/div[13]/div[1]/div[1]/div[1]/h2[1]/div[1]","textList":"","exitCount":0,"historyWait":2}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"","exampleValues":[{"num":0,"value":"历史上有哪些通过“正当手段”干出不正当事的人物?"},{"num":1,"value":"新加坡有哪些不好的地方?"},{"num":2,"value":"孙悟空可以秒杀山村老尸那样的厉鬼吗?"},{"num":3,"value":"为什么渐渐厌倦玩《原神》了?"},{"num":4,"value":"历史上有哪些著名的考古乌龙事件?"},{"num":5,"value":"苹果公司为什么能把用户调教得这么好?"},{"num":6,"value":"哪个瞬间让你发现了世界的bug?"},{"num":7,"value":"假如中国的院士,想为亲属谋体制内的工作,难度大吗?为什么?"},{"num":8,"value":"你一直珍藏的视频是哪个?"},{"num":9,"value":"如何评价《原神》角色艾莉丝?"},{"num":10,"value":"索罗斯如何做空的英镑、泰铢?为什么做空香港失败了?"},{"num":11,"value":"如何在婚前认清并杜绝王力宏这种男人?"}],"default":""}],"loopType":2}}]}

+ 1
- 1
Releases/EasySpider_windows_386/tasks/11.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_windows_386/tasks/15.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_windows_386/tasks/16.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_windows_386/tasks/17.json
File diff suppressed because it is too large
View File


+ 1
- 1
Releases/EasySpider_windows_386/tasks/19.json
File diff suppressed because it is too large
View File


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save