Browse Source

Break inside every loop

naibo 2 years ago
parent
commit
4b276d47c1

+ 1 - 1
ElectronJS/README.md

@@ -25,7 +25,7 @@ chromedriver_mac64 # for mac x64
 For example, to build this software on the Windows x64 platform, first download Chrome for Windows x64, copy the whole `chrome` folder into this `ElectronJS` folder, and rename it to `chrome_win64`. Assuming the Chrome version you downloaded is 110, then download a `chromedriver.exe` of version 110 for Windows x64, put it into the `chrome_win64` folder, and rename it to `chromedriver_win64.exe`.
 For example, to build this software on the Windows x64 platform, first download Chrome for Windows x64, copy the whole `chrome` folder into this `ElectronJS` folder, and rename it to `chrome_win64`. Assuming the Chrome version you downloaded is 110, then download a `chromedriver.exe` of version 110 for Windows x64, put it into the `chrome_win64` folder, and rename it to `chromedriver_win64.exe`.
 
 
 
 
-Finally, copy the `stealth.min.js` and `execute.bat` (for windows) file in this folder to these `chrome` folders.
+Finally, copy the `stealth.min.js` and `execute.bat` (for Windows x64) files in this folder to these `chrome` folders.
 
 
 ## Run Instruction
 ## Run Instruction
 
 

+ 18 - 1
ElectronJS/src/taskGrid/FlowChart_CN.html

@@ -357,6 +357,8 @@
                     <!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
                     <!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
                     <label v-if='parseInt(loopType) == 0'>最多执行循环次数(0代表无限循环直到找不到元素为止):</label>
                     <label v-if='parseInt(loopType) == 0'>最多执行循环次数(0代表无限循环直到找不到元素为止):</label>
                     <input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
                     <input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
+
+
                     <label><b>历史记录回退后</b>等待秒数:</label>
                     <label><b>历史记录回退后</b>等待秒数:</label>
                     <input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["historyWait"]'></input>
                     <input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["historyWait"]'></input>
                     <label>执行完是否向下滚动:</label>
                     <label>执行完是否向下滚动:</label>
@@ -367,7 +369,22 @@
                     </select>
                     </select>
                     <label>滚动次数:</label>
                     <label>滚动次数:</label>
                     <input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollCount']" type="number" required></input>
                     <input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollCount']" type="number" required></input>
-
+                    <div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
+                        <div>
+                            <p><label>(高级操作)使用代码/脚本定义循环退出条件: </label></p>
+                            <select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
+                                <option value = 0>不设置脚本(选择这个下面写了脚本也不会执行)</option>
+                                <option value = 1>JavaScript脚本</option>
+                                <option value = 2>操作系统级别命令</option>
+                            </select>
+                            <div>
+                                <textarea style="margin-top: 10px" onkeydown="inputDelete(event)" class="form-control" rows="2"
+                                          placeholder='命令返回值小于等于0或为假时则直接退出循环,不管其他条件如何。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。' v-model='nowNode["parameters"]["breakCode"]'></textarea>
+                                <label>最长等待脚本执行时间(0代表无限等待): </label>
+                                <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["breakCodeWaitTime"]'></input>
+                            </div>
+                        </div>
+                    </div>
                 </div>
                 </div>
 
 
                 <div class="elements" v-if="nodeType==9">
                 <div class="elements" v-if="nodeType==9">

+ 1 - 2
ElectronJS/src/taskGrid/FlowChart_CN.js

@@ -552,8 +552,7 @@ document.onkeydown = function(e) {
             deleteElement();
             deleteElement();
         }
         }
     } else { //ctrl+s保存服务
     } else { //ctrl+s保存服务
-        let currKey = 0,
-            e = e || event || window.event;
+        let currKey = 0;
         currKey = e.keyCode || e.which || e.charCode;
         currKey = e.keyCode || e.which || e.charCode;
         if (currKey == 83 && (e.ctrlKey || e.metaKey)) {
         if (currKey == 83 && (e.ctrlKey || e.metaKey)) {
             $('#save').click();
             $('#save').click();

+ 14 - 0
ElectronJS/src/taskGrid/logic_CN.js

@@ -47,6 +47,16 @@ function changeGetDataParameters(msg, i) {
     msg["parameters"][i]["downloadPic"] = 0; //是否下载图片
     msg["parameters"][i]["downloadPic"] = 0; //是否下载图片
 }
 }
 
 
+
+function extractTitle(html) {
+    var match = html.match(/<title[^>]*>([^<]+)<\/title>/i);
+    if (match && match[1]) {
+        return "采集" + match[1];
+    } else {
+        return "采集新Web页面";
+    }
+}
+
 function handleAddElement(msg) {
 function handleAddElement(msg) {
     if (msg["type"] == "openPage") {
     if (msg["type"] == "openPage") {
         addElement(1, msg);
         addElement(1, msg);
@@ -166,6 +176,9 @@ function addParameters(t) {
         t["parameters"]["waitTime"] = 0; //最长等待时间
         t["parameters"]["waitTime"] = 0; //最长等待时间
         t["parameters"]["exitCount"] = 0; //执行多少次后退出循环,0代表不设置此条件
         t["parameters"]["exitCount"] = 0; //执行多少次后退出循环,0代表不设置此条件
         t["parameters"]["historyWait"] = 2; //历史记录回退时间,用于循环点击每个链接的情况下点击链接后不打开新标签页的情况
         t["parameters"]["historyWait"] = 2; //历史记录回退时间,用于循环点击每个链接的情况下点击链接后不打开新标签页的情况
+        t["parameters"]["breakMode"] = 0; //break mode: 0 = no script (breakCode is ignored), 1 = JavaScript, 2 = OS-level command
+        t["parameters"]["breakCode"] = ""; //break条件
+        t["parameters"]["breakCodeWaitTime"] = 0; //break条件等待时间
     } else if (t.option == 9) { //条件
     } else if (t.option == 9) { //条件
 
 
     } else if (t.option == 10) { //条件分支
     } else if (t.option == 10) { //条件分支
@@ -365,6 +378,7 @@ function saveService(type) {
             "url": url,
             "url": url,
             "links": links,
             "links": links,
             "create_time": new Date().toLocaleString(),
             "create_time": new Date().toLocaleString(),
+            "version": "0.3.0",
             "containJudge": containJudge,
             "containJudge": containJudge,
             "desc": serviceDescription,
             "desc": serviceDescription,
             "inputParameters": inputParameters,
             "inputParameters": inputParameters,

+ 1 - 1
ElectronJS/src/taskGrid/taskList.html

@@ -21,7 +21,7 @@
                 <table style="table-layout: fixed;" class="table table-hover">
                 <table style="table-layout: fixed;" class="table table-hover">
                     <thead>
                     <thead>
                         <tr>
                         <tr>
-                            <th>ID</th>
+                            <th>No.</th>
                             <th>{{"Task Name~任务名称" | lang}}</th>
                             <th>{{"Task Name~任务名称" | lang}}</th>
                             <th>URL</th>
                             <th>URL</th>
                             <th v-bind:colspan="type">{{"Operations~操作" | lang}}</th>
                             <th v-bind:colspan="type">{{"Operations~操作" | lang}}</th>

+ 215 - 0
ElectronJS/tasks/60.json

@@ -0,0 +1,215 @@
+{
+    "id": 60,
+    "name": "新web采集任务",
+    "url": "https://www.jd.com",
+    "links": "https://www.jd.com",
+    "create_time": "5/21/2023, 4:26:32 PM",
+    "containJudge": false,
+    "desc": "https://www.jd.com",
+    "inputParameters": [
+        {
+            "id": 0,
+            "name": "urlList_0",
+            "nodeId": 1,
+            "nodeName": "打开网页",
+            "value": "https://www.jd.com",
+            "desc": "要采集的网址列表,多行以\\n分开",
+            "type": "string",
+            "exampleValue": "https://www.jd.com"
+        }
+    ],
+    "outputParameters": [
+        {
+            "id": 0,
+            "name": "参数1_文本",
+            "desc": "",
+            "type": "string",
+            "exampleValue": "/手机/数码"
+        }
+    ],
+    "graph": [
+        {
+            "index": 0,
+            "id": 0,
+            "parentId": 0,
+            "type": -1,
+            "option": 0,
+            "title": "root",
+            "sequence": [
+                1,
+                2
+            ],
+            "parameters": {
+                "history": 1,
+                "tabIndex": 0,
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0
+            },
+            "isInLoop": false
+        },
+        {
+            "id": 1,
+            "index": 1,
+            "parentId": 0,
+            "type": 0,
+            "option": 1,
+            "title": "打开网页",
+            "sequence": [],
+            "isInLoop": false,
+            "position": 0,
+            "parameters": {
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "url": "https://www.jd.com",
+                "links": "https://www.jd.com",
+                "maxWaitTime": 10,
+                "scrollType": 0,
+                "scrollCount": 0
+            }
+        },
+        {
+            "id": 2,
+            "index": 2,
+            "parentId": 0,
+            "type": 1,
+            "option": 8,
+            "title": "循环",
+            "sequence": [
+                3
+            ],
+            "isInLoop": false,
+            "position": 1,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "scrollType": 0,
+                "scrollCount": 0,
+                "loopType": "1",
+                "pathList": "",
+                "textList": "",
+                "code": "",
+                "waitTime": 0,
+                "exitCount": 0,
+                "historyWait": 2,
+                "breakMode": "1",
+                "breakCode": "return window.innerHeight > 500",
+                "breakCodeWaitTime": 0,
+                "allXPaths": [
+                    "/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]",
+                    "//div[contains(., '/手机/数码')]",
+                    "//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"
+                ]
+            }
+        },
+        {
+            "id": 3,
+            "index": 3,
+            "parentId": 2,
+            "type": 0,
+            "option": 3,
+            "title": "提取数据",
+            "sequence": [],
+            "isInLoop": true,
+            "position": 0,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "paras": [
+                    {
+                        "nodeType": 0,
+                        "contentType": 0,
+                        "relative": true,
+                        "name": "参数1_文本",
+                        "desc": "",
+                        "extractType": 0,
+                        "relativeXPath": "",
+                        "allXPaths": "",
+                        "exampleValues": [
+                            {
+                                "num": 0,
+                                "value": "/手机/数码"
+                            },
+                            {
+                                "num": 1,
+                                "value": "/家用电器"
+                            },
+                            {
+                                "num": 2,
+                                "value": "/电脑/办公"
+                            },
+                            {
+                                "num": 3,
+                                "value": "/家纺/家居/厨具"
+                            },
+                            {
+                                "num": 4,
+                                "value": "/家具/家装/灯具/工业品"
+                            },
+                            {
+                                "num": 5,
+                                "value": "/内衣/男装/女装/童装"
+                            },
+                            {
+                                "num": 6,
+                                "value": "/箱包/钟表/珠宝/女鞋"
+                            },
+                            {
+                                "num": 7,
+                                "value": "/运动/户外/男鞋"
+                            },
+                            {
+                                "num": 8,
+                                "value": "/汽车用品/车载电器"
+                            },
+                            {
+                                "num": 9,
+                                "value": "/母婴/洗护喂养"
+                            },
+                            {
+                                "num": 10,
+                                "value": "/玩具乐器/宠物生活"
+                            },
+                            {
+                                "num": 11,
+                                "value": "/家庭清洁/个人护理/计生情趣"
+                            },
+                            {
+                                "num": 12,
+                                "value": "/图书/童书/文学"
+                            }
+                        ],
+                        "default": "",
+                        "beforeJS": "",
+                        "beforeJSWaitTime": 0,
+                        "JS": "",
+                        "JSWaitTime": 0,
+                        "afterJS": "",
+                        "afterJSWaitTime": 0,
+                        "downloadPic": 0
+                    }
+                ],
+                "loopType": 1
+            }
+        }
+    ]
+}

File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/61.json


File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/62.json


File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/63.json


+ 3 - 0
ElectronJS/每次发布之前要检查的事项.md

@@ -0,0 +1,3 @@
+- 删除chrome的install文件夹
+- 修改logic.js及logic_CN.js、ElectronJS/package.json、Extension/manifest_v3/package.json、ExecuteStage/easyspider_executestage.py中的版本号
+- 更新最新的tasks文件夹

+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "console": "integratedTerminal",
             "console": "integratedTerminal",
             "justMyCode": true,
             "justMyCode": true,
             // "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
             // "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
-            "args": ["--id", "14", "--headless", "0"]
+            "args": ["--id", "23", "--headless", "0"]
         }
         }
     ]
     ]
 }
 }

+ 35 - 8
ExecuteStage/easyspider_executestage.py

@@ -289,6 +289,15 @@ def judgeExcute(node, loopElement, clickPath="", index=0):
     if executeBranchId != 0:
     if executeBranchId != 0:
         executeNode(executeBranchId, loopElement, clickPath, index)
         executeNode(executeBranchId, loopElement, clickPath, index)
 
 
+def get_output_code(output):
+    try:
+        if output.find("rue") != -1: # 如果返回值中包含true
+            code = 1
+        else:
+            code = int(output)
+    except:
+        code = 0
+    return code
 
 
 # 对循环的处理
 # 对循环的处理
 def loopExcute(node, loopValue, clickPath="", index=0):
 def loopExcute(node, loopValue, clickPath="", index=0):
@@ -335,13 +344,17 @@ def loopExcute(node, loopValue, clickPath="", index=0):
                         if node["option"] != 2:
                         if node["option"] != 2:
                             executeNode(i, None, node["parameters"]["xpath"], 0)
                             executeNode(i, None, node["parameters"]["xpath"], 0)
                     break  # 如果找不到元素,退出循环
                     break  # 如果找不到元素,退出循环
-
             count = count + 1
             count = count + 1
             Log("Page: ", count)
             Log("Page: ", count)
             recordLog("Page:" + str(count))
             recordLog("Page:" + str(count))
             # print(node["parameters"]["exitCount"], "-------")
             # print(node["parameters"]["exitCount"], "-------")
             if node["parameters"]["exitCount"] == count:  # 如果达到设置的退出循环条件的话
             if node["parameters"]["exitCount"] == count:  # 如果达到设置的退出循环条件的话
                 break
                 break
+            if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
+                output = execute_code(int(node["parameters"]["breakMode"]) -1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
+                code = get_output_code(output)
+                if code <= 0:
+                    break
     elif int(node["parameters"]["loopType"]) == 1:  # 不固定元素列表
     elif int(node["parameters"]["loopType"]) == 1:  # 不固定元素列表
         try:
         try:
             elements = browser.find_elements(By.XPATH,
             elements = browser.find_elements(By.XPATH,
@@ -370,6 +383,11 @@ def loopExcute(node, loopValue, clickPath="", index=0):
                     Log("Change history back time or:",
                     Log("Change history back time or:",
                         node["parameters"]["historyWait"])
                         node["parameters"]["historyWait"])
                     browser.execute_script('window.stop()')
                     browser.execute_script('window.stop()')
+                if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
+                    output = execute_code(int(node["parameters"]["breakMode"]) -1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
+                    code = get_output_code(output)
+                    if code <= 0:
+                        break
         except NoSuchElementException:
         except NoSuchElementException:
             Log("pathNotFound: ", node["parameters"]["xpath"])
             Log("pathNotFound: ", node["parameters"]["xpath"])
             recordLog("pathNotFound: " + node["parameters"]["xpath"])
             recordLog("pathNotFound: " + node["parameters"]["xpath"])
@@ -407,12 +425,22 @@ def loopExcute(node, loopValue, clickPath="", index=0):
                 continue  # 循环中找不到元素就略过操作
                 continue  # 循环中找不到元素就略过操作
             except Exception as e:
             except Exception as e:
                 raise
                 raise
+            if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
+                output = execute_code(int(node["parameters"]["breakMode"]) -1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
+                code = get_output_code(output)
+                if code <= 0:
+                    break
     elif int(node["parameters"]["loopType"]) == 3:  # 固定文本列表
     elif int(node["parameters"]["loopType"]) == 3:  # 固定文本列表
         textList = node["parameters"]["textList"].split("\n")
         textList = node["parameters"]["textList"].split("\n")
         for text in textList:
         for text in textList:
             recordLog("input: " + text)
             recordLog("input: " + text)
             for i in node["sequence"]:  # 挨个执行操作
             for i in node["sequence"]:  # 挨个执行操作
                 executeNode(i, text, "", 0)
                 executeNode(i, text, "", 0)
+            if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
+                output = execute_code(int(node["parameters"]["breakMode"]) -1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
+                code = get_output_code(output)
+                if code <= 0:
+                    break
     elif int(node["parameters"]["loopType"]) == 4:  # 固定网址列表
     elif int(node["parameters"]["loopType"]) == 4:  # 固定网址列表
         # tempList = node["parameters"]["textList"].split("\r\n")
         # tempList = node["parameters"]["textList"].split("\r\n")
         urlList = list(
         urlList = list(
@@ -425,19 +453,18 @@ def loopExcute(node, loopValue, clickPath="", index=0):
             recordLog("input: " + url)
             recordLog("input: " + url)
             for i in node["sequence"]:
             for i in node["sequence"]:
                 executeNode(i, url, "", 0)
                 executeNode(i, url, "", 0)
+            if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
+                output = execute_code(int(node["parameters"]["breakMode"]) -1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
+                code = get_output_code(output)
+                if code <= 0:
+                    break
     elif int(node["parameters"]["loopType"]) <= 6:  # 命令返回值
     elif int(node["parameters"]["loopType"]) <= 6:  # 命令返回值
         while True:  # do while循环
         while True:  # do while循环
             if int(node["parameters"]["loopType"]) == 5:  # JS
             if int(node["parameters"]["loopType"]) == 5:  # JS
                 output = execute_code(0, node["parameters"]["code"], node["parameters"]["waitTime"])
                 output = execute_code(0, node["parameters"]["code"], node["parameters"]["waitTime"])
             elif int(node["parameters"]["loopType"]) == 6:  # System
             elif int(node["parameters"]["loopType"]) == 6:  # System
                 output = execute_code(1, node["parameters"]["code"], node["parameters"]["waitTime"])
                 output = execute_code(1, node["parameters"]["code"], node["parameters"]["waitTime"])
-            try:
-                if output.find("rue") != -1: # 如果返回值中包含true
-                    code = 1
-                else:
-                    code = int(output)
-            except:
-                code = 0
+            code = get_output_code(output)
             if code <= 0:
             if code <= 0:
                 break
                 break
             for i in node["sequence"]:  # 挨个执行操作
             for i in node["sequence"]:  # 挨个执行操作

+ 1 - 0
Extension/manifest_v3/package.json

@@ -8,6 +8,7 @@
     "crx": "crx3",
     "crx": "crx3",
     "package": "node package.js"
     "package": "node package.js"
   },
   },
+  "license": "GPL-3.0",
   "dependencies": {
   "dependencies": {
     "crx": "^5.0.1",
     "crx": "^5.0.1",
     "crx3": "^1.1.3",
     "crx3": "^1.1.3",

File diff suppressed because it is too large
+ 0 - 0
Releases/EasySpider_windows_amd64/execution_instances/0.json


File diff suppressed because it is too large
+ 0 - 0
Releases/EasySpider_windows_amd64/execution_instances/1.json


File diff suppressed because it is too large
+ 0 - 0
Releases/EasySpider_windows_amd64/execution_instances/2.json


File diff suppressed because it is too large
+ 0 - 0
Releases/EasySpider_windows_amd64/execution_instances/3.json


File diff suppressed because it is too large
+ 0 - 0
Releases/EasySpider_windows_amd64/execution_instances/4.json


Some files were not shown because too many files changed in this diff