2 tahun lalu · 42db55deb8
--- a/ElectronJS/.gitignore
+++ b/ElectronJS/.gitignore
@@ -10,3 +10,7 @@ user_data/
 
				 Data/
			
 
				 Chrome/
			
 
				 execution_instances/*
			
 
				+EasySpider_en.crx
			
 
				+EasySpider_zh.crx
			
 
				+.DS_Store
			
 
				+npminstall-debug.log
			
--- a/ElectronJS/EasySpider_en.crx
+++ b/ElectronJS/EasySpider_en.crx
--- a/ElectronJS/EasySpider_zh.crx
+++ b/ElectronJS/EasySpider_zh.crx
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@@ -235,6 +235,13 @@
 
				                             <option :value = 3>表单值</option>
			
 
				                             <option :value = 4>图片地址</option>
			
 
				                         </select>
			
 
				+                        <div v-if='paras.parameters[paraIndex]["nodeType"] == 4'>
			
 
				+                            <label>提取图片地址后是否同时下载图片</label>
			
 
				+                            <select v-model='paras.parameters[paraIndex]["downloadPic"]' class="form-control">
			
 
				+                                <option :value = 0>否</option>
			
 
				+                                <option :value = 1>是</option>
			
 
				+                            </select>
			
 
				+                        </div>
			
 
				 <!--                        <label>提取方式</label>-->
			
 
				 <!--                        <select v-model='paras.parameters[paraIndex]["extractType"]' class="form-control">-->
			
 
				 <!--                            <option :value = 0>普通提取</option>-->
			
@@ -390,8 +397,8 @@
 
				                         <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
			
 
				                     </div>
			
 
				                     <div v-else-if='TClass == 7'>
			
 
				-                        <label>代码/脚本内容: </label>
			
 
				-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="输入针对该循环项的JS命令，该循环项用arguments[0]表示，返回值大于0或为真则执行此分支内操作，否则不执行。如：return arguments[0].innerText.indexOf('123') >=0 即判断当前循环项的文本是否包含123，注意要配合循环类型为元素相关（如不固定元素列表）使用。"></textarea>
			
 
				+                        <label>代码/脚本内容（<a href="https://github.com/NaiboWang/EasySpider/wiki/Example-of-JavaScript-instruction-for-the-current-iteration-in-a-conditional-statement" target="_blank">点击此处</a>查看更多示例）: </label>
			
 
				+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="输入针对该循环项的JS命令，该循环项用arguments[0]表示，返回值大于0或为真则执行此分支内操作，否则不执行。如：return arguments[0].innerText.length >=5 即判断当前循环项的文本长度是否大于5，注意要配合循环类型为元素相关（如不固定元素列表）使用。"></textarea>
			
 
				                         <label>最长等待脚本执行时间（0代表无限等待）: </label>
			
 
				                         <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
			
 
				                     </div>
			
--- a/ElectronJS/src/taskGrid/logic_CN.js
+++ b/ElectronJS/src/taskGrid/logic_CN.js
@@ -44,6 +44,7 @@ function changeGetDataParameters(msg, i) {
 
				     msg["parameters"][i]["JSWaitTime"] = 0; //JS等待时间
			
 
				     msg["parameters"][i]["afterJS"] = ""; //执行后执行的js
			
 
				     msg["parameters"][i]["afterJSWaitTime"] = 0; //执行后js等待时间
			
 
				+    msg["parameters"][i]["downloadPic"] = 0; //是否下载图片
			
 
				 }
			
 
				 
			
 
				 function handleAddElement(msg) {
			
--- a/ElectronJS/tasks/57.json
+++ b/ElectronJS/tasks/57.json
@@ -0,0 +1,335 @@
 
				+{
			
 
				+    "id": 57,
			
 
				+    "name": "图片下载",
			
 
				+    "url": "https://www.jd.com",
			
 
				+    "links": "https://www.jd.com",
			
 
				+    "create_time": "5/20/2023, 8:18:15 PM",
			
 
				+    "containJudge": false,
			
 
				+    "desc": "https://www.jd.com",
			
 
				+    "inputParameters": [
			
 
				+        {
			
 
				+            "id": 0,
			
 
				+            "name": "urlList_0",
			
 
				+            "nodeId": 1,
			
 
				+            "nodeName": "打开网页",
			
 
				+            "value": "https://www.jd.com",
			
 
				+            "desc": "要采集的网址列表，多行以\\n分开",
			
 
				+            "type": "string",
			
 
				+            "exampleValue": "https://www.jd.com"
			
 
				+        }
			
 
				+    ],
			
 
				+    "outputParameters": [
			
 
				+        {
			
 
				+            "id": 0,
			
 
				+            "name": "参数3_图片地址",
			
 
				+            "desc": "",
			
 
				+            "type": "string",
			
 
				+            "exampleValue": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
			
 
				+        }
			
 
				+    ],
			
 
				+    "graph": [
			
 
				+        {
			
 
				+            "index": 0,
			
 
				+            "id": 0,
			
 
				+            "parentId": 0,
			
 
				+            "type": -1,
			
 
				+            "option": 0,
			
 
				+            "title": "root",
			
 
				+            "sequence": [
			
 
				+                1,
			
 
				+                4
			
 
				+            ],
			
 
				+            "parameters": {
			
 
				+                "history": 1,
			
 
				+                "tabIndex": 0,
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "",
			
 
				+                "wait": 0
			
 
				+            },
			
 
				+            "isInLoop": false
			
 
				+        },
			
 
				+        {
			
 
				+            "id": 1,
			
 
				+            "index": 1,
			
 
				+            "parentId": 0,
			
 
				+            "type": 0,
			
 
				+            "option": 1,
			
 
				+            "title": "打开网页",
			
 
				+            "sequence": [],
			
 
				+            "isInLoop": false,
			
 
				+            "position": 0,
			
 
				+            "parameters": {
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "",
			
 
				+                "wait": 0,
			
 
				+                "beforeJS": "",
			
 
				+                "beforeJSWaitTime": 0,
			
 
				+                "afterJS": "",
			
 
				+                "afterJSWaitTime": 0,
			
 
				+                "url": "https://www.jd.com",
			
 
				+                "links": "https://www.jd.com",
			
 
				+                "maxWaitTime": 10,
			
 
				+                "scrollType": 0,
			
 
				+                "scrollCount": 0
			
 
				+            }
			
 
				+        },
			
 
				+        {
			
 
				+            "id": -1,
			
 
				+            "index": 2,
			
 
				+            "parentId": 0,
			
 
				+            "type": 1,
			
 
				+            "option": 8,
			
 
				+            "title": "循环",
			
 
				+            "sequence": [
			
 
				+                3
			
 
				+            ],
			
 
				+            "isInLoop": false,
			
 
				+            "position": 1,
			
 
				+            "parameters": {
			
 
				+                "history": 4,
			
 
				+                "tabIndex": -1,
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "/html/body/div[4]/div[1]/div[4]/a",
			
 
				+                "wait": 0,
			
 
				+                "beforeJS": "",
			
 
				+                "beforeJSWaitTime": 0,
			
 
				+                "afterJS": "",
			
 
				+                "afterJSWaitTime": 0,
			
 
				+                "scrollType": 0,
			
 
				+                "scrollCount": 0,
			
 
				+                "loopType": 1,
			
 
				+                "pathList": "",
			
 
				+                "textList": "",
			
 
				+                "code": "",
			
 
				+                "waitTime": 0,
			
 
				+                "exitCount": 0,
			
 
				+                "historyWait": 2,
			
 
				+                "allXPaths": [
			
 
				+                    "/html/body/div[4]/div[1]/div[4]/a[1]",
			
 
				+                    "//a[contains(., '平板電腦')]"
			
 
				+                ]
			
 
				+            }
			
 
				+        },
			
 
				+        {
			
 
				+            "id": -1,
			
 
				+            "index": 3,
			
 
				+            "parentId": 2,
			
 
				+            "type": 0,
			
 
				+            "option": 3,
			
 
				+            "title": "提取数据",
			
 
				+            "sequence": [],
			
 
				+            "isInLoop": true,
			
 
				+            "position": 0,
			
 
				+            "parameters": {
			
 
				+                "history": 4,
			
 
				+                "tabIndex": -1,
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "",
			
 
				+                "wait": 0,
			
 
				+                "beforeJS": "",
			
 
				+                "beforeJSWaitTime": 0,
			
 
				+                "afterJS": "",
			
 
				+                "afterJSWaitTime": 0,
			
 
				+                "paras": [
			
 
				+                    {
			
 
				+                        "nodeType": 1,
			
 
				+                        "contentType": 0,
			
 
				+                        "relative": true,
			
 
				+                        "name": "参数1_链接文本",
			
 
				+                        "desc": "",
			
 
				+                        "extractType": 0,
			
 
				+                        "relativeXPath": "",
			
 
				+                        "allXPaths": "",
			
 
				+                        "exampleValues": [
			
 
				+                            {
			
 
				+                                "num": 0,
			
 
				+                                "value": "平板電腦"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 1,
			
 
				+                                "value": "爆款耳機"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 2,
			
 
				+                                "value": "手機"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 3,
			
 
				+                                "value": "數據線"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 4,
			
 
				+                                "value": "年貨節"
			
 
				+                            }
			
 
				+                        ],
			
 
				+                        "default": "",
			
 
				+                        "beforeJS": "",
			
 
				+                        "beforeJSWaitTime": 0,
			
 
				+                        "JS": "",
			
 
				+                        "JSWaitTime": 0,
			
 
				+                        "afterJS": "",
			
 
				+                        "afterJSWaitTime": 0,
			
 
				+                        "downloadPic": 0
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "nodeType": 2,
			
 
				+                        "contentType": 0,
			
 
				+                        "relative": true,
			
 
				+                        "name": "参数2_链接地址",
			
 
				+                        "desc": "",
			
 
				+                        "relativeXPath": "",
			
 
				+                        "allXPaths": "",
			
 
				+                        "exampleValues": [
			
 
				+                            {
			
 
				+                                "num": 0,
			
 
				+                                "value": "https://search.jd.com/Search?keyword=%E5%B9%B3%E6%9D%BF%E7%94%B5%E8%84%91&enc=utf-8&wq=%E5%B9%B3%E6%9D%BF%E7%94%B5%E8%84%91&pvid=84c62205dccd43dfad1b6eb5fdf5077b"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 1,
			
 
				+                                "value": "https://audio.jd.com/"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 2,
			
 
				+                                "value": "https://search.jd.com/search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&ev=exbrand_%E5%B0%8F%E7%B1%B3%EF%BC%88MI%EF%BC%89%5E&uc=0#J_searchWrap"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 3,
			
 
				+                                "value": "https://mall.jd.com/index-1000007418.html"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 4,
			
 
				+                                "value": "https://pro.jd.com/mall/active/22WyJjMqTCbvjj1YB3pSJssBonLR/index.html"
			
 
				+                            }
			
 
				+                        ],
			
 
				+                        "default": "",
			
 
				+                        "beforeJS": "",
			
 
				+                        "beforeJSWaitTime": 0,
			
 
				+                        "JS": "",
			
 
				+                        "JSWaitTime": 0,
			
 
				+                        "afterJS": "",
			
 
				+                        "afterJSWaitTime": 0,
			
 
				+                        "downloadPic": 0
			
 
				+                    }
			
 
				+                ],
			
 
				+                "loopType": 1
			
 
				+            }
			
 
				+        },
			
 
				+        {
			
 
				+            "id": 2,
			
 
				+            "index": 4,
			
 
				+            "parentId": 0,
			
 
				+            "type": 1,
			
 
				+            "option": 8,
			
 
				+            "title": "循环",
			
 
				+            "sequence": [
			
 
				+                5
			
 
				+            ],
			
 
				+            "isInLoop": false,
			
 
				+            "position": 1,
			
 
				+            "parameters": {
			
 
				+                "history": 4,
			
 
				+                "tabIndex": -1,
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div/div[1]/div[1]/a[1]/img[1]",
			
 
				+                "wait": 0,
			
 
				+                "beforeJS": "",
			
 
				+                "beforeJSWaitTime": 0,
			
 
				+                "afterJS": "",
			
 
				+                "afterJSWaitTime": 0,
			
 
				+                "scrollType": 0,
			
 
				+                "scrollCount": 0,
			
 
				+                "loopType": 1,
			
 
				+                "pathList": "",
			
 
				+                "textList": "",
			
 
				+                "code": "",
			
 
				+                "waitTime": 0,
			
 
				+                "exitCount": 0,
			
 
				+                "historyWait": 2,
			
 
				+                "allXPaths": [
			
 
				+                    "/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/a[1]/img[1]",
			
 
				+                    "//img[contains(., '')]"
			
 
				+                ]
			
 
				+            }
			
 
				+        },
			
 
				+        {
			
 
				+            "id": 3,
			
 
				+            "index": 5,
			
 
				+            "parentId": 2,
			
 
				+            "type": 0,
			
 
				+            "option": 3,
			
 
				+            "title": "提取数据",
			
 
				+            "sequence": [],
			
 
				+            "isInLoop": true,
			
 
				+            "position": 0,
			
 
				+            "parameters": {
			
 
				+                "history": 4,
			
 
				+                "tabIndex": -1,
			
 
				+                "useLoop": false,
			
 
				+                "xpath": "",
			
 
				+                "wait": 0,
			
 
				+                "beforeJS": "",
			
 
				+                "beforeJSWaitTime": 0,
			
 
				+                "afterJS": "",
			
 
				+                "afterJSWaitTime": 0,
			
 
				+                "paras": [
			
 
				+                    {
			
 
				+                        "nodeType": 4,
			
 
				+                        "contentType": 0,
			
 
				+                        "relative": true,
			
 
				+                        "name": "参数3_图片地址",
			
 
				+                        "desc": "",
			
 
				+                        "extractType": 0,
			
 
				+                        "relativeXPath": "",
			
 
				+                        "allXPaths": "",
			
 
				+                        "exampleValues": [
			
 
				+                            {
			
 
				+                                "num": 0,
			
 
				+                                "value": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 1,
			
 
				+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 2,
			
 
				+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/222655/28/27238/153145/644b858eF2cd1200f/e37bd7da42a814b0.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 3,
			
 
				+                                "value": "//m.360buyimg.com/babel/s710x370_jfs/t1/197659/30/31344/62825/640fd751F694963ed/a6e1ac2e5c27f160.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 4,
			
 
				+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 5,
			
 
				+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/222655/28/27238/153145/644b858eF2cd1200f/e37bd7da42a814b0.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 6,
			
 
				+                                "value": "//m.360buyimg.com/babel/s710x370_jfs/t1/197659/30/31344/62825/640fd751F694963ed/a6e1ac2e5c27f160.jpg!q70.dpg"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 7,
			
 
				+                                "value": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
			
 
				+                            },
			
 
				+                            {
			
 
				+                                "num": 8,
			
 
				+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
			
 
				+                            }
			
 
				+                        ],
			
 
				+                        "default": "",
			
 
				+                        "beforeJS": "",
			
 
				+                        "beforeJSWaitTime": 0,
			
 
				+                        "JS": "",
			
 
				+                        "JSWaitTime": 0,
			
 
				+                        "afterJS": "",
			
 
				+                        "afterJSWaitTime": 0,
			
 
				+                        "downloadPic": 1
			
 
				+                    }
			
 
				+                ]
			
 
				+            }
			
 
				+        }
			
 
				+    ]
			
 
				+}
			
--- a/ElectronJS/tasks/58.json
+++ b/ElectronJS/tasks/58.json
--- a/ElectronJS/tasks/59.json
+++ b/ElectronJS/tasks/59.json
--- a/ExecuteStage/.gitignore
+++ b/ExecuteStage/.gitignore
@@ -11,4 +11,5 @@ Data/
 
				 tasks/
			
 
				 Application/
			
 
				 .history
			
 
				-execution_instances/
			
 
				+execution_instances/
			
 
				+.DS_Store
			
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
 
				             "console": "integratedTerminal",
			
 
				             "justMyCode": true,
			
 
				             // "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
			
 
				-            "args": ["--id", "10", "--headless", "0"]
			
 
				+            "args": ["--id", "15", "--headless", "0"]
			
 
				         }
			
 
				     ]
			
 
				 }
			
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -30,7 +30,7 @@ from selenium.webdriver.common.by import By
 
				 from commandline_config import Config
			
 
				 import pytesseract
			
 
				 from PIL import Image
			
 
				-
			
 
				+import uuid
			
 
				 
			
 
				 saveName, log, OUTPUT, browser, SAVED = None, "", "", None, False
			
 
				 
			
@@ -65,6 +65,38 @@ def Log(text, text2=""):
 
				 # 屏幕滚动函数
			
 
				 
			
 
				 
			
 
				+
			
 
				+def download_image(url, save_directory):
			
 
				+    # 定义浏览器头信息
			
 
				+    headers = {
			
 
				+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
			
 
				+    }
			
 
				+    
			
 
				+    # 发送 GET 请求获取图片数据
			
 
				+    response = requests.get(url, headers=headers)
			
 
				+
			
 
				+    # 检查响应状态码是否为成功状态
			
 
				+    if response.status_code == requests.codes.ok:
			
 
				+        # 提取文件名
			
 
				+        file_name = url.split('/')[-1]
			
 
				+        
			
 
				+        # 生成唯一的新文件名
			
 
				+        new_file_name = str(uuid.uuid4()) + '_' + file_name
			
 
				+        
			
 
				+        # 构建保存路径
			
 
				+        save_path = os.path.join(save_directory, new_file_name)
			
 
				+        
			
 
				+        # 保存图片到本地
			
 
				+        with open(save_path, 'wb') as file:
			
 
				+            file.write(response.content)
			
 
				+        
			
 
				+        print("图片已成功下载到:", save_path)
			
 
				+        print("The image has been successfully downloaded to:", save_path)
			
 
				+    else:
			
 
				+        print("下载图片失败，请检查此图片链接是否有效:", url)
			
 
				+        print("Failed to download image, please check if this image link is valid:", url)
			
 
				+
			
 
				+
			
 
				 def scrollDown(para, rt=""):
			
 
				     try:
			
 
				         if para["scrollType"] != 0 and para["scrollCount"] > 0:  # 控制屏幕向下滚动
			
@@ -180,6 +212,7 @@ def executeNode(nodeId, loopValue="", clickPath="", index=0):
 
				         inputInfo(node["parameters"], loopValue)
			
 
				     elif node["option"] == 5:  # 自定义操作
			
 
				         customOperation(node, loopValue)
			
 
				+        saveData()
			
 
				     elif node["option"] == 8:  # 循环
			
 
				         recordLog("loop")
			
 
				         loopExcute(node, loopValue, clickPath, index)  # 执行循环
			
@@ -644,8 +677,8 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
 
				                 recordLog('Element %s not found, use default' % p["relativeXPath"])
			
 
				                 continue
			
 
				             except TimeoutException:  # 超时的时候设置超时值
			
 
				-                Log('time out after 10 seconds when getting data')
			
 
				-                recordLog('time out after 10 seconds when getting data')
			
 
				+                Log('time out after set seconds when getting data')
			
 
				+                recordLog('time out after set seconds when getting data')
			
 
				                 browser.execute_script('window.stop()')
			
 
				                 if p["relative"]:  # 是否相对xpath
			
 
				                     if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
			
@@ -660,104 +693,44 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
 
				             element = browser.find_element(By.XPATH, "//body")
			
 
				         try:
			
 
				             execute_code(2, p["beforeJS"], p["beforeJSWaitTime"], element) # 执行前置js
			
 
				-            if p["contentType"] == 2:
			
 
				-                content = element.get_attribute('innerHTML')
			
 
				-            elif p["contentType"] == 3:
			
 
				-                content = element.get_attribute('outerHTML')
			
 
				-            elif p["contentType"] == 4:
			
 
				-                # 获取元素的背景图片地址
			
 
				-                bg_url = element.value_of_css_property('background-image')
			
 
				-                # 清除背景图片地址中的多余字符
			
 
				-                bg_url = bg_url.replace('url("', '').replace('")', '')
			
 
				-                content = bg_url
			
 
				-            elif p["contentType"] == 5:
			
 
				-                content = browser.current_url
			
 
				-            elif p["contentType"] == 6:
			
 
				-                content = browser.title
			
 
				-            elif p["contentType"] == 7:
			
 
				-                # 获取整个网页的高度和宽度
			
 
				-                height = browser.execute_script("return document.body.scrollHeight");
			
 
				-                width = browser.execute_script("return document.body.scrollWidth");
			
 
				-                # 调整浏览器窗口的大小
			
 
				-                browser.set_window_size(width, height)
			
 
				-                element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
			
 
				-            elif p["contentType"] == 8:
			
 
				+            # 先处理特殊节点类型
			
 
				+            if p["nodeType"] == 2:
			
 
				+                if element.get_attribute("href") != None:
			
 
				+                    content = element.get_attribute("href")
			
 
				+                else:
			
 
				+                    content = ""
			
 
				+            elif p["nodeType"] == 3:
			
 
				+                if element.get_attribute("value") != None:
			
 
				+                    content = element.get_attribute("value")
			
 
				+                else:
			
 
				+                    content = ""
			
 
				+            elif p["nodeType"] == 4:  # 图片
			
 
				+                if element.get_attribute("src") != None:
			
 
				+                    content = element.get_attribute("src")
			
 
				+                else:
			
 
				+                    content = ""
			
 
				                 try:
			
 
				-                    screenshot = element.screenshot_as_png
			
 
				-                    screenshot_stream = io.BytesIO(screenshot)
			
 
				-                    # 使用Pillow库打开截图，并转换为灰度图像
			
 
				-                    image = Image.open(screenshot_stream).convert('L')
			
 
				-                    # 使用Tesseract OCR引擎识别图像中的文本
			
 
				-                    text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
			
 
				-                    content = text
			
 
				-                except Exception as e:
			
 
				-                    content = "OCR Error"
			
 
				-                    print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
			
 
				-                    print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
			
 
				-            elif p["contentType"] == 9:
			
 
				-                content = execute_code(2, p["JS"], p["JSWaitTime"], element)
			
 
				-            elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
			
 
				-                command = 'var arr = [];\
			
 
				-                var content = arguments[0];\
			
 
				-                for(var i = 0, len = content.childNodes.length; i < len; i++) {\
			
 
				-                    if(content.childNodes[i].nodeType === 3){  \
			
 
				-                        arr.push(content.childNodes[i].nodeValue);\
			
 
				+                    downloadPic = p["downloadPic"]
			
 
				+                except:
			
 
				+                    downloadPic = 0
			
 
				+                if downloadPic == 1:
			
 
				+                    download_image(content, "Data/" +saveName + "/")
			
 
				+            else: # 普通节点
			
 
				+                if p["contentType"] == 0:
			
 
				+                    content = element.text
			
 
				+                elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
			
 
				+                    command = 'var arr = [];\
			
 
				+                    var content = arguments[0];\
			
 
				+                    for(var i = 0, len = content.childNodes.length; i < len; i++) {\
			
 
				+                        if(content.childNodes[i].nodeType === 3){  \
			
 
				+                            arr.push(content.childNodes[i].nodeValue);\
			
 
				+                        }\
			
 
				                     }\
			
 
				-                }\
			
 
				-                var str = arr.join(" "); \
			
 
				-                return str;'
			
 
				-                content = browser.execute_script(command, element).replace(
			
 
				-                    "\n", "").replace("\\s+", " ")
			
 
				-                if p["nodeType"] == 2:
			
 
				-                    if element.get_attribute("href") != None:
			
 
				-                        content = element.get_attribute("href")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-                elif p["nodeType"] == 3:
			
 
				-                    if element.get_attribute("value") != None:
			
 
				-                        content = element.get_attribute("value")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-                elif p["nodeType"] == 4:  # 图片
			
 
				-                    if element.get_attribute("src") != None:
			
 
				-                        content = element.get_attribute("src")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-            elif p["contentType"] == 0:
			
 
				-                content = element.text
			
 
				-                if p["nodeType"] == 2:
			
 
				-                    if element.get_attribute("href") != None:
			
 
				-                        content = element.get_attribute("href")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-                elif p["nodeType"] == 3:
			
 
				-                    if element.get_attribute("value") != None:
			
 
				-                        content = element.get_attribute("value")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-                elif p["nodeType"] == 4:  # 图片
			
 
				-                    if element.get_attribute("src") != None:
			
 
				-                        content = element.get_attribute("src")
			
 
				-                    else:
			
 
				-                        content = ""
			
 
				-        except StaleElementReferenceException:  # 发生找不到元素的异常后，等待几秒重新查找
			
 
				-            recordLog('StaleElementReferenceException：'+p["relativeXPath"])
			
 
				-            time.sleep(3)
			
 
				-            try:
			
 
				-                if p["relative"]:  # 是否相对xpath
			
 
				-                    if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
			
 
				-                        element = loopElement
			
 
				-                        recordLog('StaleElementReferenceException：loopElement')
			
 
				-                    else:
			
 
				-                        element = loopElement.find_element(By.XPATH,
			
 
				-                                                           p["relativeXPath"][1:])
			
 
				-                        recordLog(
			
 
				-                            'StaleElementReferenceException：loopElement+relativeXPath')
			
 
				-                else:
			
 
				-                    element = browser.find_element(
			
 
				-                        By.XPATH, p["relativeXPath"])
			
 
				-                    recordLog('StaleElementReferenceException：relativeXPath')
			
 
				-                if p["contentType"] == 2:
			
 
				+                    var str = arr.join(" "); \
			
 
				+                    return str;'
			
 
				+                    content = browser.execute_script(command, element).replace(
			
 
				+                        "\n", "").replace("\\s+", " ")
			
 
				+                elif p["contentType"] == 2:
			
 
				                     content = element.get_attribute('innerHTML')
			
 
				                 elif p["contentType"] == 3:
			
 
				                     content = element.get_attribute('outerHTML')
			
@@ -788,55 +761,101 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
 
				                         text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
			
 
				                         content = text
			
 
				                     except Exception as e:
			
 
				-                        content = "OCR失败"
			
 
				-                        print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable path: https://tesseract-ocr.github.io/tessdoc/Installation.html")
			
 
				-                        print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量path中：")
			
 
				+                        content = "OCR Error"
			
 
				+                        print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
			
 
				+                        print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
			
 
				                 elif p["contentType"] == 9:
			
 
				                     content = execute_code(2, p["JS"], p["JSWaitTime"], element)
			
 
				-                elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
			
 
				-                    command = 'var arr = [];\
			
 
				-                    var content = arguments[0];\
			
 
				-                    for(var i = 0, len = content.childNodes.length; i < len; i++) {\
			
 
				-                        if(content.childNodes[i].nodeType === 3){  \
			
 
				-                            arr.push(content.childNodes[i].nodeValue);\
			
 
				+        except StaleElementReferenceException:  # 发生找不到元素的异常后，等待几秒重新查找
			
 
				+            recordLog('StaleElementReferenceException：'+p["relativeXPath"])
			
 
				+            time.sleep(3)
			
 
				+            try:
			
 
				+                if p["relative"]:  # 是否相对xpath
			
 
				+                    if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
			
 
				+                        element = loopElement
			
 
				+                        recordLog('StaleElementReferenceException：loopElement')
			
 
				+                    else:
			
 
				+                        element = loopElement.find_element(By.XPATH,
			
 
				+                                                           p["relativeXPath"][1:])
			
 
				+                        recordLog(
			
 
				+                            'StaleElementReferenceException：loopElement+relativeXPath')
			
 
				+                else:
			
 
				+                    element = browser.find_element(
			
 
				+                        By.XPATH, p["relativeXPath"])
			
 
				+                    recordLog('StaleElementReferenceException：relativeXPath')
			
 
				+                # 先处理特殊节点类型
			
 
				+                if p["nodeType"] == 2:
			
 
				+                    if element.get_attribute("href") != None:
			
 
				+                        content = element.get_attribute("href")
			
 
				+                    else:
			
 
				+                        content = ""
			
 
				+                elif p["nodeType"] == 3:
			
 
				+                    if element.get_attribute("value") != None:
			
 
				+                        content = element.get_attribute("value")
			
 
				+                    else:
			
 
				+                        content = ""
			
 
				+                elif p["nodeType"] == 4:  # 图片
			
 
				+                    if element.get_attribute("src") != None:
			
 
				+                        content = element.get_attribute("src")
			
 
				+                    else:
			
 
				+                        content = ""
			
 
				+                    try:
			
 
				+                        downloadPic = p["downloadPic"]
			
 
				+                    except:
			
 
				+                        downloadPic = 0
			
 
				+                    if downloadPic == 1:
			
 
				+                        download_image(content, "Data/" +saveName + "/")
			
 
				+                else: # 普通节点
			
 
				+                    if p["contentType"] == 0:
			
 
				+                        content = element.text
			
 
				+                    elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
			
 
				+                        command = 'var arr = [];\
			
 
				+                        var content = arguments[0];\
			
 
				+                        for(var i = 0, len = content.childNodes.length; i < len; i++) {\
			
 
				+                            if(content.childNodes[i].nodeType === 3){  \
			
 
				+                                arr.push(content.childNodes[i].nodeValue);\
			
 
				+                            }\
			
 
				                         }\
			
 
				-                    }\
			
 
				-                    var str = arr.join(" "); \
			
 
				-                    return str;'
			
 
				-                    content = browser.execute_script(command, element).replace(
			
 
				-                        "\n", "").replace("\\s+", " ")
			
 
				-                    if p["nodeType"] == 2:
			
 
				-                        if element.get_attribute("href") != None:
			
 
				-                            content = element.get_attribute("href")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				-                    elif p["nodeType"] == 3:
			
 
				-                        if element.get_attribute("value") != None:
			
 
				-                            content = element.get_attribute("value")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				-                    elif p["nodeType"] == 4:  # 图片
			
 
				-                        if element.get_attribute("src") != None:
			
 
				-                            content = element.get_attribute("src")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				-                elif p["contentType"] == 0:
			
 
				-                    content = element.text
			
 
				-                    if p["nodeType"] == 2:
			
 
				-                        if element.get_attribute("href") != None:
			
 
				-                            content = element.get_attribute("href")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				-                    elif p["nodeType"] == 3:
			
 
				-                        if element.get_attribute("value") != None:
			
 
				-                            content = element.get_attribute("value")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				-                    elif p["nodeType"] == 4:  # 图片
			
 
				-                        if element.get_attribute("src") != None:
			
 
				-                            content = element.get_attribute("src")
			
 
				-                        else:
			
 
				-                            content = ""
			
 
				+                        var str = arr.join(" "); \
			
 
				+                        return str;'
			
 
				+                        content = browser.execute_script(command, element).replace(
			
 
				+                            "\n", "").replace("\\s+", " ")
			
 
				+                    elif p["contentType"] == 2:
			
 
				+                        content = element.get_attribute('innerHTML')
			
 
				+                    elif p["contentType"] == 3:
			
 
				+                        content = element.get_attribute('outerHTML')
			
 
				+                    elif p["contentType"] == 4:
			
 
				+                        # 获取元素的背景图片地址
			
 
				+                        bg_url = element.value_of_css_property('background-image')
			
 
				+                        # 清除背景图片地址中的多余字符
			
 
				+                        bg_url = bg_url.replace('url("', '').replace('")', '')
			
 
				+                        content = bg_url
			
 
				+                    elif p["contentType"] == 5:
			
 
				+                        content = browser.current_url
			
 
				+                    elif p["contentType"] == 6:
			
 
				+                        content = browser.title
			
 
				+                    elif p["contentType"] == 7:
			
 
				+                        # 获取整个网页的高度和宽度
			
 
				+                        height = browser.execute_script("return document.body.scrollHeight");
			
 
				+                        width = browser.execute_script("return document.body.scrollWidth");
			
 
				+                        # 调整浏览器窗口的大小
			
 
				+                        browser.set_window_size(width, height)
			
 
				+                        element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
			
 
				+                    elif p["contentType"] == 8:
			
 
				+                        try:
			
 
				+                            screenshot = element.screenshot_as_png
			
 
				+                            screenshot_stream = io.BytesIO(screenshot)
			
 
				+                            # 使用Pillow库打开截图，并转换为灰度图像
			
 
				+                            image = Image.open(screenshot_stream).convert('L')
			
 
				+                            # 使用Tesseract OCR引擎识别图像中的文本
			
 
				+                            text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
			
 
				+                            content = text
			
 
				+                        except Exception as e:
			
 
				+                            content = "OCR Error"
			
 
				+                            print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
			
 
				+                            print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
			
 
				+                    elif p["contentType"] == 9:
			
 
				+                        content = execute_code(2, p["JS"], p["JSWaitTime"], element)
			
 
				             except StaleElementReferenceException:
			
 
				                 recordLog('StaleElementReferenceException：'+p["relativeXPath"])
			
 
				                 continue  # 再出现类似问题直接跳过
			
@@ -859,7 +878,7 @@ def isnull(s):
 
				 
			
 
				 def saveData(exit=False):
			
 
				     global saveName, log, OUTPUT, browser
			
 
				-    if exit == True or len(OUTPUT) > 100: # 每100条保存一次
			
 
				+    if exit == True or len(OUTPUT) >= 100: # 每100条保存一次
			
 
				         with open("Data/"+saveName + '_log.txt', 'a', encoding='utf-8-sig') as file_obj:
			
 
				             file_obj.write(log)
			
 
				             file_obj.close()
			
@@ -890,6 +909,7 @@ if __name__ == '__main__':
 
				         "config_folder": "",
			
 
				         "config_file_name": "config.json",
			
 
				         "headless": False,
			
 
				+        "version": "0.3.0",
			
 
				     }
			
 
				     c = Config(config)
			
 
				     print(c)
			
--- a/Extension/manifest_v3/.gitignore
+++ b/Extension/manifest_v3/.gitignore
@@ -4,3 +4,5 @@ dist
 
				 .env
			
 
				 EasySpider_en
			
 
				 EasySpider_zh
			
 
				+EasySpider_en.crx
			
 
				+EasySpider_zh.crx
			
--- a/Extension/manifest_v3/EasySpider_en.crx
+++ b/Extension/manifest_v3/EasySpider_en.crx
--- a/Extension/manifest_v3/EasySpider_zh.crx
+++ b/Extension/manifest_v3/EasySpider_zh.crx
--- a/Extension/manifest_v3/src/manifest.json
+++ b/Extension/manifest_v3/src/manifest.json
@@ -1,6 +1,6 @@
 
				 {
			
 
				   "name": "EasySpider",
			
 
				-  "version": "0.2",
			
 
				+  "version": "0.3.0",
			
 
				   "description": "EasySpider's chrome extension",
			
 
				   "author": "Naibo Wang",
			
 
				   "manifest_version": 3,
			
--- a/Releases/EasySpider_windows_amd64/V0.3.0
+++ b/Releases/EasySpider_windows_amd64/V0.3.0
@@ -0,0 +1,48 @@
 
				+https://github.com/NaiboWang/EasySpider/releases/tag/v0.3.0
			
 
				+
			
 
				+### 强烈建议大家观看新特性讲解视频
			
 
				+
			
 
				+B站最新版特性视频已上传，新视频非常有用，推荐大家观看。
			
 
				+
			
 
				+[【重要】自定义条件判断之使用循环项内的JS命令返回值 - 第二弹](https://www.bilibili.com/video/BV1mu411x7Nn/)
			
 
				+
			
 
				+[如何执行自己写的JS代码和系统代码 （自定义操作）](https://www.bilibili.com/video/BV1qs4y1z7Hc/)
			
 
				+
			
 
				+[如何自定义循环和判断条件 - 第一弹](https://www.bilibili.com/video/BV1Ys4y1z777/)
			
 
				+
			
 
				+[如何对元素和网页截图及（无头模式）命令行执行指南](https://www.bilibili.com/video/BV1dV4y1z764/)
			
 
				+
			
 
				+[OCR识别元素内容功能](https://www.bilibili.com/video/BV1xz4y1b72D/)
			
 
				+
			
 
				+注意，v0.3.0版本任务task文件夹内`.json`文件和v0.2.0版本不兼容，请重新设计v0.3.0版本任务。
			
 
				+
			
 
				+## 更新说明
			
 
				+1. 高级操作：
			
 
				+ - 可以在任务流程中**执行自定义脚本**，包括在浏览器中**执行Javascript指令**以及**操作系统级别的脚本调用**并可**得到命令返回值并记录**，大大扩展了可操作空间。
			
 
				+
			
 
				+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/06e63a06-328d-4339-b40b-2d57c94cee66)
			
 
				+
			
 
				+ - 在每一个操作执行前和执行后，都可以指定执行一段针对当前定位元素的JavaScript指令。
			
 
				+ 
			
 
				+<img src="https://github.com/NaiboWang/EasySpider/assets/30287768/dde64388-5668-40ff-951e-fb8f60655c49" height=50% width=50%> 
			
 
				+
			
 
				+2. **判断条件和循环条件**中同样增加了**执行自定义脚本**，并根据自定义脚本的返回值是否为真来作为条件判断和循环的判断条件，同样极大地增加了任务的可操作性。
			
 
				+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/9dea0564-1a1c-487d-9fa4-427c5e284796)
			
 
				+3. 可同时生成多种XPath供用户选择，并**预装了XPath Helper扩展**供大家调试XPath。
			
 
				+4. 增加采集元素背景图片地址，当前页面标题，当前页面URL地址功能。
			
 
				+5. 增加保存元素截图功能，如要截图某元素或整个网页页面，可以用此功能（配合无头模式效果更好）。
			
 
				+6. 增加下载图片功能（正式版，Beta版没有）。
			
 
				+7. 增加OCR识别元素功能（使用此功能需首先自行安装Tesseract库：[https://blog.csdn.net/u010454030/article/details/80515501](https://blog.csdn.net/u010454030/article/details/80515501)）
			
 
				+8. 可直接提取对元素执行JavaScript代码后的返回值，实现如正则表达式，获得元素背景颜色等功能。
			
 
				+<img src="https://github.com/NaiboWang/EasySpider/assets/30287768/f6a9b5ce-63c5-4348-8967-053c21d67ef9" width=50% height=50%>
			
 
				+
			
 
				+9. 大幅增加使用提示和说明，使软件更易用（如增加了iframe标签的处理方式说明，各个选项的参数意义，以及循环项XPath的修改说明等等）。
			
 
				+10. 执行命令时增加了如何用命令行执行任务的提示：[https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction](https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction)。
			
 
				+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/a9e774df-e345-4d51-b7c9-2c4dac0ec624)
			
 
				+11. 增加无头模式，即无浏览器界面模式配置。
			
 
				+12. 修复了使用用户配置浏览器模式下的中文路径不能正确识别的问题。
			
 
				+13. 修复了条件分支没有无条件分支时会卡死的问题。
			
 
				+14. 修复了保存任务后输入框会卡死的问题。
			
 
				+15. 打开网页操作和点击元素操作新增设置页面最长加载等待时间。
			
 
				+16. 增加版本更新提示。
			
 
				+17. 更新chrome版本为113。
			
--- a/Releases/EasySpider_windows_amd64/config.json
+++ b/Releases/EasySpider_windows_amd64/config.json
@@ -1 +1 @@
 
				-{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data1"}
			
 
				+{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data12","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data1"}
			
--- a/Releases/EasySpider_windows_amd64/execution_instances/0.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/0.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/1.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/1.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/2.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/2.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/3.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/3.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/4.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/4.json
--- a/Releases/EasySpider_windows_amd64/tasks/49.json
+++ b/Releases/EasySpider_windows_amd64/tasks/49.json