2 years ago · 3230254f98
--- a/ElectronJS/src/taskGrid/FlowChart.html
+++ b/ElectronJS/src/taskGrid/FlowChart.html
@@ -456,7 +456,7 @@
 
				 
			
 
				                     <div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
			
 
				                         <div>
			
 
				-                            <p><label>(Advanced Operation) Define loop exit condition using code/script:</label></p>
			
 
				+                            <p><label>(Advanced Operation) Define loop exit condition using code/script; or you can add a <b>Custom Action</b>, then select the "Exit Loop" option:</label></p>
			
 
				                             <select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
			
 
				                                 <option value=0>Do not set script (even if a script is written below, it will not be executed)</option>
			
 
				                                 <option value=1>JavaScript script (start with 'return ')</option>
			
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@@ -451,12 +451,12 @@
 
				                         <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
			
 
				                     </div>
			
 
				                     <!-- 这里添加退出循环条件,找不到元素肯定退出循环 -->
			
 
				-                    <label v-if='parseInt(loopType) == 0'>最多执行循环次数（0代表无限循环直到找不到元素或数据变化为止）：</label>
			
 
				+                    <label v-if='parseInt(loopType) == 0'>最多执行循环次数（0代表无限循环直到找不到元素或检测不到页面内容变化为止）：</label>
			
 
				                     <input onkeydown="inputDelete(event)" required v-if='parseInt(loopType) == 0' class="form-control" type="number" v-model.number='nowNode["parameters"]["exitCount"]'></input>
			
 
				 
			
 
				                     <div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
			
 
				                         <div>
			
 
				-                            <p><label>（高级操作）使用代码/脚本定义循环退出条件（也可以在流程中添加自定义操作，然后选择Break选项）： </label></p>
			
 
				+                            <p><label>（高级操作）使用代码/脚本定义循环退出条件（也可以在流程中添加<b>自定义操作</b>，然后选择<b>退出循环</b>选项）： </label></p>
			
 
				                             <select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
			
 
				                                 <option value = 0>不设置脚本（选择这个下面写了脚本也不会执行）</option>
			
 
				                                 <option value = 1>JavaScript脚本返回值（需以return 开头）</option>
			
--- a/ElectronJS/tasks/158.json
+++ b/ElectronJS/tasks/158.json
--- a/ElectronJS/tasks/159.json
+++ b/ElectronJS/tasks/159.json
--- a/ElectronJS/tasks/162.json
+++ b/ElectronJS/tasks/162.json
--- a/ElectronJS/tasks/163.json
+++ b/ElectronJS/tasks/163.json
--- a/ElectronJS/tasks/164.json
+++ b/ElectronJS/tasks/164.json
@@ -0,0 +1 @@
 
				+{"id":164,"name":"Just a moment...","url":"https://turnstile.zeroclover.io/","links":"https://turnstile.zeroclover.io/","create_time":"","update_time":"7/12/2023, 5:36:24 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://turnstile.zeroclover.io/","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://turnstile.zeroclover.io/","desc":"要采集的网址列表，多行以\\n分开","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://turnstile.zeroclover.io/","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"feedback-form\")]/input[2]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/form[1]/input[2]","//input[contains(., '')]","/html/body/div[last()-3]/form/input"]}}]}
			
--- a/ElectronJS/tasks/165.json
+++ b/ElectronJS/tasks/165.json
--- a/ElectronJS/tasks/4.json
+++ b/ElectronJS/tasks/4.json
--- a/ElectronJS/tasks/49.json
+++ b/ElectronJS/tasks/49.json
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
 
				             "justMyCode": false,
			
 
				             //  "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
			
 
				             // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
			
 
				-            "args": ["--id", "[90]", "--headless", "0", "--user_data", "1"]
			
 
				+            "args": ["--id", "[3]", "--headless", "0", "--user_data", "1"]
			
 
				         }
			
 
				     ]
			
 
				 }
			
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -1221,29 +1221,42 @@ class BrowserThread(Thread):
 
				                     # p["relativeXPath"] = p["relativeXPath"].lower()
			
 
				                     # p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
			
 
				                     # 已经有text()或@href了，不需要再加
			
 
				+                    content_type = ""
			
 
				                     if p["relativeXPath"].find("/@href") >= 0 or p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
			
 
				-                        xpath = p["relativeXPath"]
			
 
				+                        content_type = ""
			
 
				                     elif p["nodeType"] == 2:
			
 
				-                        xpath = p["relativeXPath"] + "/@href"
			
 
				+                        content_type = "/@href"
			
 
				                     elif p["contentType"] == 1:
			
 
				-                        xpath = p["relativeXPath"] + "/text()"
			
 
				+                        content_type = "/text()"
			
 
				                     elif p["contentType"] == 0:
			
 
				-                        xpath = p["relativeXPath"] + "//text()"
			
 
				+                        content_type = "//text()"
			
 
				+                    xpath = p["relativeXPath"] + content_type
			
 
				                     if p["relative"]:
			
 
				                         # if p["relativeXPath"] == "":
			
 
				                         #     content = [loopElementHTML]
			
 
				                         # else:
			
 
				                         # 如果字串里有//即子孙查找，则不动语句
			
 
				                         if p["relativeXPath"].find("//") >= 0:
			
 
				-                            full_path = "(" + parentPath + \
			
 
				-                                xpath + ")" + \
			
 
				-                                "[" + str(index + 1) + "]"
			
 
				-                            content = pageHTML.xpath(full_path)
			
 
				+                            if xpath.startswith("/"): 
			
 
				+                                full_path = "(" + parentPath  + ")" + \
			
 
				+                                        "[" + str(index + 1) + "]"+ \
			
 
				+                                        p["relativeXPath"] + content_type
			
 
				+                            else: # 如果是id()这种形式，不需要包parentPath
			
 
				+                                full_path = xpath
			
 
				+                            try:
			
 
				+                                content = pageHTML.xpath(full_path)
			
 
				+                            except:
			
 
				+                                content = []
			
 
				+                        elif not p["relativeXPath"].startswith("/"): # 如果是id()这种形式，不需要包/html/body
			
 
				+                            try:
			
 
				+                                content = loopElementHTML.xpath(xpath)
			
 
				+                            except:
			
 
				+                                content = []
			
 
				                         else:
			
 
				                             content = loopElementHTML.xpath(
			
 
				                                 "/html/body/" + loopElementHTML[0][0].tag + xpath)
			
 
				                     else:
			
 
				-                        if xpath.find("/body") < 0:
			
 
				+                        if xpath.find("/body") < 0 and xpath.startswith("/"): # 如果是id()或(//div)[1]这种形式，不需要包/html/body
			
 
				                             xpath = "/html/body" + xpath
			
 
				                         content = pageHTML.xpath(xpath)
			
 
				                     if len(content) > 0:
			
@@ -1289,9 +1302,12 @@ class BrowserThread(Thread):
 
				                             else:
			
 
				                                 # 如果字串里有//即子孙查找，则不动语句
			
 
				                                 if p["relativeXPath"].find("//") >= 0:
			
 
				-                                    full_path = "(" + parentPath + \
			
 
				-                                        p["relativeXPath"] + ")" + \
			
 
				-                                        "[" + str(index + 1) + "]"
			
 
				+                                    # full_path = "(" + parentPath + \
			
 
				+                                    #     p["relativeXPath"] + ")" + \
			
 
				+                                    #     "[" + str(index + 1) + "]"
			
 
				+                                    full_path = "(" + parentPath + ")" + \
			
 
				+                                        "[" + str(index + 1) + "]" + \
			
 
				+                                        p["relativeXPath"]
			
 
				                                     element = self.browser.find_element(
			
 
				                                         By.XPATH, full_path, iframe=p["iframe"])
			
 
				                                 else:
			
@@ -1462,10 +1478,8 @@ if __name__ == '__main__':
 
				 
			
 
				     option.add_experimental_option(
			
 
				         'excludeSwitches', ['enable-automation'])  # 以开发者模式
			
 
				-    options.add_argument('-ignore-certificate-errors')
			
 
				-    options.add_argument('-ignore -ssl-errors')
			
 
				-    option.add_argument('-ignore-certificate-errors')
			
 
				-    option.add_argument('-ignore -ssl-errors')
			
 
				+    options.add_argument('log-level=3')  # 隐藏日志
			
 
				+    option.add_argument('log-level=3')  # 隐藏日志
			
 
				     # user_data_dir = r''  # 注意没有Default！
			
 
				 
			
 
				     # options.add_argument('--user-data-dir='+p)
			
@@ -1559,8 +1573,6 @@ if __name__ == '__main__':
 
				             if sys.platform != "darwin":
			
 
				                 browser_t = MyUCChrome(
			
 
				                 options=options, chrome_options=option, driver_executable_path=driver_path)
			
 
				-                print("Pass Cloudflare Mode")
			
 
				-                print("过Cloudflare验证模式")
			
 
				             else:
			
 
				                 print("Not support Cloudflare Mode on MacOS")
			
 
				                 print("MacOS不支持Cloudflare验证模式")
			
@@ -1587,6 +1599,9 @@ if __name__ == '__main__':
 
				         print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
			
 
				         print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
			
 
				         print("----------------------------------\n\n")
			
 
				+        if cloudflare:
			
 
				+            print("过Cloudflare验证模式有时候会不稳定，如果无法通过验证则需要隔几分钟重试一次，或者可以更换新的用户信息文件夹再执行任务。")
			
 
				+            print("Passing Cloudflare verification mode is sometimes unstable, if you cannot pass the verification, you need to try again every few minutes, or you can change a new user information folder and then execute the task.")
			
 
				         # 使用监听器监听键盘输入
			
 
				         try:
			
 
				             with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
			
--- a/ExecuteStage/undetected_chromedriver_ES/__init__.py
+++ b/ExecuteStage/undetected_chromedriver_ES/__init__.py
@@ -451,7 +451,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
 
				             service = None
			
 
				 
			
 
				         super(Chrome, self).__init__(
			
 
				-            executable_path=driver_executable_path,
			
 
				+            executable_path=self.patcher.executable_path,
			
 
				             port=port,
			
 
				             options=options,
			
 
				             service_args=service_args,
			
--- a/ExecuteStage/undetected_chromedriver_ES/patcher.py
+++ b/ExecuteStage/undetected_chromedriver_ES/patcher.py
@@ -116,18 +116,24 @@ class Patcher(object):
 
				         #     # -1 being a skip value used later in this block
			
 
				         #
			
 
				         p = pathlib.Path(self.data_path)
			
 
				-        with Lock():
			
 
				-            files = list(p.rglob("*chromedriver*?"))
			
 
				-            for file in files:
			
 
				-                if self.is_binary_patched(file):
			
 
				-                    self.executable_path = str(file)
			
 
				-                    return True
			
 
				+        # with Lock():
			
 
				+        #     files = list(p.rglob("*chromedriver*?"))
			
 
				+        #     for file in files:
			
 
				+        #         if self.is_binary_patched(file):
			
 
				+        #             self.executable_path = str(file)
			
 
				+        #             return True
			
 
				 
			
 
				         if executable_path:
			
 
				             self.executable_path = executable_path
			
 
				             self._custom_exe_path = True
			
 
				 
			
 
				         if self._custom_exe_path:
			
 
				+            file_name, file_extension = os.path.splitext(self.executable_path)
			
 
				+            # 创建新的文件名
			
 
				+            new_file = f"{file_name}_uc{file_extension}"
			
 
				+            if not os.path.exists(new_file):
			
 
				+                shutil.copy(self.executable_path, new_file)
			
 
				+            self.executable_path = new_file # 用新的chromedriver
			
 
				             ispatched = self.is_binary_patched(self.executable_path)
			
 
				             if not ispatched:
			
 
				                 return self.patch_exe()
			
--- a/Readme.md
+++ b/Readme.md
@@ -32,6 +32,12 @@ A visual code-free/no-code web crawler/spider, just select the content you want
 
				 
			
 
				 ![animation_en](media/animation_en.gif)
			
 
				 
			
 
				+### 更多特性/More Features
			
 
				+
			
 
				+更多特性请翻到页面底部查看。
			
 
				+
			
 
				+For more features, please scroll to the bottom of this page.
			
 
				+
			
 
				 ## 下载易采集/Download EasySpider
			
 
				 
			
 
				 进入 [Releases Page](https://github.com/NaiboWang/EasySpider/releases) 下载最新版本。如果下载速度慢，可以考虑中国境内下载地址：[中国境内下载地址](https://www.easyspider.cn/download.html)。
			
@@ -144,6 +150,11 @@ At the same time, the software is protected by patent rights. If you want to use
 
				 
			
 
				 Refer to [Compilation Instructions](ElectronJS/README.md).
			
 
				 
			
 
				+## 支持特性/Supported Features
			
 
				+
			
 
				+![pic](media/features_CN.png)
			
 
				+![pic](media/features_EN.png)
			
 
				+
			
 
				 ## 中文界面截图
			
 
				 
			
 
				 #### 软件界面示例
			
--- a/media/features_CN.png
+++ b/media/features_CN.png
--- a/media/features_EN.png
+++ b/media/features_EN.png