Browse Source

MacOS Test

Naibo_Mac_M2 1 year ago
parent
commit
5376aa37b0

File diff suppressed because it is too large
+ 379 - 189
.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py


+ 24 - 16
.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py

@@ -37,19 +37,21 @@ class MyChrome(webdriver.Chrome):
             except Exception as e:
                 print(e)
             find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
+            # 遍历所有的 iframe 并查找里面的元素
             for iframe in iframes:
                 # 切换到 iframe
                 super().switch_to.default_content()
                 super().switch_to.frame(iframe)
                 self.iframe_env = True
                 try:
-                    # 在 iframe 中查找并点击元素
+                    # 在 iframe 中查找元素
                     # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                     element = super().find_element(by=by, value=value)
                     find_element = True
-                except:
-                    print("No such element found in the iframe")
+                except NoSuchElementException as e:
+                    print(f"No such element found in the iframe: {str(e)}")
+                except Exception as e:
+                    print(f"Exception: {str(e)}")
                 # 完成操作后切回主文档
                 # super().switch_to.default_content()
                 if find_element:
@@ -68,14 +70,14 @@ class MyChrome(webdriver.Chrome):
             # 获取所有的 iframe
             iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
             find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
+            # 遍历所有的 iframe 并找到里面的元素
             for iframe in iframes:
                 # 切换到 iframe
                 try:
                     super().switch_to.default_content()
                     super().switch_to.frame(iframe)
                     self.iframe_env = True
-                    # 在 iframe 中查找并点击元素
+                    # 在 iframe 中查找元素
                     # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                     elements = super().find_elements(by=by, value=value)
                     if len(elements) > 0:
@@ -84,8 +86,10 @@ class MyChrome(webdriver.Chrome):
                     # super().switch_to.default_content()
                     if find_element:
                         return elements
-                except:
-                    print("No such element found in the iframe")
+                except NoSuchElementException as e:
+                    print(f"No such element found in the iframe: {str(e)}")
+                except Exception as e:
+                    print(f"Exception: {str(e)}")
             if not find_element:
                 raise NoSuchElementException
         else:
@@ -117,19 +121,21 @@ if sys.platform != "darwin":
                 except Exception as e:
                     print(e)
                 find_element = False
-                # 遍历所有的 iframe 并点击里面的元素
+                # 遍历所有的 iframe 并找到里面的元素
                 for iframe in iframes:
                     # 切换到 iframe
                     super().switch_to.default_content()
                     super().switch_to.frame(iframe)
                     self.iframe_env = True
                     try:
-                        # 在 iframe 中查找并点击元素
+                        # 在 iframe 中查找元素
                         # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                         element = super().find_element(by=by, value=value)
                         find_element = True
-                    except:
-                        print("No such element found in the iframe")
+                    except NoSuchElementException as e:
+                        print(f"No such element found in the iframe: {str(e)}")
+                    except Exception as e:
+                        print(f"Exception: {str(e)}")
                     # 完成操作后切回主文档
                     # super().switch_to.default_content()
                     if find_element:
@@ -148,14 +154,14 @@ if sys.platform != "darwin":
                 # 获取所有的 iframe
                 iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
                 find_element = False
-                # 遍历所有的 iframe 并点击里面的元素
+                # 遍历所有的 iframe 并查找里面的元素
                 for iframe in iframes:
                     # 切换到 iframe
                     try:
                         super().switch_to.default_content()
                         super().switch_to.frame(iframe)
                         self.iframe_env = True
-                        # 在 iframe 中查找并点击元素
+                        # 在 iframe 中查找元素
                         # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                         elements = super().find_elements(by=by, value=value)
                         if len(elements) > 0:
@@ -164,8 +170,10 @@ if sys.platform != "darwin":
                         # super().switch_to.default_content()
                         if find_element:
                             return elements
-                    except:
-                        print("No such element found in the iframe")
+                    except NoSuchElementException as e:
+                        print(f"No such element found in the iframe: {str(e)}")
+                    except Exception as e:
+                        print(f"Exception: {str(e)}")
                 if not find_element:
                     raise NoSuchElementException
             else:

+ 25 - 15
.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py

@@ -31,7 +31,7 @@ def lowercase_tags_in_xpath(xpath):
 def on_press_creator(press_time, event):
     def on_press(key):
         try:
-            if key.char == 'p':
+            if key.char == press_time["pause_key"]:
                 if press_time["is_pressed"] == False:  # 没按下p键时,记录按下p键的时间
                     press_time["duration"] = time.time()
                     press_time["is_pressed"] = True
@@ -39,14 +39,14 @@ def on_press_creator(press_time, event):
                     duration = time.time() - press_time["duration"]
                     if duration > 2:
                         if event._flag == False:
-                            print("任务执行中,长按p键暂停执行。")
-                            print("Task is running, long press 'p' to pause.")
+                            print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。")
+                            print("Task is running, long press '" + press_time["pause_key"] + "' to pause.")
                             # 设置Event的值为True,使得线程b可以继续执行
                             event.set()
                         else:
                             # 设置Event的值为False,使得线程b暂停执行
-                            print("任务已暂停,长按p键继续执行...")
-                            print("Task paused, long press 'p' to continue...")
+                            print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...")
+                            print("Task paused, long press '" + press_time["pause_key"] + "' to continue...")
                             event.clear()
                         press_time["duration"] = time.time()
                         press_time["is_pressed"] = False
@@ -176,26 +176,36 @@ def write_to_csv(file_name, data, record):
             f_csv.writerow(to_write)
         f.close()
 
-
-def eval_repl(matchobj):
-     print(matchobj.group(1))
-     return str(eval(matchobj.group(1), globals(), locals()))
-
-
-
 def replace_field_values(orginal_text, outputParameters, browser=None):
     pattern = r'Field\["([^"]+)"\]'
     try:
         replaced_text = re.sub(
             pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text)
-        if replaced_text.find("EVAL") != -1: # 如果返回值中包含EVAL
+        if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL
             replaced_text = replaced_text.replace("self.", "browser.")
-            replaced_text = re.sub(r'EVAL\("(.*?)"\)', lambda match: str(eval(match.group(1))), replaced_text)
-    except:
+            pattern = re.compile(r'(?i)eval\("(.+?)"\)')
+            # 循环替换所有匹配到的eval语句
+            while True:
+                match = pattern.search(replaced_text)
+                if not match:
+                    break
+                # 执行eval并将其结果转换为字符串形式
+                eval_replaced_text = str(eval(match.group(1)))
+                # 替换eval语句
+                replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
+    except Exception as e:
+        print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.")
         replaced_text = orginal_text
     return replaced_text
 
 
+def readCode(code):
+    if code.startswith("outside:"):
+        file_name = os.path.join(os.path.abspath("./"), code[8:])
+        with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
+            code = file_obj.read()
+    return code
+
 def write_to_json(file_name, data, types, record, keys):
     keys = list(keys)
     # Prepare empty list for data

+ 57 - 0
.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py

@@ -0,0 +1,57 @@
+"""
+这是一个示例代码文件,可以直接在这里写Python代码,然后在程序中的exec操作中调用。如果此文件名称为myCode.py,请将此文件放置在EasySpider程序目录下(和Data/文件夹同级),那么在程序中的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例:
+
+1. 用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)即可滚动到页面最下方。
+2. 自定义一个全局变量:self.myVar = 1
+3. 操纵上面定义的全局变量:self.myVar = self.myVar + 1
+4. 打印上面定义的全局变量:print(self.myVar)
+5. 将自定义变量的值赋值为某个字段提取的值:self.myVar = self.outputParameters["字段名"]
+6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值"
+
+对于更加复杂的操作,请直接下载源代码并编译执行。
+
+This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples:
+
+1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page.
+2. Define a global variable: self.myVar = 1
+3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1
+4. Print the above-defined global variable: print(self.myVar)
+5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"]
+6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value"
+
+For more complex operations, please download the source code and compile it for execution.
+"""
+
+# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
+
+# 导包 | Import packages
+from selenium.common.exceptions import ElementClickInterceptedException
+
+# 定义一个函数 | Define a function
+def test(n = 0):
+    for i in range(0, n):
+        if i % 2 == 0:
+            print(i)
+    return "test"
+
+# 异常捕获 | Exception capture
+try:
+    # 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser
+    element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore any errors the IDE reports here: this code is embedded into the program, so it is normal that the IDE cannot recognize the self and By variables
+    element.click()
+    print("点击成功|Click success")
+except ElementClickInterceptedException:
+    # 如果元素被遮挡,点击失败 | If the element is blocked, the click fails
+    print("元素被遮挡,无法点击|The element is blocked and cannot be clicked")
+except Exception as e:
+    # 打印其他异常 | Print any other exception
+    print("发生了一个异常|An exception occurred", e)
+finally:
+    # 测试函数 | Test function
+    self.a = 1
+    print("a = ", self.a)
+    self.a = self.a + 1
+    print("a = ", self.a)
+    print("All parameters:", self.outputParameters)
+    print(test(3))
+    print("执行完毕|Execution completed")

BIN
ElectronJS/EasySpider_en.crx


BIN
ElectronJS/EasySpider_zh.crx


+ 1 - 1
ElectronJS/config.json

@@ -1 +1 @@
-{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
+{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"/Users/naibo/Documents/EasySpider/ElectronJS/user_data"}

+ 4 - 6
ElectronJS/package-lock.json

@@ -15,6 +15,7 @@
                 "formidable": "^3.5.0",
                 "http": "^0.0.1-security",
                 "multer": "^1.4.5-lts.1",
+                "node-abi": "^3.52.0",
                 "node-window-manager": "^2.2.4",
                 "selenium-webdriver": "^4.16.0",
                 "ws": "^8.12.0",
@@ -3914,9 +3915,9 @@
             "license": "MIT"
         },
         "node_modules/node-abi": {
-            "version": "3.45.0",
-            "dev": true,
-            "license": "MIT",
+            "version": "3.52.0",
+            "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.52.0.tgz",
+            "integrity": "sha512-JJ98b02z16ILv7859irtXn4oUaFWADtvkzy2c0IAatNVX2Mc9Yoh8z6hZInn3QwvMEYhHuQloYi+TTQy67SIdQ==",
             "dependencies": {
                 "semver": "^7.3.5"
             },
@@ -4814,7 +4815,6 @@
         },
         "node_modules/semver": {
             "version": "7.5.3",
-            "dev": true,
             "license": "ISC",
             "dependencies": {
                 "lru-cache": "^6.0.0"
@@ -4834,7 +4834,6 @@
         },
         "node_modules/semver/node_modules/lru-cache": {
             "version": "6.0.0",
-            "dev": true,
             "license": "ISC",
             "dependencies": {
                 "yallist": "^4.0.0"
@@ -5665,7 +5664,6 @@
         },
         "node_modules/yallist": {
             "version": "4.0.0",
-            "dev": true,
             "license": "ISC"
         },
         "node_modules/yargs": {

+ 2 - 1
ElectronJS/package.json

@@ -37,6 +37,7 @@
         "formidable": "^3.5.0",
         "http": "^0.0.1-security",
         "multer": "^1.4.5-lts.1",
+        "node-abi": "^3.52.0",
         "node-window-manager": "^2.2.4",
         "selenium-webdriver": "^4.16.0",
         "ws": "^8.12.0",
@@ -79,4 +80,4 @@
             "publishers": []
         }
     }
-}
+}

File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/229.json


+ 1 - 1
ElectronJS/update_chrome.py

@@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder):
 
 
 def get_chrome_version():
-    version = "115"
+    version = "120"
     if sys.platform == "win32":
         version_re = re.compile(r"^[1-9]\d*\.\d*.\d*")
         try:

+ 2 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,8 @@
             "justMyCode": false,
             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--ids", "[52]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
+            // "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
+            "args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
         }
     ]
 }

Some files were not shown because too many files changed in this diff