naibo 1 anno fa
parent
commit
5d53da96f1
100 file modificati, con 648 aggiunte e 230 eliminazioni
  1. 0 0
      .temp_to_pub/EasySpider_Linux_x64/tasks/252.json
  2. 0 0
      .temp_to_pub/EasySpider_MacOS_all_arch/Sample Tasks/252.json
  3. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/14.json
  4. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/18.json
  5. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/21.json
  6. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/22.json
  7. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/24.json
  8. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/252.json
  9. 0 0
      .temp_to_pub/EasySpider_windows_x32/tasks/26.json
  10. 363 128
      .temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
  11. 150 65
      .temp_to_pub/EasySpider_windows_x64/Code/myChrome.py
  12. 135 37
      .temp_to_pub/EasySpider_windows_x64/Code/utils.py
  13. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
  14. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
  15. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/10.json
  16. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/100.json
  17. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/101.json
  18. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/102.json
  19. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/103.json
  20. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/104.json
  21. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/105.json
  22. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/106.json
  23. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/107.json
  24. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/108.json
  25. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/109.json
  26. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/11.json
  27. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/110.json
  28. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/111.json
  29. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/112.json
  30. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/113.json
  31. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/114.json
  32. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/115.json
  33. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/116.json
  34. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/117.json
  35. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/118.json
  36. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/119.json
  37. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/12.json
  38. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/120.json
  39. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/121.json
  40. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/122.json
  41. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/123.json
  42. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/124.json
  43. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/125.json
  44. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/126.json
  45. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/127.json
  46. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/128.json
  47. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/129.json
  48. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/13.json
  49. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/130.json
  50. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/131.json
  51. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/132.json
  52. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/133.json
  53. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/134.json
  54. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/135.json
  55. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/136.json
  56. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/137.json
  57. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/138.json
  58. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/139.json
  59. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/14.json
  60. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/140.json
  61. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/141.json
  62. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/142.json
  63. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/143.json
  64. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/144.json
  65. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/145.json
  66. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/146.json
  67. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/147.json
  68. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/148.json
  69. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/149.json
  70. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/15.json
  71. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/150.json
  72. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/16.json
  73. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/17.json
  74. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/18.json
  75. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/19.json
  76. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
  77. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/20.json
  78. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/21.json
  79. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/22.json
  80. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/23.json
  81. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/24.json
  82. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/25.json
  83. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/26.json
  84. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/27.json
  85. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/28.json
  86. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/29.json
  87. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
  88. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/30.json
  89. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/31.json
  90. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/32.json
  91. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/33.json
  92. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/34.json
  93. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/35.json
  94. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/36.json
  95. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/37.json
  96. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/38.json
  97. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/39.json
  98. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/4.json
  99. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/40.json
  100. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/41.json

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_Linux_x64/tasks/252.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_MacOS_all_arch/Sample Tasks/252.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/14.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/18.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/21.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/22.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/24.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/252.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x32/tasks/26.json


File diff suppressed because it is too large
+ 363 - 128
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py


+ 150 - 65
.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py

@@ -25,71 +25,152 @@ class MyChrome(webdriver.Chrome):
         self.iframe_env = False  # 现在的环境是root还是iframe
         super().__init__(*args, **kwargs)  # 调用父类的 __init__
 
-    def find_element(self, by=By.ID, value=None, iframe=False):
-        # 在这里改变查找元素的行为
-        if self.iframe_env:
-            super().switch_to.default_content()
-            self.iframe_env = False
-        if iframe:
-            # 获取所有的 iframe
+    # def find_element(self, by=By.ID, value=None, iframe=False):
+    #     # 在这里改变查找元素的行为
+    #     if self.iframe_env:
+    #         super().switch_to.default_content()
+    #         self.iframe_env = False
+    #     if iframe:
+    #         # 获取所有的 iframe
+    #         try:
+    #             iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+    #         except Exception as e:
+    #             print(e)
+    #         find_element = False
+    #         # 遍历所有的 iframe 并查找里面的元素
+    #         for iframe in iframes:
+    #             # 切换到 iframe
+    #             super().switch_to.default_content()
+    #             super().switch_to.frame(iframe)
+    #             self.iframe_env = True
+    #             try:
+    #                 # 在 iframe 中查找元素
+    #                 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+    #                 element = super().find_element(by=by, value=value)
+    #                 find_element = True
+    #             except NoSuchElementException as e:
+    #                 print(f"No such element found in the iframe: {str(e)}")
+    #             except Exception as e:
+    #                 print(f"Exception: {str(e)}")
+    #             # 完成操作后切回主文档
+    #             # super().switch_to.default_content()
+    #             if find_element:
+    #                 return element
+    #         if not find_element:
+    #             raise NoSuchElementException
+    #     else:
+    #         return super().find_element(by=by, value=value)
+
+    def find_element_recursive(self, by, value, frames):
+        for frame in frames:
             try:
-                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
-            except Exception as e:
-                print(e)
-            find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
-            for iframe in iframes:
-                # 切换到 iframe
-                super().switch_to.default_content()
-                super().switch_to.frame(iframe)
-                self.iframe_env = True
                 try:
-                    # 在 iframe 中查找并点击元素
-                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
-                    element = super().find_element(by=by, value=value)
-                    find_element = True
-                except:
-                    print("No such element found in the iframe")
-                # 完成操作后切回主文档
-                # super().switch_to.default_content()
-                if find_element:
+                    self.switch_to.frame(frame)
+                except StaleElementReferenceException:
+                    # If the frame has been refreshed, we need to switch to the parent frame first,
+                    self.switch_to.parent_frame()
+                    self.switch_to.frame(frame)
+                try:
+                    # !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
+                    element = super(MyChrome, self).find_element(by=by, value=value)
                     return element
-            if not find_element:
-                raise NoSuchElementException
-        else:
-            return super().find_element(by=by, value=value)
+                except NoSuchElementException:
+                    # Recurse into nested iframes
+                    nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
+                    if nested_frames:
+                        element = self.find_element_recursive(by, value, nested_frames)
+                        if element:
+                            return element
+            except Exception as e:
+                print(f"Exception while processing frame: {e}")
 
-    def find_elements(self, by=By.ID, value=None, iframe=False):
-        # 在这里改变查找元素的行为
-        if self.iframe_env:
-            super().switch_to.default_content()
-            self.iframe_env = False
+        raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
+
+    def find_element(self, by=By.ID, value=None, iframe=False):
+        self.switch_to.default_content()  # Switch back to the main document
+        self.iframe_env = False
         if iframe:
-            # 获取所有的 iframe
-            iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
-            find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
-            for iframe in iframes:
-                # 切换到 iframe
+            frames = self.find_elements(By.CSS_SELECTOR, "iframe")
+            if not frames:
+                raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
+            self.iframe_env = True
+            element = self.find_element_recursive(by, value, frames)
+        else:
+            # Find element in the main document as normal
+            element = super(MyChrome, self).find_element(by=by, value=value)
+        return element
+
+    # def find_elements(self, by=By.ID, value=None, iframe=False):
+    #     # 在这里改变查找元素的行为
+    #     if self.iframe_env:
+    #         super().switch_to.default_content()
+    #         self.iframe_env = False
+    #     if iframe:
+    #         # 获取所有的 iframe
+    #         iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+    #         find_element = False
+    #         # 遍历所有的 iframe 并找到里面的元素
+    #         for iframe in iframes:
+    #             # 切换到 iframe
+    #             try:
+    #                 super().switch_to.default_content()
+    #                 super().switch_to.frame(iframe)
+    #                 self.iframe_env = True
+    #                 # 在 iframe 中查找元素
+    #                 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+    #                 elements = super().find_elements(by=by, value=value)
+    #                 if len(elements) > 0:
+    #                     find_element = True
+    #                 # 完成操作后切回主文档
+    #                 # super().switch_to.default_content()
+    #                 if find_element:
+    #                     return elements
+    #             except NoSuchElementException as e:
+    #                 print(f"No such element found in the iframe: {str(e)}")
+    #             except Exception as e:
+    #                 print(f"Exception: {str(e)}")
+    #         if not find_element:
+    #             raise NoSuchElementException
+    #     else:
+    #         return super().find_elements(by=by, value=value)
+
+    def find_elements_recursive(self, by, value, frames):
+        for frame in frames:
+            try:
                 try:
-                    super().switch_to.default_content()
-                    super().switch_to.frame(iframe)
-                    self.iframe_env = True
-                    # 在 iframe 中查找并点击元素
-                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
-                    elements = super().find_elements(by=by, value=value)
-                    if len(elements) > 0:
-                        find_element = True
-                    # 完成操作后切回主文档
-                    # super().switch_to.default_content()
-                    if find_element:
+                    self.switch_to.frame(frame)
+                except StaleElementReferenceException:
+                    # If the frame has been refreshed, we need to switch to the parent frame first,
+                    self.switch_to.parent_frame()
+                    self.switch_to.frame(frame)
+                # Directly find elements in the current frame
+                elements = super(MyChrome, self).find_elements(by=by, value=value)
+                if elements:
+                    return elements
+                # Recursively search for elements in nested iframes
+                nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
+                if nested_frames:
+                    elements = self.find_elements_recursive(by, value, nested_frames)
+                    if elements:
                         return elements
-                except:
-                    print("No such element found in the iframe")
-            if not find_element:
-                raise NoSuchElementException
+            except Exception as e:
+                print(f"Exception while processing frame: {e}")
+
+        raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
+
+    def find_elements(self, by=By.ID, value=None, iframe=False):
+        self.switch_to.default_content()  # Switch back to the main document
+        self.iframe_env = False
+        if iframe:
+            frames = self.find_elements(By.CSS_SELECTOR, "iframe")
+            if not frames:
+                return []  # Return an empty list if no iframes are found
+            self.iframe_env = True
+            elements = self.find_elements_recursive(by, value, frames)
         else:
-            return super().find_elements(by=by, value=value)
+            # Find elements in the main document as normal
+            elements =  super(MyChrome, self).find_elements(by=by, value=value)
+        return elements
 
 # MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
 if sys.platform != "darwin": 
@@ -117,19 +198,21 @@ if sys.platform != "darwin":
                 except Exception as e:
                     print(e)
                 find_element = False
-                # 遍历所有的 iframe 并点击里面的元素
+                # 遍历所有的 iframe 并找到里面的元素
                 for iframe in iframes:
                     # 切换到 iframe
                     super().switch_to.default_content()
                     super().switch_to.frame(iframe)
                     self.iframe_env = True
                     try:
-                        # 在 iframe 中查找并点击元素
+                        # 在 iframe 中查找元素
                         # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                         element = super().find_element(by=by, value=value)
                         find_element = True
-                    except:
-                        print("No such element found in the iframe")
+                    except NoSuchElementException as e:
+                        print(f"No such element found in the iframe: {str(e)}")
+                    except Exception as e:
+                        print(f"Exception: {str(e)}")
                     # 完成操作后切回主文档
                     # super().switch_to.default_content()
                     if find_element:
@@ -148,14 +231,14 @@ if sys.platform != "darwin":
                 # 获取所有的 iframe
                 iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
                 find_element = False
-                # 遍历所有的 iframe 并点击里面的元素
+                # 遍历所有的 iframe 并查找里面的元素
                 for iframe in iframes:
                     # 切换到 iframe
                     try:
                         super().switch_to.default_content()
                         super().switch_to.frame(iframe)
                         self.iframe_env = True
-                        # 在 iframe 中查找并点击元素
+                        # 在 iframe 中查找元素
                         # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
                         elements = super().find_elements(by=by, value=value)
                         if len(elements) > 0:
@@ -164,8 +247,10 @@ if sys.platform != "darwin":
                         # super().switch_to.default_content()
                         if find_element:
                             return elements
-                    except:
-                        print("No such element found in the iframe")
+                    except NoSuchElementException as e:
+                        print(f"No such element found in the iframe: {str(e)}")
+                    except Exception as e:
+                        print(f"Exception: {str(e)}")
                 if not find_element:
                     raise NoSuchElementException
             else:

+ 135 - 37
.temp_to_pub/EasySpider_windows_x64/Code/utils.py

@@ -1,5 +1,4 @@
-# 控制流程的暂停和继续
-
+# 工具库
 import csv
 import datetime
 import json
@@ -14,6 +13,47 @@ import requests
 from urllib.parse import urlparse
 import pymysql
 from lxml import etree
+import smtplib
+from email.mime.text import MIMEText
+from email.header import Header
+
+def send_email(config):
+    """
+    发送邮件的函数。
+
+    :param config: 包含邮件配置信息的字典。
+    """
+    # 校验配置信息是否完整
+    # required_keys = ["host", "port", "username", "password", "from", "to", "subject", "content"]
+    # missing_keys = [key for key in required_keys if key not in config]
+    # if missing_keys:
+    #     raise ValueError(f"邮件配置缺少必要的键: {', '.join(missing_keys)}")
+    try:
+        print("正在发送邮件到:" + config['to'])
+        message = MIMEText(config['content'], 'plain', 'utf-8')
+        message['From'] = Header(f"{config['username'].split('@')[0]} <{config['username']}>")
+        to_name_list = []
+        for address in config['to'].split(','):
+            address = address.strip()
+            name = address.split('@')[0]
+            to_name_list.append(f"{name} <{address}>")
+        to_name_list = ', '.join(to_name_list)
+        message['To'] = Header(to_name_list)
+        message['Subject'] = Header(config['subject'], 'utf-8')
+        # 使用SSL加密方式连接邮件服务器
+        smtp_server = smtplib.SMTP_SSL(config['host'], config['port'])
+        smtp_server.login(config['username'], config['password'])
+        to_address_list = config['to'].split(',')
+        smtp_server.sendmail(config['username'], to_address_list, message.as_string())
+        print("邮件发送成功|Email sent successfully")
+    except Exception as e:
+        print(f"无法发送邮件,发生错误:{e}")
+        print(f"Failed to send email, error: {e}")
+    finally:
+        try:
+            smtp_server.quit()
+        except:
+            pass
 
 
 def is_valid_url(url):
@@ -31,7 +71,7 @@ def lowercase_tags_in_xpath(xpath):
 def on_press_creator(press_time, event):
     def on_press(key):
         try:
-            if key.char == 'p':
+            if key.char == press_time["pause_key"]:
                 if press_time["is_pressed"] == False:  # 没按下p键时,记录按下p键的时间
                     press_time["duration"] = time.time()
                     press_time["is_pressed"] = True
@@ -39,14 +79,14 @@ def on_press_creator(press_time, event):
                     duration = time.time() - press_time["duration"]
                     if duration > 2:
                         if event._flag == False:
-                            print("任务执行中,长按p键暂停执行。")
-                            print("Task is running, long press 'p' to pause.")
+                            print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。")
+                            print("Task is running, long press '" + press_time["pause_key"] + "' to pause.")
                             # 设置Event的值为True,使得线程b可以继续执行
                             event.set()
                         else:
                             # 设置Event的值为False,使得线程b暂停执行
-                            print("任务已暂停,长按p键继续执行...")
-                            print("Task paused, long press 'p' to continue...")
+                            print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...")
+                            print("Task paused, long press '" + press_time["pause_key"] + "' to continue...")
                             event.clear()
                         press_time["duration"] = time.time()
                         press_time["is_pressed"] = False
@@ -94,6 +134,22 @@ def on_release_creator(event, press_time):
 #                 event.clear()
 #         time.sleep(1)  # 每秒检查一次
 
+def detect_optimizable(para, ignoreWaitElement=True, waitElement=""):
+    if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1:
+        if para["nodeType"] <= 2:
+            if ignoreWaitElement or waitElement == "":
+                return True
+            else:
+                return False
+        elif para["nodeType"] == 4: # 如果是图片
+            if para["downloadPic"]:
+                return False
+            else:
+                return True
+    else:
+        return False
+
+
 
 def download_image(browser, url, save_directory):
     # 定义浏览器头信息
@@ -176,17 +232,37 @@ def write_to_csv(file_name, data, record):
             f_csv.writerow(to_write)
         f.close()
 
-
-def replace_field_values(orginal_text, outputParameters):
+def replace_field_values(orginal_text, outputParameters, browser=None):
     pattern = r'Field\["([^"]+)"\]'
     try:
         replaced_text = re.sub(
             pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text)
-    except:
+        if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL
+            replaced_text = replaced_text.replace("self.", "browser.")
+            pattern = re.compile(r'(?i)eval\("(.+?)"\)')
+            # 循环替换所有匹配到的eval语句
+            while True:
+                match = pattern.search(replaced_text)
+                if not match:
+                    break
+                # 执行eval并将其结果转换为字符串形式
+                eval_replaced_text = str(eval(match.group(1)))
+                # 替换eval语句
+                replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
+    except Exception as e:
+        print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.")
+        print(e)
         replaced_text = orginal_text
     return replaced_text
 
 
+def readCode(code):
+    if code.startswith("outside:"):
+        file_name = os.path.join(os.path.abspath("./"), code[8:])
+        with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
+            code = file_obj.read()
+    return code
+
 def write_to_json(file_name, data, types, record, keys):
     keys = list(keys)
     # Prepare empty list for data
@@ -281,33 +357,37 @@ class myMySQL:
             print("MySQL config file path: ", config_file)
             with open(config_file, 'r') as f:
                 config = json.load(f)
-                host = config["host"]
-                port = config["port"]
-                user = config["username"]
-                passwd = config["password"]
-                db = config["database"]
+                self.host = config["host"]
+                self.port = config["port"]
+                self.username = config["username"]
+                self.password = config["password"]
+                self.db = config["database"]
         except Exception as e:
             print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在,或配置信息是否有误。")
             print("Failed to read configuration file, please check if the configuration file: " +
                   config_file+" exists, or if the configuration information is incorrect.")
             print(e)
+        self.connect()
+        
+    def connect(self):
         try:
             self.conn = pymysql.connect(
-                host=host, port=port, user=user, passwd=passwd, db=db)
+                host=self.host, port=self.port, user=self.username, passwd=self.password, db=self.db)
             print("成功连接到数据库。")
             print("Successfully connected to the database.")
         except:
             print("连接数据库失败,请检查配置文件是否正确。")
             print(
                 "Failed to connect to the database, please check if the configuration file is correct.")
+            sys.exit()
 
     def create_table(self, table_name, parameters):
         self.table_name = table_name
         self.field_sql = "("
-        cursor = self.conn.cursor()
+        self.cursor = self.conn.cursor()
         # 检查表是否存在
-        cursor.execute("SHOW TABLES LIKE '%s'" % table_name)
-        result = cursor.fetchone()
+        self.cursor.execute(f"SHOW TABLES LIKE '{table_name}'")
+        result = self.cursor.fetchone()
 
         sql = "CREATE TABLE " + table_name + \
             " (_id INT AUTO_INCREMENT PRIMARY KEY, "
@@ -342,47 +422,52 @@ class myMySQL:
         # 如果表不存在,创建它
         if not result:
             # 执行SQL命令
-            cursor.execute(sql)
+            self.cursor.execute(sql)
         else:
-            print("数据表" + table_name + "已存在。")
-            print("The data table " + table_name + " already exists.")
-        cursor.close()
+            print(f'数据表 {table_name} 已存在')
+            print(f'The data table {table_name} already exists.')
+        self.cursor.close()
 
     def write_to_mysql(self, OUTPUT, record, types):
         # 创建一个游标对象
-        cursor = self.conn.cursor()
+        self.cursor = self.conn.cursor()
 
         for line in OUTPUT:
             for i in range(len(line)):
                 if types[i] == "int" or types[i] == "bigInt":
                     try:
                         line[i] = int(line[i])
-                    except:
+                    except Exception as e:
+                        print(e)
                         line[i] = 0
                 elif types[i] == "double":
                     try:
                         line[i] = float(line[i])
-                    except:
+                    except Exception as e:
+                        print(e)
                         line[i] = 0.0
                 elif types[i] == "datetime":
                     try:
                         line[i] = datetime.datetime.strptime(
                             line[i], '%Y-%m-%d %H:%M:%S')
-                    except:
+                    except Exception as e:
+                        print(e)
                         line[i] = datetime.datetime.strptime(
                             "1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')
                 elif types[i] == "date":
                     try:
                         line[i] = datetime.datetime.strptime(
                             line[i], '%Y-%m-%d')
-                    except:
+                    except Exception as e:
+                        print(e)
                         line[i] = datetime.datetime.strptime(
                             "1970-01-01", '%Y-%m-%d')
                 elif types[i] == "time":
                     try:
                         line[i] = datetime.datetime.strptime(
                             line[i], '%H:%M:%S')
-                    except:
+                    except Exception as e:
+                        print(e)
                         line[i] = datetime.datetime.strptime(
                             "00:00:00", '%H:%M:%S')
             to_write = []
@@ -390,15 +475,21 @@ class myMySQL:
                 if record[i]:
                     to_write.append(line[i])
             # 构造插入数据的 SQL 语句
-            sql = f"INSERT INTO " + self.table_name + \
-                " "+self.field_sql+" VALUES ("
-            for item in to_write:
+            sql = f'INSERT INTO {self.table_name} {self.field_sql} VALUES ('
+            for _ in to_write:
                 sql += "%s, "
             # 移除最后的逗号并添加闭合的括号
             sql = sql.rstrip(', ') + ")"
             # 执行 SQL 语句
             try:
-                cursor.execute(sql, to_write)
+                self.cursor.execute(sql, to_write)
+            except pymysql.OperationalError as e:
+                print("Error:", e)
+                print("Try to reconnect to the database...")
+                self.connect()
+                self.cursor = self.conn.cursor() # 重新创建游标对象
+                self.cursor.execute(sql, to_write) # 重新执行SQL语句
+                # self.write_to_mysql(OUTPUT, record, types)
             except Exception as e:
                 print("Error:", e)
                 print("Error SQL:", sql, to_write)
@@ -412,9 +503,16 @@ class myMySQL:
         self.conn.commit()
 
         # 关闭游标和连接
-        cursor.close()
+        self.cursor.close()
 
     def close(self):
-        self.conn.close()
-        print("成功关闭数据库。")
-        print("Successfully closed the database.")
+        try:
+            self.conn.close()
+            print("成功关闭数据库。")
+            print("Successfully closed the database.")
+        except:
+            print("关闭数据库失败。")
+            print("Failed to close the database.")
+    
+    def __del__(self):
+        self.close()

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/10.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/100.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/101.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/102.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/103.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/104.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/105.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/106.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/107.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/108.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/109.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/11.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/110.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/111.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/112.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/113.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/114.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/115.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/116.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/117.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/118.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/119.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/12.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/120.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/121.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/122.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/123.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/124.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/125.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/126.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/127.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/128.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/129.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/13.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/130.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/131.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/132.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/133.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/134.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/135.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/136.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/137.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/138.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/139.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/14.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/140.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/141.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/142.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/143.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/144.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/145.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/146.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/147.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/148.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/149.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/15.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/150.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/16.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/17.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/18.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/19.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/20.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/21.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/22.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/23.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/24.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/25.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/26.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/27.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/28.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/29.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/30.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/31.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/32.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/33.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/34.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/35.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/36.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/37.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/38.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/39.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/4.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/40.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/41.json


Some files were not shown because too many files changed in this diff