Browse Source

Compatible with Windows

naibo 2 years ago
parent
commit
c31bc94dd0

+ 43 - 14
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py

@@ -15,7 +15,7 @@ import time
 import requests
 import requests
 from urllib.parse import urljoin
 from urllib.parse import urljoin
 from lxml import etree
 from lxml import etree
-import undetected_chromedriver as uc
+# import undetected_chromedriver as uc
 from pynput.keyboard import Key, Listener
 from pynput.keyboard import Key, Listener
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.keys import Keys
@@ -42,8 +42,10 @@ import pytesseract
 from PIL import Image
 from PIL import Image
 # import uuid
 # import uuid
 from threading import Thread, Event
 from threading import Thread, Event
-from myChrome import MyChrome, MyUCChrome
-from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
+from myChrome import MyChrome
+if sys.platform != "darwin":
+    from myChrome import MyUCChrome
+from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"
 desired_capabilities["pageLoadStrategy"] = "none"
 
 
@@ -279,7 +281,10 @@ class BrowserThread(Thread):
         except:
         except:
             self.Log('Time out after set seconds when scrolling. ')
             self.Log('Time out after set seconds when scrolling. ')
             self.recordLog('Time out after set seconds when scrolling')
             self.recordLog('Time out after set seconds when scrolling')
-            self.browser.execute_script('window.stop()')
+            try:
+                self.browser.execute_script('window.stop()')
+            except:
+                pass
             if scrollType != 0 and para["scrollCount"] > 0:  # 控制屏幕向下滚动
             if scrollType != 0 and para["scrollCount"] > 0:  # 控制屏幕向下滚动
                 for i in range(para["scrollCount"]):
                 for i in range(para["scrollCount"]):
                     self.Log("Wait for set second after screen scrolling")
                     self.Log("Wait for set second after screen scrolling")
@@ -677,7 +682,10 @@ class BrowserThread(Thread):
                         # 切换历史记录等待:
                         # 切换历史记录等待:
                         self.Log("Change history back time or:",
                         self.Log("Change history back time or:",
                                  node["parameters"]["historyWait"])
                                  node["parameters"]["historyWait"])
-                        self.browser.execute_script('window.stop()')
+                        try:
+                            self.browser.execute_script('window.stop()')
+                        except:
+                            pass
                     if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
                     if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
                         output = self.execute_code(int(
                         output = self.execute_code(int(
                             node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
                             node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
@@ -722,7 +730,10 @@ class BrowserThread(Thread):
                         # time.sleep(2)
                         # time.sleep(2)
                         self.Log("Change history back time or:",
                         self.Log("Change history back time or:",
                                  node["parameters"]["historyWait"])
                                  node["parameters"]["historyWait"])
-                        self.browser.execute_script('window.stop()')
+                        try:
+                            self.browser.execute_script('window.stop()')
+                        except:
+                            pass
                 except NoSuchElementException:
                 except NoSuchElementException:
                     print("Loop element not found: ", path)
                     print("Loop element not found: ", path)
                     print("找不到循环元素: ", path)
                     print("找不到循环元素: ", path)
@@ -995,7 +1006,10 @@ class BrowserThread(Thread):
                 self.history["index"] = self.browser.execute_script(
                 self.history["index"] = self.browser.execute_script(
                     "return history.length")
                     "return history.length")
             except TimeoutException:
             except TimeoutException:
-                self.browser.execute_script('window.stop()')
+                try:
+                    self.browser.execute_script('window.stop()')
+                except:
+                    pass
                 self.history["index"] = self.browser.execute_script(
                 self.history["index"] = self.browser.execute_script(
                     "return history.length")
                     "return history.length")
         else:
         else:
@@ -1003,7 +1017,10 @@ class BrowserThread(Thread):
                 self.history["index"] = self.browser.execute_script(
                 self.history["index"] = self.browser.execute_script(
                     "return history.length")
                     "return history.length")
             except TimeoutException:
             except TimeoutException:
-                self.browser.execute_script('window.stop()')
+                try:
+                    self.browser.execute_script('window.stop()')
+                except:
+                    pass
                 self.history["index"] = self.browser.execute_script(
                 self.history["index"] = self.browser.execute_script(
                     "return history.length")
                     "return history.length")
                 # 如果打开了新窗口,切换到新窗口
                 # 如果打开了新窗口,切换到新窗口
@@ -1275,7 +1292,10 @@ class BrowserThread(Thread):
                         self.Log('Time out after set seconds when getting data')
                         self.Log('Time out after set seconds when getting data')
                         self.recordLog(
                         self.recordLog(
                             'Time out after set seconds when getting data')
                             'Time out after set seconds when getting data')
-                        self.browser.execute_script('window.stop()')
+                        try:
+                            self.browser.execute_script('window.stop()')
+                        except:
+                            pass
                         if p["relative"]:  # 是否相对xpath
                         if p["relative"]:  # 是否相对xpath
                             if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身,不需要二次查找
                             if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身,不需要二次查找
                                 element = loopElement
                                 element = loopElement
@@ -1327,8 +1347,8 @@ class BrowserThread(Thread):
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    from multiprocessing import freeze_support
-    freeze_support() # 防止无限死循环多开
+    # from multiprocessing import freeze_support
+    # freeze_support() # 防止无限死循环多开
     config = {
     config = {
         "id": [0],
         "id": [0],
         "saved_file_name": "",
         "saved_file_name": "",
@@ -1361,6 +1381,9 @@ if __name__ == '__main__':
         # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
         # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
         # driver_path = os.getcwd()+ "/chromedriver_mac64"
         # driver_path = os.getcwd()+ "/chromedriver_mac64"
         print(driver_path)
         print(driver_path)
+        if c.config_folder == "":
+            c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
+        # print("Config folder for MacOS:", c.config_folder)
     elif os.path.exists(os.getcwd()+"/EasySpider/resources"):  # 打包后的路径
     elif os.path.exists(os.getcwd()+"/EasySpider/resources"):  # 打包后的路径
         print("Finding chromedriver in EasySpider",
         print("Finding chromedriver in EasySpider",
               os.getcwd()+"/EasySpider")
               os.getcwd()+"/EasySpider")
@@ -1425,6 +1448,7 @@ if __name__ == '__main__':
     try:
     try:
         with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
         with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
             config = json.load(f)
             config = json.load(f)
+            print("Config file path: " + c.config_folder + c.config_file_name)
             absolute_user_data_folder = config["absolute_user_data_folder"]
             absolute_user_data_folder = config["absolute_user_data_folder"]
             print("\nAbsolute_user_data_folder:",
             print("\nAbsolute_user_data_folder:",
                   absolute_user_data_folder, "\n")
                   absolute_user_data_folder, "\n")
@@ -1501,10 +1525,15 @@ if __name__ == '__main__':
             browser_t = MyChrome(
             browser_t = MyChrome(
                 options=options, chrome_options=option, executable_path=driver_path)
                 options=options, chrome_options=option, executable_path=driver_path)
         elif cloudflare == 1:
         elif cloudflare == 1:
-            browser_t = MyUCChrome(
+            if sys.platform != "darwin":
+                browser_t = MyUCChrome(
                 options=options, chrome_options=option, driver_executable_path=driver_path)
                 options=options, chrome_options=option, driver_executable_path=driver_path)
-            print("Pass Cloudflare Mode")
-            print("过Cloudflare验证模式")
+                print("Pass Cloudflare Mode")
+                print("过Cloudflare验证模式")
+            else:
+                print("Not support Cloudflare Mode on MacOS")
+                print("MacOS不支持Cloudflare验证模式")
+                sys.exit()
         event = Event()
         event = Event()
         event.set()
         event.set()
         thread = BrowserThread(browser_t, i, service,
         thread = BrowserThread(browser_t, i, service,

+ 64 - 62
.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py

@@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver import ActionChains
 from selenium.webdriver import ActionChains
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.by import By
-import undetected_chromedriver_ES as uc
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"
 desired_capabilities["pageLoadStrategy"] = "none"
 
 
@@ -89,77 +88,80 @@ class MyChrome(webdriver.Chrome):
                 raise NoSuchElementException
                 raise NoSuchElementException
         else:
         else:
             return super().find_elements(by=by, value=value)
             return super().find_elements(by=by, value=value)
-        
 
 
-class MyUCChrome(uc.Chrome):
+import sys
+if sys.platform != "darwin": # MacOS不支持Cloudflare
+    import undetected_chromedriver_ES as uc
 
 
-    def __init__(self, *args, **kwargs):
-        self.iframe_env = False  # 现在的环境是root还是iframe
-        super().__init__(*args, **kwargs)  # 调用父类的 __init__
+    class MyUCChrome(uc.Chrome):
 
 
-    def find_element(self, by=By.ID, value=None, iframe=False):
-        # 在这里改变查找元素的行为
-        if self.iframe_env:
-            super().switch_to.default_content()
-            self.iframe_env = False
-        if iframe:
-            # 获取所有的 iframe
-            try:
-                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
-            except Exception as e:
-                print(e)
-            find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
-            for iframe in iframes:
-                # 切换到 iframe
-                super().switch_to.default_content()
-                super().switch_to.frame(iframe)
-                self.iframe_env = True
-                try:
-                    # 在 iframe 中查找并点击元素
-                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
-                    element = super().find_element(by=by, value=value)
-                    find_element = True
-                except:
-                    print("No such element found in the iframe")
-                # 完成操作后切回主文档
-                # super().switch_to.default_content()
-                if find_element:
-                    return element
-            if not find_element:
-                raise NoSuchElementException
-        else:
-            return super().find_element(by=by, value=value)
+        def __init__(self, *args, **kwargs):
+            self.iframe_env = False  # 现在的环境是root还是iframe
+            super().__init__(*args, **kwargs)  # 调用父类的 __init__
 
 
-    def find_elements(self, by=By.ID, value=None, iframe=False):
-        # 在这里改变查找元素的行为
-        if self.iframe_env:
-            super().switch_to.default_content()
-            self.iframe_env = False
-        if iframe:
-            # 获取所有的 iframe
-            iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
-            find_element = False
-            # 遍历所有的 iframe 并点击里面的元素
-            for iframe in iframes:
-                # 切换到 iframe
+        def find_element(self, by=By.ID, value=None, iframe=False):
+            # 在这里改变查找元素的行为
+            if self.iframe_env:
+                super().switch_to.default_content()
+                self.iframe_env = False
+            if iframe:
+                # 获取所有的 iframe
                 try:
                 try:
+                    iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+                except Exception as e:
+                    print(e)
+                find_element = False
+                # 遍历所有的 iframe 并点击里面的元素
+                for iframe in iframes:
+                    # 切换到 iframe
                     super().switch_to.default_content()
                     super().switch_to.default_content()
                     super().switch_to.frame(iframe)
                     super().switch_to.frame(iframe)
                     self.iframe_env = True
                     self.iframe_env = True
-                    # 在 iframe 中查找并点击元素
-                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
-                    elements = super().find_elements(by=by, value=value)
-                    if len(elements) > 0:
+                    try:
+                        # 在 iframe 中查找并点击元素
+                        # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+                        element = super().find_element(by=by, value=value)
                         find_element = True
                         find_element = True
+                    except:
+                        print("No such element found in the iframe")
                     # 完成操作后切回主文档
                     # 完成操作后切回主文档
                     # super().switch_to.default_content()
                     # super().switch_to.default_content()
                     if find_element:
                     if find_element:
-                        return elements
-                except:
-                    print("No such element found in the iframe")
-            if not find_element:
-                raise NoSuchElementException
-        else:
-            return super().find_elements(by=by, value=value)
+                        return element
+                if not find_element:
+                    raise NoSuchElementException
+            else:
+                return super().find_element(by=by, value=value)
+
+        def find_elements(self, by=By.ID, value=None, iframe=False):
+            # 在这里改变查找元素的行为
+            if self.iframe_env:
+                super().switch_to.default_content()
+                self.iframe_env = False
+            if iframe:
+                # 获取所有的 iframe
+                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+                find_element = False
+                # 遍历所有的 iframe 并点击里面的元素
+                for iframe in iframes:
+                    # 切换到 iframe
+                    try:
+                        super().switch_to.default_content()
+                        super().switch_to.frame(iframe)
+                        self.iframe_env = True
+                        # 在 iframe 中查找并点击元素
+                        # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+                        elements = super().find_elements(by=by, value=value)
+                        if len(elements) > 0:
+                            find_element = True
+                        # 完成操作后切回主文档
+                        # super().switch_to.default_content()
+                        if find_element:
+                            return elements
+                    except:
+                        print("No such element found in the iframe")
+                if not find_element:
+                    raise NoSuchElementException
+            else:
+                return super().find_elements(by=by, value=value)
 
 

+ 24 - 17
.temp_to_pub/EasySpider_windows_x64/Code/utils.py

@@ -4,10 +4,11 @@ import csv
 import datetime
 import datetime
 import json
 import json
 import os
 import os
+import sys
 import re
 import re
 import time
 import time
 import uuid
 import uuid
-import keyboard
+# import keyboard
 from openpyxl import Workbook, load_workbook
 from openpyxl import Workbook, load_workbook
 import requests
 import requests
 from urllib.parse import urlparse
 from urllib.parse import urlparse
@@ -75,20 +76,20 @@ def on_release_creator(event, press_time):
     return on_release
     return on_release
 
 
 
 
-def check_pause(key, event):
-    while True:
-        if keyboard.is_pressed(key):  # 按下p键,暂停程序
-            if event._flag == False:
-                print("任务执行中,长按p键暂停执行。")
-                print("Task is running, long press 'p' to pause.")
-                # 设置Event的值为True,使得线程b可以继续执行
-                event.set()
-            else:
-                # 设置Event的值为False,使得线程b暂停执行
-                print("任务已暂停,长按p键继续执行...")
-                print("Task paused, press 'p' to continue...")
-                event.clear()
-        time.sleep(1)  # 每秒检查一次
+# def check_pause(key, event):
+#     while True:
+#         if keyboard.is_pressed(key):  # 按下p键,暂停程序
+#             if event._flag == False:
+#                 print("任务执行中,长按p键暂停执行。")
+#                 print("Task is running, long press 'p' to pause.")
+#                 # 设置Event的值为True,使得线程b可以继续执行
+#                 event.set()
+#             else:
+#                 # 设置Event的值为False,使得线程b暂停执行
+#                 print("任务已暂停,长按p键继续执行...")
+#                 print("Task paused, press 'p' to continue...")
+#                 event.clear()
+#         time.sleep(1)  # 每秒检查一次
 
 
 
 
 def download_image(url, save_directory):
 def download_image(url, save_directory):
@@ -219,16 +220,22 @@ class myMySQL:
     def __init__(self, config_file="mysql_config.json"):
     def __init__(self, config_file="mysql_config.json"):
         # 读取配置文件
         # 读取配置文件
         try:
         try:
+            if sys.platform == "darwin":
+                if config_file.find("./") >= 0:
+                    config_file = config_file.replace("./", "")
+                config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
+            print("MySQL config file path: ", config_file)
             with open(config_file, 'r') as f:
             with open(config_file, 'r') as f:
                 config = json.load(f)
                 config = json.load(f)
                 host = config["host"]
                 host = config["host"]
                 port = config["port"]
                 port = config["port"]
-                user = config["user"]
+                user = config["username"]
                 passwd = config["password"]
                 passwd = config["password"]
                 db = config["database"]
                 db = config["database"]
-        except:
+        except Exception as e:
             print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。")
             print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。")
             print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")
             print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")
+            print(e)
         try:
         try:
             self.conn = pymysql.connect(
             self.conn = pymysql.connect(
             host=host, port=port, user=user, passwd=passwd, db=db)
             host=host, port=port, user=user, passwd=passwd, db=db)

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/123.json


BIN
ElectronJS/EasySpider_en.crx


BIN
ElectronJS/EasySpider_zh.crx


Some files were not shown because too many files changed in this diff