2 年之前 · 8c5267d66c
--- a/.temp_to_pub/.gitignore
+++ b/.temp_to_pub/.gitignore
@@ -5,6 +5,7 @@ EasySpider
 
				 EasySpider.app/
			
 
				 EasySpider_windows_x64/user_data
			
 
				 *.tmp
			
 
				+*.tar.gz
			
 
				 *.7z*
			
 
				 config.json
			
 
				 mysql_config.json
			
--- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py
+++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py
@@ -15,6 +15,8 @@ import time
 
				 import requests
			
 
				 from urllib.parse import urljoin
			
 
				 from lxml import etree
			
 
				+# import undetected_chromedriver as uc
			
 
				+from pynput.keyboard import Key, Listener
			
 
				 from selenium.webdriver.chrome.options import Options
			
 
				 from selenium.webdriver.common.keys import Keys
			
 
				 from selenium.webdriver.common.action_chains import ActionChains
			
@@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
				 from selenium.webdriver.support.ui import Select
			
 
				 from selenium.webdriver import ActionChains
			
 
				 from selenium.webdriver.common.by import By
			
 
				-import undetected_chromedriver as uc
			
 
				 import random
			
 
				 # import pandas as pd
			
 
				 from openpyxl import load_workbook, Workbook
			
@@ -41,8 +42,10 @@ import pytesseract
 
				 from PIL import Image
			
 
				 # import uuid
			
 
				 from threading import Thread, Event
			
 
				-from myChrome import MyChrome, MyUCChrome
			
 
				-from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
			
 
				+from myChrome import MyChrome
			
 
				+if sys.platform != "darwin":
			
 
				+    from myChrome import MyUCChrome
			
 
				+from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
			
 
				 desired_capabilities = DesiredCapabilities.CHROME
			
 
				 desired_capabilities["pageLoadStrategy"] = "none"
			
 
				 
			
@@ -1326,6 +1329,8 @@ class BrowserThread(Thread):
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				+    # from multiprocessing import freeze_support
			
 
				+    # freeze_support() # 防止无限死循环多开
			
 
				     config = {
			
 
				         "id": [0],
			
 
				         "saved_file_name": "",
			
@@ -1358,6 +1363,9 @@ if __name__ == '__main__':
 
				         # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
			
 
				         # driver_path = os.getcwd()+ "/chromedriver_mac64"
			
 
				         print(driver_path)
			
 
				+        if c.config_folder == "":
			
 
				+            c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
			
 
				+        # print("Config folder for MacOS:", c.config_folder)
			
 
				     elif os.path.exists(os.getcwd()+"/EasySpider/resources"):  # 打包后的路径
			
 
				         print("Finding chromedriver in EasySpider",
			
 
				               os.getcwd()+"/EasySpider")
			
@@ -1367,16 +1375,19 @@ if __name__ == '__main__':
 
				             driver_path = os.path.join(
			
 
				                 os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
			
 
				             option.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				+            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				         elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
			
 
				             options.binary_location = os.path.join(
			
 
				                 os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
			
 
				             driver_path = os.path.join(
			
 
				                 os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
			
 
				             option.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				+            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				         elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
			
 
				             options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
			
 
				             driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
			
 
				             option.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				+            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
			
 
				         else:
			
 
				             print("Unsupported platform")
			
 
				             sys.exit()
			
@@ -1419,6 +1430,7 @@ if __name__ == '__main__':
 
				     try:
			
 
				         with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
			
 
				             config = json.load(f)
			
 
				+            print("Config file path: " + c.config_folder + c.config_file_name)
			
 
				             absolute_user_data_folder = config["absolute_user_data_folder"]
			
 
				             print("\nAbsolute_user_data_folder:",
			
 
				                   absolute_user_data_folder, "\n")
			
@@ -1428,6 +1440,9 @@ if __name__ == '__main__':
 
				         option.add_argument(
			
 
				             f'--user-data-dir={absolute_user_data_folder}')  # TMALL 反扒
			
 
				         option.add_argument("--profile-directory=Default")
			
 
				+        options.add_argument(
			
 
				+            f'--user-data-dir={absolute_user_data_folder}')  # TMALL 反扒
			
 
				+        options.add_argument("--profile-directory=Default")
			
 
				 
			
 
				     if c.headless:
			
 
				         print("Headless mode")
			
@@ -1444,7 +1459,7 @@ if __name__ == '__main__':
 
				 
			
 
				     threads = []
			
 
				     for i in c.id:
			
 
				-        print(options)
			
 
				+        # print(options)
			
 
				         print("id: ", i)
			
 
				         if c.read_type == "remote":
			
 
				             print("remote")
			
@@ -1492,10 +1507,15 @@ if __name__ == '__main__':
 
				             browser_t = MyChrome(
			
 
				                 options=options, chrome_options=option, executable_path=driver_path)
			
 
				         elif cloudflare == 1:
			
 
				-            browser_t = MyUCChrome(
			
 
				-                options=options, chrome_options=option, executable_path=driver_path)
			
 
				-            print("Pass Cloudflare Mode")
			
 
				-            print("过Cloudflare验证模式")
			
 
				+            if sys.platform != "darwin":
			
 
				+                browser_t = MyUCChrome(
			
 
				+                options=options, chrome_options=option, driver_executable_path=driver_path)
			
 
				+                print("Pass Cloudflare Mode")
			
 
				+                print("过Cloudflare验证模式")
			
 
				+            else:
			
 
				+                print("Not support Cloudflare Mode on MacOS")
			
 
				+                print("MacOS不支持Cloudflare验证模式")
			
 
				+                sys.exit()
			
 
				         event = Event()
			
 
				         event.set()
			
 
				         thread = BrowserThread(browser_t, i, service,
			
@@ -1505,26 +1525,33 @@ if __name__ == '__main__':
 
				         thread.start()
			
 
				         # Set the pause operation
			
 
				         # if sys.platform != "linux": 
			
 
				+        #     time.sleep(3)
			
 
				+        #     print("\n\n----------------------------------")
			
 
				+        #     print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
			
 
				+        #     print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
			
 
				+        #     print("----------------------------------\n\n")
			
 
				         #     Thread(target=check_pause, args=("p", event)).start()
			
 
				         # else:
			
 
				         time.sleep(3)
			
 
				+        press_time = {"duration": 0, "is_pressed": False}
			
 
				         print("\n\n----------------------------------")
			
 
				-        print("正在运行任务，按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次按p键。")
			
 
				-        print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
			
 
				+        print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
			
 
				+        print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
			
 
				         print("----------------------------------\n\n")
			
 
				         # 使用监听器监听键盘输入
			
 
				         try:
			
 
				-            from pynput.keyboard import Key, Listener
			
 
				-            with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
			
 
				+            with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
			
 
				                 listener.join()
			
 
				         except:
			
 
				-            print("您的操作系统不支持暂停功能。")
			
 
				-            print("Your operating system does not support the pause function.")
			
 
				+            pass
			
 
				+            # print("您的操作系统不支持暂停功能。")
			
 
				+            # print("Your operating system does not support the pause function.")
			
 
				             
			
 
				         
			
 
				-        
			
 
				+    # print("线程长度：", len(threads) )
			
 
				 	
			
 
				     for thread in threads:
			
 
				+        print()
			
 
				         thread.join()
			
 
				 
			
 
				     for thread in threads:
			
--- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py
+++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py
@@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
				 from selenium.webdriver.support.ui import Select
			
 
				 from selenium.webdriver import ActionChains
			
 
				 from selenium.webdriver.common.by import By
			
 
				-import undetected_chromedriver as uc
			
 
				 desired_capabilities = DesiredCapabilities.CHROME
			
 
				 desired_capabilities["pageLoadStrategy"] = "none"
			
 
				 
			
@@ -89,77 +88,80 @@ class MyChrome(webdriver.Chrome):
 
				                 raise NoSuchElementException
			
 
				         else:
			
 
				             return super().find_elements(by=by, value=value)
			
 
				-        
			
 
				 
			
 
				-class MyUCChrome(uc.Chrome):
			
 
				+import sys
			
 
				+if sys.platform != "darwin": # MacOS不支持Cloudflare
			
 
				+    import undetected_chromedriver_ES as uc
			
 
				 
			
 
				-    def __init__(self, *args, **kwargs):
			
 
				-        self.iframe_env = False  # 现在的环境是root还是iframe
			
 
				-        super().__init__(*args, **kwargs)  # 调用父类的 __init__
			
 
				+    class MyUCChrome(uc.Chrome):
			
 
				 
			
 
				-    def find_element(self, by=By.ID, value=None, iframe=False):
			
 
				-        # 在这里改变查找元素的行为
			
 
				-        if self.iframe_env:
			
 
				-            super().switch_to.default_content()
			
 
				-            self.iframe_env = False
			
 
				-        if iframe:
			
 
				-            # 获取所有的 iframe
			
 
				-            try:
			
 
				-                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				-            except Exception as e:
			
 
				-                print(e)
			
 
				-            find_element = False
			
 
				-            # 遍历所有的 iframe 并点击里面的元素
			
 
				-            for iframe in iframes:
			
 
				-                # 切换到 iframe
			
 
				-                super().switch_to.default_content()
			
 
				-                super().switch_to.frame(iframe)
			
 
				-                self.iframe_env = True
			
 
				-                try:
			
 
				-                    # 在 iframe 中查找并点击元素
			
 
				-                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				-                    element = super().find_element(by=by, value=value)
			
 
				-                    find_element = True
			
 
				-                except:
			
 
				-                    print("No such element found in the iframe")
			
 
				-                # 完成操作后切回主文档
			
 
				-                # super().switch_to.default_content()
			
 
				-                if find_element:
			
 
				-                    return element
			
 
				-            if not find_element:
			
 
				-                raise NoSuchElementException
			
 
				-        else:
			
 
				-            return super().find_element(by=by, value=value)
			
 
				+        def __init__(self, *args, **kwargs):
			
 
				+            self.iframe_env = False  # 现在的环境是root还是iframe
			
 
				+            super().__init__(*args, **kwargs)  # 调用父类的 __init__
			
 
				 
			
 
				-    def find_elements(self, by=By.ID, value=None, iframe=False):
			
 
				-        # 在这里改变查找元素的行为
			
 
				-        if self.iframe_env:
			
 
				-            super().switch_to.default_content()
			
 
				-            self.iframe_env = False
			
 
				-        if iframe:
			
 
				-            # 获取所有的 iframe
			
 
				-            iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				-            find_element = False
			
 
				-            # 遍历所有的 iframe 并点击里面的元素
			
 
				-            for iframe in iframes:
			
 
				-                # 切换到 iframe
			
 
				+        def find_element(self, by=By.ID, value=None, iframe=False):
			
 
				+            # 在这里改变查找元素的行为
			
 
				+            if self.iframe_env:
			
 
				+                super().switch_to.default_content()
			
 
				+                self.iframe_env = False
			
 
				+            if iframe:
			
 
				+                # 获取所有的 iframe
			
 
				                 try:
			
 
				+                    iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				+                except Exception as e:
			
 
				+                    print(e)
			
 
				+                find_element = False
			
 
				+                # 遍历所有的 iframe 并点击里面的元素
			
 
				+                for iframe in iframes:
			
 
				+                    # 切换到 iframe
			
 
				                     super().switch_to.default_content()
			
 
				                     super().switch_to.frame(iframe)
			
 
				                     self.iframe_env = True
			
 
				-                    # 在 iframe 中查找并点击元素
			
 
				-                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				-                    elements = super().find_elements(by=by, value=value)
			
 
				-                    if len(elements) > 0:
			
 
				+                    try:
			
 
				+                        # 在 iframe 中查找并点击元素
			
 
				+                        # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				+                        element = super().find_element(by=by, value=value)
			
 
				                         find_element = True
			
 
				+                    except:
			
 
				+                        print("No such element found in the iframe")
			
 
				                     # 完成操作后切回主文档
			
 
				                     # super().switch_to.default_content()
			
 
				                     if find_element:
			
 
				-                        return elements
			
 
				-                except:
			
 
				-                    print("No such element found in the iframe")
			
 
				-            if not find_element:
			
 
				-                raise NoSuchElementException
			
 
				-        else:
			
 
				-            return super().find_elements(by=by, value=value)
			
 
				+                        return element
			
 
				+                if not find_element:
			
 
				+                    raise NoSuchElementException
			
 
				+            else:
			
 
				+                return super().find_element(by=by, value=value)
			
 
				+
			
 
				+        def find_elements(self, by=By.ID, value=None, iframe=False):
			
 
				+            # 在这里改变查找元素的行为
			
 
				+            if self.iframe_env:
			
 
				+                super().switch_to.default_content()
			
 
				+                self.iframe_env = False
			
 
				+            if iframe:
			
 
				+                # 获取所有的 iframe
			
 
				+                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				+                find_element = False
			
 
				+                # 遍历所有的 iframe 并点击里面的元素
			
 
				+                for iframe in iframes:
			
 
				+                    # 切换到 iframe
			
 
				+                    try:
			
 
				+                        super().switch_to.default_content()
			
 
				+                        super().switch_to.frame(iframe)
			
 
				+                        self.iframe_env = True
			
 
				+                        # 在 iframe 中查找并点击元素
			
 
				+                        # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				+                        elements = super().find_elements(by=by, value=value)
			
 
				+                        if len(elements) > 0:
			
 
				+                            find_element = True
			
 
				+                        # 完成操作后切回主文档
			
 
				+                        # super().switch_to.default_content()
			
 
				+                        if find_element:
			
 
				+                            return elements
			
 
				+                    except:
			
 
				+                        print("No such element found in the iframe")
			
 
				+                if not find_element:
			
 
				+                    raise NoSuchElementException
			
 
				+            else:
			
 
				+                return super().find_elements(by=by, value=value)
			
 
				 
			
--- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py
+++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py
@@ -4,6 +4,7 @@ import csv
 
				 import datetime
			
 
				 import json
			
 
				 import os
			
 
				+import sys
			
 
				 import re
			
 
				 import time
			
 
				 import uuid
			
@@ -23,27 +24,57 @@ def is_valid_url(url):
 
				 
			
 
				 def lowercase_tags_in_xpath(xpath):
			
 
				     return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
			
 
				-    
			
 
				-def on_release_creator(event):
			
 
				+
			
 
				+
			
 
				+def on_press_creator(press_time, event):
			
 
				+    def on_press(key):
			
 
				+        try:
			
 
				+            if key.char == 'p':
			
 
				+                if press_time["is_pressed"] == False: # 没按下p键时，记录按下p键的时间
			
 
				+                    press_time["duration"] = time.time()
			
 
				+                    press_time["is_pressed"] = True
			
 
				+                else: # 按下p键时，判断按下p键的时间是否超过2.5秒
			
 
				+                    duration = time.time() - press_time["duration"]
			
 
				+                    if duration > 2:
			
 
				+                        if event._flag == False:
			
 
				+                            print("任务执行中，长按p键暂停执行。")
			
 
				+                            print("Task is running, long press 'p' to pause.")
			
 
				+                            # 设置Event的值为True，使得线程b可以继续执行
			
 
				+                            event.set()
			
 
				+                        else:
			
 
				+                            # 设置Event的值为False，使得线程b暂停执行
			
 
				+                            print("任务已暂停，长按p键继续执行...")
			
 
				+                            print("Task paused, long press 'p' to continue...")
			
 
				+                            event.clear()
			
 
				+                        press_time["duration"] = time.time()
			
 
				+                        press_time["is_pressed"] = False
			
 
				+                    # print("按下p键时间：", press_time["duration"])
			
 
				+        except:
			
 
				+            pass
			
 
				+    return on_press
			
 
				+
			
 
				+def on_release_creator(event, press_time):
			
 
				     def on_release(key):
			
 
				         try:
			
 
				-            if key.char == 'p':  # 当按下esc键时，退出监听
			
 
				-                if event._flag == False:
			
 
				-                    print("任务执行中，按p键暂停执行。")
			
 
				-                    print("Task is running, press 'p' to pause.")
			
 
				-                    # 设置Event的值为True，使得线程b可以继续执行
			
 
				-                    event.set()
			
 
				-                else:
			
 
				-                    # 设置Event的值为False，使得线程b暂停执行
			
 
				-                    print("任务已暂停，按p键继续执行...")
			
 
				-                    print("Task paused, press 'p' to continue...")
			
 
				-                    event.clear()
			
 
				+            # duration = time.time() - press_time["duration"]
			
 
				+            # # print("松开p键时间：", time.time(), "Duration: ", duration)
			
 
				+            # if duration > 2.5 and key.char == 'p':
			
 
				+            #     if event._flag == False:
			
 
				+            #         print("任务执行中，按p键暂停执行。")
			
 
				+            #         print("Task is running, press 'p' to pause.")
			
 
				+            #         # 设置Event的值为True，使得线程b可以继续执行
			
 
				+            #         event.set()
			
 
				+            #     else:
			
 
				+            #         # 设置Event的值为False，使得线程b暂停执行
			
 
				+            #         print("任务已暂停，按p键继续执行...")
			
 
				+            #         print("Task paused, press 'p' to continue...")
			
 
				+            #         event.clear()
			
 
				+            #     press_time["duration"] = time.time()
			
 
				+            press_time["is_pressed"] = False
			
 
				         except:
			
 
				             pass
			
 
				     return on_release
			
 
				 
			
 
				-def on_press(key):
			
 
				-    pass
			
 
				 
			
 
				 # def check_pause(key, event):
			
 
				 #     while True:
			
@@ -189,16 +220,22 @@ class myMySQL:
 
				     def __init__(self, config_file="mysql_config.json"):
			
 
				         # 读取配置文件
			
 
				         try:
			
 
				+            if sys.platform == "darwin":
			
 
				+                if config_file.find("./") >= 0:
			
 
				+                    config_file = config_file.replace("./", "")
			
 
				+                config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
			
 
				+            print("MySQL config file path: ", config_file)
			
 
				             with open(config_file, 'r') as f:
			
 
				                 config = json.load(f)
			
 
				                 host = config["host"]
			
 
				                 port = config["port"]
			
 
				-                user = config["user"]
			
 
				+                user = config["username"]
			
 
				                 passwd = config["password"]
			
 
				                 db = config["database"]
			
 
				-        except:
			
 
				+        except Exception as e:
			
 
				             print("读取配置文件失败，请检查配置文件："+config_file+"是否存在。")
			
 
				             print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")
			
 
				+            print(e)
			
 
				         try:
			
 
				             self.conn = pymysql.connect(
			
 
				             host=host, port=port, user=user, passwd=passwd, db=db)
			
--- a/.temp_to_pub/compress.cmd
+++ b/.temp_to_pub/compress.cmd
@@ -1 +1 @@
 
				-python compress.py
			
 
				+python3 compress.py
			
--- a/.temp_to_pub/compress.py
+++ b/.temp_to_pub/compress.py
@@ -45,7 +45,10 @@ def compress_folder_to_7z_split(folder_path, output_file):
 
				     try:
			
 
				         subprocess.call(["7z", "a", "-v95m", output_file, folder_path])
			
 
				     except:
			
 
				-        subprocess.call(["7za", "a", "-v95m", output_file, folder_path])
			
 
				+        try:
			
 
				+            subprocess.call(["7za", "a", "-v95m", output_file, folder_path])
			
 
				+        except:
			
 
				+            subprocess.call(["7zz", "a", "-v95m", output_file, folder_path])
			
 
				 
			
 
				 easyspider_version = "0.3.5"
			
 
				 
			
@@ -104,5 +107,11 @@ if __name__ == "__main__":
 
				         subprocess.call(["tar", "-Jcvf", file_name, "./EasySpider_Linux_x64"])
			
 
				         print(f"Compress {file_name} successfully!")
			
 
				     elif sys.platform == "darwin" and platform.architecture()[0] == "64bit":
			
 
				-        pass
			
 
				+        file_name = f"EasySpider_{easyspider_version}_MacOS_all_arch.tar.gz"
			
 
				+        if os.path.exists("./EasySpider_MacOS_all_arch/Data"):
			
 
				+            shutil.rmtree("./EasySpider_MacOS_all_arch/Data")
			
 
				+        os.mkdir("./EasySpider_MacOS_all_arch/Data")
			
 
				+        subprocess.call(["tar", "-zcvf", file_name, "./EasySpider_MacOS_all_arch"])
			
 
				+        subprocess.call(["7zz", "a", "-v95m", file_name.replace(".tar.gz", ".7z"), file_name, "请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt"])
			
 
				+        print(f"Compress {file_name} successfully!")
			
 
				 
			
--- a/.temp_to_pub/请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt
+++ b/.temp_to_pub/请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt
@@ -0,0 +1 @@
 
				+请继续解压.tar.gz文件以使用易采集。
			
--- a/.temp_to_pub/请继续解压zip文件以使用EasySpider.txt
+++ b/.temp_to_pub/请继续解压zip文件以使用EasySpider.txt
--- a/ElectronJS/EasySpider_en.crx
+++ b/ElectronJS/EasySpider_en.crx
--- a/ElectronJS/EasySpider_zh.crx
+++ b/ElectronJS/EasySpider_zh.crx
--- a/ElectronJS/config.json
+++ b/ElectronJS/config.json
@@ -1 +1 @@
 
				-{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
			
 
				+{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data1","debug":false,"mysql_config_path":"/Users/naibowang/Documents/EasySpider/ElectronJS/mysql_config.json","absolute_user_data_folder":"/Users/naibowang/Documents/EasySpider/ElectronJS/user_data1"}
			
--- a/ElectronJS/main.js
+++ b/ElectronJS/main.js
@@ -324,7 +324,9 @@ async function beginInvoke(msg, ws) {
 
				             config.absolute_user_data_folder = user_data_folder_path;
			
 
				             fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
			
 
				         }
			
 
				-        config.mysql_config_path = msg.message.mysql_config_path;
			
 
				+        if(msg.message.mysql_config_path != "-1"){
			
 
				+            config.mysql_config_path = msg.message.mysql_config_path;
			
 
				+        }
			
 
				         fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
			
 
				         // child('Chrome/easyspider_executestage.exe', parameters, function(err,stdout, stderr) {
			
 
				         //    console.log(stdout);
			
--- a/ElectronJS/package_macos.sh
+++ b/ElectronJS/package_macos.sh
@@ -23,4 +23,4 @@ cp ../ExecuteStage/easyspider_executestage.py ../.temp_to_pub/EasySpider_MacOS_a
 
				 cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
			
 
				 cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
			
 
				 cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
			
 
				-cp -Rf ../undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
			
 
				+cp -Rf ../ExecuteStage/undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
			
--- a/ElectronJS/src/taskGrid/FlowChart.html
+++ b/ElectronJS/src/taskGrid/FlowChart.html
@@ -563,7 +563,7 @@
 
				                     <label>Is it an extreme anti-scraping website like Cloudflare?</label>
			
 
				                     <select id="cloudflare" name="cloudflare" class="form-control">
			
 
				                         <option value=0>No</option>
			
 
				-                        <option value=1>Yes</option>
			
 
				+                        <option value=1>Yes (Not support on MacOS, unless compile by yourself)</option>
			
 
				                     </select>
			
 
				                     <label>Browser Emulation Type:</label>
			
 
				                     <select id="environment" name="environment" class="form-control">
			
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@@ -563,7 +563,7 @@
 
				                     <label>是否为Cloudflare等极端反爬网站（<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a>）：</label>
			
 
				                     <select id="cloudflare" name="cloudflare" class="form-control">
			
 
				                         <option value = 0>否</option>
			
 
				-                        <option value = 1>是</option>
			
 
				+                        <option value = 1>是（MacOS不支持直接运行，但可以自行编译）</option>
			
 
				                     </select>
			
 
				                     <label>浏览器模拟类型：</label>
			
 
				                     <select id="environment" name="environment" class="form-control">
			
--- a/ElectronJS/src/taskGrid/invokeTask.html
+++ b/ElectronJS/src/taskGrid/invokeTask.html
@@ -209,7 +209,7 @@
 
				                 <input type="text" class="form-control" v-model="user_data_folder"></input>
			
 
				             </div>
			
 
				             <div class="form-group" style="margin-top: 10px" v-if="task.outputFormat=='mysql'">
			
 
				-                <label>{{"MySQL configuration file Path:~MySQL配置文件路径：" | lang}}</label>
			
 
				+                <label>{{"MySQL configuration file Path, relative to this folder:~MySQL配置文件路径，路径相对此文件夹：" | lang}} {{config_folder}}</label>
			
 
				                 <input type="text" class="form-control" v-model="mysql_config_path"></input>
			
 
				             </div>
			
 
				         </form>
			
@@ -485,13 +485,23 @@
 
				     ws.onopen = function () {
			
 
				         // Web Socket 已连接上，使用 send() 方法发送数据
			
 
				         console.log("Connected");
			
 
				-        message = {
			
 
				+        let message = {
			
 
				             type: 0, //消息类型，0代表链接操作
			
 
				             message: {
			
 
				                 id: 1, //socket id
			
 
				             }
			
 
				         };
			
 
				         this.send(JSON.stringify(message));
			
 
				+        message = { //显示flowchart
			
 
				+            type: 5, //消息类型，调用执行程序
			
 
				+            message: {
			
 
				+                "id": -1,
			
 
				+                "user_data_folder": "",
			
 
				+                "mysql_config_path": "-1",
			
 
				+                "execute_type": 1,
			
 
				+            }
			
 
				+        };
			
 
				+        this.send(JSON.stringify(message));
			
 
				     };
			
 
				     ws.onmessage = function(message){
			
 
				         message = JSON.parse(message.data);
			
--- a/ElectronJS/tasks/157.json
+++ b/ElectronJS/tasks/157.json
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -15,7 +15,7 @@ import time
 
				 import requests
			
 
				 from urllib.parse import urljoin
			
 
				 from lxml import etree
			
 
				-import undetected_chromedriver as uc
			
 
				+# import undetected_chromedriver as uc
			
 
				 from pynput.keyboard import Key, Listener
			
 
				 from selenium.webdriver.chrome.options import Options
			
 
				 from selenium.webdriver.common.keys import Keys
			
@@ -42,7 +42,9 @@ import pytesseract
 
				 from PIL import Image
			
 
				 # import uuid
			
 
				 from threading import Thread, Event
			
 
				-from myChrome import MyChrome, MyUCChrome
			
 
				+from myChrome import MyChrome
			
 
				+if sys.platform != "darwin":
			
 
				+    from myChrome import MyUCChrome
			
 
				 from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
			
 
				 desired_capabilities = DesiredCapabilities.CHROME
			
 
				 desired_capabilities["pageLoadStrategy"] = "none"
			
@@ -1327,8 +1329,8 @@ class BrowserThread(Thread):
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    from multiprocessing import freeze_support
			
 
				-    freeze_support() # 防止无限死循环多开
			
 
				+    # from multiprocessing import freeze_support
			
 
				+    # freeze_support() # 防止无限死循环多开
			
 
				     config = {
			
 
				         "id": [0],
			
 
				         "saved_file_name": "",
			
@@ -1361,6 +1363,9 @@ if __name__ == '__main__':
 
				         # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
			
 
				         # driver_path = os.getcwd()+ "/chromedriver_mac64"
			
 
				         print(driver_path)
			
 
				+        if c.config_folder == "":
			
 
				+            c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
			
 
				+        # print("Config folder for MacOS:", c.config_folder)
			
 
				     elif os.path.exists(os.getcwd()+"/EasySpider/resources"):  # 打包后的路径
			
 
				         print("Finding chromedriver in EasySpider",
			
 
				               os.getcwd()+"/EasySpider")
			
@@ -1425,6 +1430,7 @@ if __name__ == '__main__':
 
				     try:
			
 
				         with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
			
 
				             config = json.load(f)
			
 
				+            print("Config file path: " + c.config_folder + c.config_file_name)
			
 
				             absolute_user_data_folder = config["absolute_user_data_folder"]
			
 
				             print("\nAbsolute_user_data_folder:",
			
 
				                   absolute_user_data_folder, "\n")
			
@@ -1501,13 +1507,15 @@ if __name__ == '__main__':
 
				             browser_t = MyChrome(
			
 
				                 options=options, chrome_options=option, executable_path=driver_path)
			
 
				         elif cloudflare == 1:
			
 
				-            if sys.platform == "linux":
			
 
				-                import ssl
			
 
				-                ssl._create_default_https_context = ssl._create_unverified_context # 忽略证书验证
			
 
				-            browser_t = MyUCChrome(
			
 
				+            if sys.platform != "darwin":
			
 
				+                browser_t = MyUCChrome(
			
 
				                 options=options, chrome_options=option, driver_executable_path=driver_path)
			
 
				-            print("Pass Cloudflare Mode")
			
 
				-            print("过Cloudflare验证模式")
			
 
				+                print("Pass Cloudflare Mode")
			
 
				+                print("过Cloudflare验证模式")
			
 
				+            else:
			
 
				+                print("Not support Cloudflare Mode on MacOS")
			
 
				+                print("MacOS不支持Cloudflare验证模式")
			
 
				+                sys.exit()
			
 
				         event = Event()
			
 
				         event.set()
			
 
				         thread = BrowserThread(browser_t, i, service,
			
--- a/ExecuteStage/myChrome.py
+++ b/ExecuteStage/myChrome.py
@@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
				 from selenium.webdriver.support.ui import Select
			
 
				 from selenium.webdriver import ActionChains
			
 
				 from selenium.webdriver.common.by import By
			
 
				-import undetected_chromedriver_ES as uc
			
 
				 desired_capabilities = DesiredCapabilities.CHROME
			
 
				 desired_capabilities["pageLoadStrategy"] = "none"
			
 
				 
			
@@ -89,77 +88,80 @@ class MyChrome(webdriver.Chrome):
 
				                 raise NoSuchElementException
			
 
				         else:
			
 
				             return super().find_elements(by=by, value=value)
			
 
				-        
			
 
				 
			
 
				-class MyUCChrome(uc.Chrome):
			
 
				+import sys
			
 
				+if sys.platform != "darwin": # MacOS不支持Cloudflare
			
 
				+    import undetected_chromedriver_ES as uc
			
 
				 
			
 
				-    def __init__(self, *args, **kwargs):
			
 
				-        self.iframe_env = False  # 现在的环境是root还是iframe
			
 
				-        super().__init__(*args, **kwargs)  # 调用父类的 __init__
			
 
				+    class MyUCChrome(uc.Chrome):
			
 
				 
			
 
				-    def find_element(self, by=By.ID, value=None, iframe=False):
			
 
				-        # 在这里改变查找元素的行为
			
 
				-        if self.iframe_env:
			
 
				-            super().switch_to.default_content()
			
 
				-            self.iframe_env = False
			
 
				-        if iframe:
			
 
				-            # 获取所有的 iframe
			
 
				-            try:
			
 
				-                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				-            except Exception as e:
			
 
				-                print(e)
			
 
				-            find_element = False
			
 
				-            # 遍历所有的 iframe 并点击里面的元素
			
 
				-            for iframe in iframes:
			
 
				-                # 切换到 iframe
			
 
				-                super().switch_to.default_content()
			
 
				-                super().switch_to.frame(iframe)
			
 
				-                self.iframe_env = True
			
 
				-                try:
			
 
				-                    # 在 iframe 中查找并点击元素
			
 
				-                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				-                    element = super().find_element(by=by, value=value)
			
 
				-                    find_element = True
			
 
				-                except:
			
 
				-                    print("No such element found in the iframe")
			
 
				-                # 完成操作后切回主文档
			
 
				-                # super().switch_to.default_content()
			
 
				-                if find_element:
			
 
				-                    return element
			
 
				-            if not find_element:
			
 
				-                raise NoSuchElementException
			
 
				-        else:
			
 
				-            return super().find_element(by=by, value=value)
			
 
				+        def __init__(self, *args, **kwargs):
			
 
				+            self.iframe_env = False  # 现在的环境是root还是iframe
			
 
				+            super().__init__(*args, **kwargs)  # 调用父类的 __init__
			
 
				 
			
 
				-    def find_elements(self, by=By.ID, value=None, iframe=False):
			
 
				-        # 在这里改变查找元素的行为
			
 
				-        if self.iframe_env:
			
 
				-            super().switch_to.default_content()
			
 
				-            self.iframe_env = False
			
 
				-        if iframe:
			
 
				-            # 获取所有的 iframe
			
 
				-            iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				-            find_element = False
			
 
				-            # 遍历所有的 iframe 并点击里面的元素
			
 
				-            for iframe in iframes:
			
 
				-                # 切换到 iframe
			
 
				+        def find_element(self, by=By.ID, value=None, iframe=False):
			
 
				+            # 在这里改变查找元素的行为
			
 
				+            if self.iframe_env:
			
 
				+                super().switch_to.default_content()
			
 
				+                self.iframe_env = False
			
 
				+            if iframe:
			
 
				+                # 获取所有的 iframe
			
 
				                 try:
			
 
				+                    iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				+                except Exception as e:
			
 
				+                    print(e)
			
 
				+                find_element = False
			
 
				+                # 遍历所有的 iframe 并点击里面的元素
			
 
				+                for iframe in iframes:
			
 
				+                    # 切换到 iframe
			
 
				                     super().switch_to.default_content()
			
 
				                     super().switch_to.frame(iframe)
			
 
				                     self.iframe_env = True
			
 
				-                    # 在 iframe 中查找并点击元素
			
 
				-                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				-                    elements = super().find_elements(by=by, value=value)
			
 
				-                    if len(elements) > 0:
			
 
				+                    try:
			
 
				+                        # 在 iframe 中查找并点击元素
			
 
				+                        # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				+                        element = super().find_element(by=by, value=value)
			
 
				                         find_element = True
			
 
				+                    except:
			
 
				+                        print("No such element found in the iframe")
			
 
				                     # 完成操作后切回主文档
			
 
				                     # super().switch_to.default_content()
			
 
				                     if find_element:
			
 
				-                        return elements
			
 
				-                except:
			
 
				-                    print("No such element found in the iframe")
			
 
				-            if not find_element:
			
 
				-                raise NoSuchElementException
			
 
				-        else:
			
 
				-            return super().find_elements(by=by, value=value)
			
 
				+                        return element
			
 
				+                if not find_element:
			
 
				+                    raise NoSuchElementException
			
 
				+            else:
			
 
				+                return super().find_element(by=by, value=value)
			
 
				+
			
 
				+        def find_elements(self, by=By.ID, value=None, iframe=False):
			
 
				+            # 在这里改变查找元素的行为
			
 
				+            if self.iframe_env:
			
 
				+                super().switch_to.default_content()
			
 
				+                self.iframe_env = False
			
 
				+            if iframe:
			
 
				+                # 获取所有的 iframe
			
 
				+                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
			
 
				+                find_element = False
			
 
				+                # 遍历所有的 iframe 并点击里面的元素
			
 
				+                for iframe in iframes:
			
 
				+                    # 切换到 iframe
			
 
				+                    try:
			
 
				+                        super().switch_to.default_content()
			
 
				+                        super().switch_to.frame(iframe)
			
 
				+                        self.iframe_env = True
			
 
				+                        # 在 iframe 中查找并点击元素
			
 
				+                        # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
			
 
				+                        elements = super().find_elements(by=by, value=value)
			
 
				+                        if len(elements) > 0:
			
 
				+                            find_element = True
			
 
				+                        # 完成操作后切回主文档
			
 
				+                        # super().switch_to.default_content()
			
 
				+                        if find_element:
			
 
				+                            return elements
			
 
				+                    except:
			
 
				+                        print("No such element found in the iframe")
			
 
				+                if not find_element:
			
 
				+                    raise NoSuchElementException
			
 
				+            else:
			
 
				+                return super().find_elements(by=by, value=value)
			
 
				 
			
--- a/ExecuteStage/utils.py
+++ b/ExecuteStage/utils.py
@@ -4,6 +4,7 @@ import csv
 
				 import datetime
			
 
				 import json
			
 
				 import os
			
 
				+import sys
			
 
				 import re
			
 
				 import time
			
 
				 import uuid
			
@@ -219,6 +220,11 @@ class myMySQL:
 
				     def __init__(self, config_file="mysql_config.json"):
			
 
				         # 读取配置文件
			
 
				         try:
			
 
				+            if sys.platform == "darwin":
			
 
				+                if config_file.find("./") >= 0:
			
 
				+                    config_file = config_file.replace("./", "")
			
 
				+                config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
			
 
				+            print("MySQL config file path: ", config_file)
			
 
				             with open(config_file, 'r') as f:
			
 
				                 config = json.load(f)
			
 
				                 host = config["host"]
		`@@ -0,0 +1 @@`
		`+请继续解压.tar.gz文件以使用易采集。`