|
@@ -15,6 +15,8 @@ import time
|
|
|
import requests
|
|
|
from urllib.parse import urljoin
|
|
|
from lxml import etree
|
|
|
+# import undetected_chromedriver as uc
|
|
|
+from pynput.keyboard import Key, Listener
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver.common.action_chains import ActionChains
|
|
@@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
from selenium.webdriver.support.ui import Select
|
|
|
from selenium.webdriver import ActionChains
|
|
|
from selenium.webdriver.common.by import By
|
|
|
-import undetected_chromedriver as uc
|
|
|
import random
|
|
|
# import pandas as pd
|
|
|
from openpyxl import load_workbook, Workbook
|
|
@@ -41,8 +42,10 @@ import pytesseract
|
|
|
from PIL import Image
|
|
|
# import uuid
|
|
|
from threading import Thread, Event
|
|
|
-from myChrome import MyChrome, MyUCChrome
|
|
|
-from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
|
|
|
+from myChrome import MyChrome
|
|
|
+if sys.platform != "darwin":
|
|
|
+ from myChrome import MyUCChrome
|
|
|
+from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
|
|
|
desired_capabilities = DesiredCapabilities.CHROME
|
|
|
desired_capabilities["pageLoadStrategy"] = "none"
|
|
|
|
|
@@ -1326,6 +1329,8 @@ class BrowserThread(Thread):
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
+ # from multiprocessing import freeze_support
|
|
|
+ # freeze_support() # prevents an endless loop of launching multiple instances
|
|
|
config = {
|
|
|
"id": [0],
|
|
|
"saved_file_name": "",
|
|
@@ -1358,6 +1363,9 @@ if __name__ == '__main__':
|
|
|
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
|
|
|
# driver_path = os.getcwd()+ "/chromedriver_mac64"
|
|
|
print(driver_path)
|
|
|
+ if c.config_folder == "":
|
|
|
+ c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
|
|
|
+ # print("Config folder for MacOS:", c.config_folder)
|
|
|
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
|
|
|
print("Finding chromedriver in EasySpider",
|
|
|
os.getcwd()+"/EasySpider")
|
|
@@ -1367,16 +1375,19 @@ if __name__ == '__main__':
|
|
|
driver_path = os.path.join(
|
|
|
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
|
|
|
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
+ options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
|
|
|
options.binary_location = os.path.join(
|
|
|
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
|
|
|
driver_path = os.path.join(
|
|
|
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
|
|
|
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
+ options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
|
|
|
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
|
|
|
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
|
|
|
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
+ options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
|
|
else:
|
|
|
print("Unsupported platform")
|
|
|
sys.exit()
|
|
@@ -1419,6 +1430,7 @@ if __name__ == '__main__':
|
|
|
try:
|
|
|
with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
|
|
|
config = json.load(f)
|
|
|
+ print("Config file path: " + c.config_folder + c.config_file_name)
|
|
|
absolute_user_data_folder = config["absolute_user_data_folder"]
|
|
|
print("\nAbsolute_user_data_folder:",
|
|
|
absolute_user_data_folder, "\n")
|
|
@@ -1428,6 +1440,9 @@ if __name__ == '__main__':
|
|
|
option.add_argument(
|
|
|
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
|
|
option.add_argument("--profile-directory=Default")
|
|
|
+ options.add_argument(
|
|
|
+ f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
|
|
+ options.add_argument("--profile-directory=Default")
|
|
|
|
|
|
if c.headless:
|
|
|
print("Headless mode")
|
|
@@ -1444,7 +1459,7 @@ if __name__ == '__main__':
|
|
|
|
|
|
threads = []
|
|
|
for i in c.id:
|
|
|
- print(options)
|
|
|
+ # print(options)
|
|
|
print("id: ", i)
|
|
|
if c.read_type == "remote":
|
|
|
print("remote")
|
|
@@ -1492,10 +1507,15 @@ if __name__ == '__main__':
|
|
|
browser_t = MyChrome(
|
|
|
options=options, chrome_options=option, executable_path=driver_path)
|
|
|
elif cloudflare == 1:
|
|
|
- browser_t = MyUCChrome(
|
|
|
- options=options, chrome_options=option, executable_path=driver_path)
|
|
|
- print("Pass Cloudflare Mode")
|
|
|
- print("过Cloudflare验证模式")
|
|
|
+ if sys.platform != "darwin":
|
|
|
+ browser_t = MyUCChrome(
|
|
|
+ options=options, chrome_options=option, driver_executable_path=driver_path)
|
|
|
+ print("Pass Cloudflare Mode")
|
|
|
+ print("过Cloudflare验证模式")
|
|
|
+ else:
|
|
|
+ print("Not support Cloudflare Mode on MacOS")
|
|
|
+ print("MacOS不支持Cloudflare验证模式")
|
|
|
+ sys.exit()
|
|
|
event = Event()
|
|
|
event.set()
|
|
|
thread = BrowserThread(browser_t, i, service,
|
|
@@ -1505,26 +1525,33 @@ if __name__ == '__main__':
|
|
|
thread.start()
|
|
|
# Set the pause operation
|
|
|
# if sys.platform != "linux":
|
|
|
+ # time.sleep(3)
|
|
|
+ # print("\n\n----------------------------------")
|
|
|
+ # print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
|
|
+ # print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
|
|
+ # print("----------------------------------\n\n")
|
|
|
# Thread(target=check_pause, args=("p", event)).start()
|
|
|
# else:
|
|
|
time.sleep(3)
|
|
|
+ press_time = {"duration": 0, "is_pressed": False}
|
|
|
print("\n\n----------------------------------")
|
|
|
- print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
|
|
|
- print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
|
|
|
+ print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
|
|
+ print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
|
|
print("----------------------------------\n\n")
|
|
|
# 使用监听器监听键盘输入
|
|
|
try:
|
|
|
- from pynput.keyboard import Key, Listener
|
|
|
- with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
|
|
+ with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
|
|
|
listener.join()
|
|
|
except:
|
|
|
- print("您的操作系统不支持暂停功能。")
|
|
|
- print("Your operating system does not support the pause function.")
|
|
|
+ pass
|
|
|
+ # print("您的操作系统不支持暂停功能。")
|
|
|
+ # print("Your operating system does not support the pause function.")
|
|
|
|
|
|
|
|
|
-
|
|
|
+ # print("线程长度:", len(threads) )
|
|
|
|
|
|
for thread in threads:
|
|
|
+ print()
|
|
|
thread.join()
|
|
|
|
|
|
for thread in threads:
|