Jelajahi Sumber

Change keyboard module

naibo 2 tahun lalu
induk
melakukan
ae07c91cf1

+ 26 - 21
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py

@@ -12,8 +12,8 @@ import sys
 # import base64
 # import hashlib
 import time
-# import keyboard
 import requests
+from urllib.parse import urljoin
 from lxml import etree
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
@@ -39,14 +39,14 @@ import os
 from commandline_config import Config
 import pytesseract
 from PIL import Image
+from pynput.keyboard import Key, Listener
 # import uuid
 from threading import Thread, Event
 from myChrome import MyChrome, MyUCChrome
-from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, write_to_csv, write_to_excel
+from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"
 
-
 class BrowserThread(Thread):
     def __init__(self, browser_t, id, service, version, event, saveName, config):
         Thread.__init__(self)
@@ -1172,22 +1172,15 @@ class BrowserThread(Thread):
                         continue
                     # p["relativeXPath"] = p["relativeXPath"].lower()
                     # p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
-                    if p["nodeType"] == 2:
-                        if p["relativeXPath"].find("/@href") >= 0:
-                            xpath = p["relativeXPath"]
-                        else:
-                            xpath = p["relativeXPath"] + "/@href"
+                    # 已经有text()或@href了,不需要再加
+                    if p["relativeXPath"].find("/@href") >= 0 or p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
+                        xpath = p["relativeXPath"]
+                    elif p["nodeType"] == 2:
+                        xpath = p["relativeXPath"] + "/@href"
                     elif p["contentType"] == 1:
-                        # 已经有text()了,不需要再加
-                        if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
-                            xpath = p["relativeXPath"]
-                        else:
-                            xpath = p["relativeXPath"] + "/text()"
+                        xpath = p["relativeXPath"] + "/text()"
                     elif p["contentType"] == 0:
-                        if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
-                            xpath = p["relativeXPath"]
-                        else:
-                            xpath = p["relativeXPath"] + "//text()"
+                        xpath = p["relativeXPath"] + "//text()"
                     if p["relative"]:
                         # if p["relativeXPath"] == "":
                         #     content = [loopElementHTML]
@@ -1210,6 +1203,9 @@ class BrowserThread(Thread):
                         # 拼接所有文本内容并去掉两边的空白
                         content = ' '.join(result.strip()
                                            for result in content if result.strip())
+                        if p["nodeType"] == 2:
+                            base_url = self.browser.current_url
+                            content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
                     else:
                         content = p["default"]
                         if not self.dataNotFoundKeys[p["name"]]:
@@ -1508,13 +1504,22 @@ if __name__ == '__main__':
         print("Thread with task id: ", i, " is created")
         threads.append(thread)
         thread.start()
-        Thread(target=check_pause, args=("p", event)).start()
-        time.sleep(5)
+        # Set the pause operation
+        # if sys.platform != "linux": 
+        #     Thread(target=check_pause, args=("p", event)).start()
+        # else:
+        time.sleep(3)
         print("\n\n----------------------------------")
-        print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
+        print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
         print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
         print("----------------------------------\n\n")
-
+        # 使用监听器监听键盘输入
+        with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
+            listener.join()
+            
+        
+        
+	
     for thread in threads:
         thread.join()
 

+ 38 - 17
.temp_to_pub/EasySpider_windows_x64/Code/utils.py

@@ -7,7 +7,7 @@ import os
 import re
 import time
 import uuid
-import keyboard
+# import keyboard
 from openpyxl import Workbook, load_workbook
 import requests
 from urllib.parse import urlparse
@@ -23,21 +23,42 @@ def is_valid_url(url):
 
 def lowercase_tags_in_xpath(xpath):
     return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
-
-def check_pause(key, event):
-    while True:
-        if keyboard.is_pressed(key):  # 按下p键,暂停程序
-            if event._flag == False:
-                print("任务执行中,长按p键暂停执行。")
-                print("Task is running, long press 'p' to pause.")
-                # 设置Event的值为True,使得线程b可以继续执行
-                event.set()
-            else:
-                # 设置Event的值为False,使得线程b暂停执行
-                print("任务已暂停,长按p键继续执行...")
-                print("Task paused, press 'p' to continue...")
-                event.clear()
-        time.sleep(1)  # 每秒检查一次
+    
+def on_release_creator(event):
+    def on_release(key):
+        try:
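+            if key.char == 'p':  # 'p' key released: toggle the pause/resume event (special keys have no .char and fall through to the except below)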
+                if event._flag == False:
+                    print("任务执行中,按p键暂停执行。")
+                    print("Task is running, long press 'p' to pause.")
+                    # 设置Event的值为True,使得线程b可以继续执行
+                    event.set()
+                else:
+                    # 设置Event的值为False,使得线程b暂停执行
+                    print("任务已暂停,按p键继续执行...")
+                    print("Task paused, press 'p' to continue...")
+                    event.clear()
+        except:
+            pass
+    return on_release
+
+def on_press(key):
+    pass
+
+# def check_pause(key, event):
+#     while True:
+#         if keyboard.is_pressed(key):  # 按下p键,暂停程序
+#             if event._flag == False:
+#                 print("任务执行中,长按p键暂停执行。")
+#                 print("Task is running, long press 'p' to pause.")
+#                 # 设置Event的值为True,使得线程b可以继续执行
+#                 event.set()
+#             else:
+#                 # 设置Event的值为False,使得线程b暂停执行
+#                 print("任务已暂停,长按p键继续执行...")
+#                 print("Task paused, press 'p' to continue...")
+#                 event.clear()
+#         time.sleep(1)  # 每秒检查一次
 
 
 def download_image(url, save_directory):
@@ -294,4 +315,4 @@ class myMySQL:
     def close(self):
         self.conn.close()
         print("成功关闭数据库。")
-        print("Successfully closed the database.")
+        print("Successfully closed the database.")

File diff ditekan karena terlalu besar
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json


File diff ditekan karena terlalu besar
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json


File diff ditekan karena terlalu besar
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json


File diff ditekan karena terlalu besar
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json


BERKAS BINER
ElectronJS/EasySpider_en.crx


BERKAS BINER
ElectronJS/EasySpider_zh.crx


+ 1 - 1
ExecuteStage/easyspider_executestage.py

@@ -1511,7 +1511,7 @@ if __name__ == '__main__':
         time.sleep(3)
         print("\n\n----------------------------------")
         print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
-        print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
+        print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
         print("----------------------------------\n\n")
         # 使用监听器监听键盘输入
         with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini