1 年之前 · a365783e41
--- a/ElectronJS/tasks/228.json
+++ b/ElectronJS/tasks/228.json
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
 
				             "justMyCode": false,
			
 
				             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
			
 
				             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
			
 
				-            "args": ["--ids", "[75]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
			
 
				+            "args": ["--ids", "[77, 78]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
			
 
				         "--read_type", "remote"]
			
 
				             // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
			
 
				         }
			
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -5,9 +5,10 @@ import copy
 
				 import platform
			
 
				 import shutil
			
 
				 import string
			
 
				+import threading
			
 
				 # import undetected_chromedriver as uc
			
 
				 from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
			
 
				-    on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
			
 
				+    on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
			
 
				 from myChrome import MyChrome
			
 
				 from threading import Thread, Event
			
 
				 from PIL import Image
			
@@ -147,8 +148,9 @@ class BrowserThread(Thread):
 
				         self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
			
 
				         path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName)
			
 
				         self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
			
 
				-
			
 
				-        self.browser.execute("send_command", self.paramss)  # 下载地址改变
			
 
				+        self.browser.execute("send_command", self.paramss)  # 下载目录改变
			
 
				+        monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, )) #path后面的逗号不能省略，是元组固定写法
			
 
				+        monitor_thread.start()
			
 
				         # self.browser.get('about:blank')
			
 
				         self.procedure = service["graph"]  # 程序执行流程
			
 
				         try:
			
--- a/ExecuteStage/utils.py
+++ b/ExecuteStage/utils.py
@@ -59,41 +59,30 @@ def send_email(config):
 
				             smtp_server.quit()
			
 
				         except:
			
 
				             pass
			
 
				+  
			
 
				+def rename_downloaded_file(download_dir):
			
 
				+    original_files = set(os.listdir(download_dir))
			
 
				 
			
 
				-
			
 
				-def wait_for_download_complete(download_dir, timeout=3600):
			
 
				-    """等待下载完成，直到没有.crdownload文件为止，或者超时"""
			
 
				     while True:
			
 
				-        time.sleep(1)  # 每一秒检查一次
			
 
				-        timeout -= 1
			
 
				-        is_downloading = False
			
 
				-        for fname in os.listdir(download_dir):
			
 
				-            if fname.endswith('.crdownload'):
			
 
				-                is_downloading = True
			
 
				-                break
			
 
				-        # 如果没有下载或超时，则退出
			
 
				-        if not is_downloading or timeout <= 0:
			
 
				-            break
			
 
				-        elif timeout % 10 == 0:
			
 
				-            print(f"下载文件中，请等待...|Downloading in progress, please wait... {timeout} seconds left")
			
 
				-            print("可以在点击元素选项中设置下载超时时间。|You can set the download timeout in the 'Click Element' option.")
			
 
				-    if is_downloading:
			
 
				-        print("下载可能未完成，但已经超时。|Download may not be completed, but it has timed out.")
			
 
				-    else:
			
 
				-        print("下载完成。|Download completed.")
			
 
				+        files = os.listdir(download_dir)
			
 
				+        for file in files:
			
 
				+            if file in original_files:
			
 
				+                continue  # 跳过原始文件和已重命名的文件
			
 
				 
			
 
				-def rename_downloaded_file(download_dir):
			
 
				-    """重命名下载文件，假设是最新下载的文件"""
			
 
				-    files = os.listdir(download_dir)
			
 
				-    paths = [os.path.join(download_dir, basename) for basename in files]
			
 
				-    latest_file = max(paths, key=os.path.getmtime, default=None)
			
 
				-
			
 
				-    if latest_file is not None and not latest_file.endswith('.crdownload'):
			
 
				-        new_name = latest_file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + latest_file.split('/')[-1]
			
 
				-        new_path = os.path.join(download_dir, new_name)
			
 
				-        os.rename(latest_file, new_path)
			
 
				-        print(f"文件已重命名为: {new_path}")
			
 
				-        print(f"File has been renamed to: {new_path}")
			
 
				+            full_path = os.path.join(download_dir, file)
			
 
				+
			
 
				+            if not full_path.endswith('.crdownload') and not full_path.endswith('.htm') and not full_path.endswith('.html'):
			
 
				+                new_name = file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + file.split('/')[-1]
			
 
				+                new_path = os.path.join(download_dir, new_name)
			
 
				+                try:
			
 
				+                    os.rename(full_path, new_path)
			
 
				+                    original_files.add(new_name)  # 记录新文件名以避免再次重命名
			
 
				+                    print(f"文件已重命名为|File has been renamed to: {new_path}")
			
 
				+                except:
			
 
				+                    print("文件重命名失败|File rename failed")
			
 
				+
			
 
				+        time.sleep(1)  # 每一秒检查一次
			
 
				+        # print("下载文件重命名监控中，请等待...|Download file rename monitoring, please wait...")
			
 
				 
			
 
				 def is_valid_url(url):
			
 
				     try: