瀏覽代碼

Auto Rename Download Files

naibo 1 年之前
父節點
當前提交
a365783e41
共有 4 個文件被更改,包括 27 次插入和 36 次刪除
  1. 0 0
      ElectronJS/tasks/228.json
  2. 1 1
      ExecuteStage/.vscode/launch.json
  3. 5 3
      ExecuteStage/easyspider_executestage.py
  4. 21 32
      ExecuteStage/utils.py

文件差異過大導致無法顯示
+ 0 - 0
ElectronJS/tasks/228.json


+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "justMyCode": false,
             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--ids", "[75]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
+            "args": ["--ids", "[77, 78]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
         "--read_type", "remote"]
             // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
         }

+ 5 - 3
ExecuteStage/easyspider_executestage.py

@@ -5,9 +5,10 @@ import copy
 import platform
 import shutil
 import string
+import threading
 # import undetected_chromedriver as uc
 from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
-    on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
+    on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
 from myChrome import MyChrome
 from threading import Thread, Event
 from PIL import Image
@@ -147,8 +148,9 @@ class BrowserThread(Thread):
         self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
         path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName)
         self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
-
-        self.browser.execute("send_command", self.paramss)  # 下载地址改变
+        self.browser.execute("send_command", self.paramss)  # 下载目录改变
+        monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, )) #path后面的逗号不能省略,是元组固定写法
+        monitor_thread.start()
         # self.browser.get('about:blank')
         self.procedure = service["graph"]  # 程序执行流程
         try:

+ 21 - 32
ExecuteStage/utils.py

@@ -59,41 +59,30 @@ def send_email(config):
             smtp_server.quit()
         except:
             pass
+  
+def rename_downloaded_file(download_dir):
+    original_files = set(os.listdir(download_dir))
 
-
-def wait_for_download_complete(download_dir, timeout=3600):
-    """等待下载完成,直到没有.crdownload文件为止,或者超时"""
     while True:
-        time.sleep(1)  # 每一秒检查一次
-        timeout -= 1
-        is_downloading = False
-        for fname in os.listdir(download_dir):
-            if fname.endswith('.crdownload'):
-                is_downloading = True
-                break
-        # 如果没有下载或超时,则退出
-        if not is_downloading or timeout <= 0:
-            break
-        elif timeout % 10 == 0:
-            print(f"下载文件中,请等待...|Downloading in progress, please wait... {timeout} seconds left")
-            print("可以在点击元素选项中设置下载超时时间。|You can set the download timeout in the 'Click Element' option.")
-    if is_downloading:
-        print("下载可能未完成,但已经超时。|Download may not be completed, but it has timed out.")
-    else:
-        print("下载完成。|Download completed.")
+        files = os.listdir(download_dir)
+        for file in files:
+            if file in original_files:
+                continue  # 跳过原始文件和已重命名的文件
 
-def rename_downloaded_file(download_dir):
-    """重命名下载文件,假设是最新下载的文件"""
-    files = os.listdir(download_dir)
-    paths = [os.path.join(download_dir, basename) for basename in files]
-    latest_file = max(paths, key=os.path.getmtime, default=None)
-
-    if latest_file is not None and not latest_file.endswith('.crdownload'):
-        new_name = latest_file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + latest_file.split('/')[-1]
-        new_path = os.path.join(download_dir, new_name)
-        os.rename(latest_file, new_path)
-        print(f"文件已重命名为: {new_path}")
-        print(f"File has been renamed to: {new_path}")
+            full_path = os.path.join(download_dir, file)
+
+            if not full_path.endswith('.crdownload') and not full_path.endswith('.htm') and not full_path.endswith('.html'):
+                new_name = file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + file.split('/')[-1]
+                new_path = os.path.join(download_dir, new_name)
+                try:
+                    os.rename(full_path, new_path)
+                    original_files.add(new_name)  # 记录新文件名以避免再次重命名
+                    print(f"文件已重命名为|File has been renamed to: {new_path}")
+                except:
+                    print("文件重命名失败|File rename failed")
+
+        time.sleep(1)  # 每一秒检查一次
+        # print("下载文件重命名监控中,请等待...|Download file rename monitoring, please wait...")
 
 def is_valid_url(url):
     try:

部分文件因文件數量過多而無法顯示