Browse Source

Auto Rename Download Files

naibo 1 year ago
parent
commit
a971b52d38

File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/228.json


+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "justMyCode": false,
             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--ids", "[77, 78]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
+            "args": ["--ids", "[79]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
         "--read_type", "remote"]
             // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
         }

+ 11 - 12
ExecuteStage/easyspider_executestage.py

@@ -116,7 +116,10 @@ class BrowserThread(Thread):
         self.downloadFolder = "Data/Task_" + str(id) + "/" + self.saveName
         if not os.path.exists(self.downloadFolder):
             os.mkdir(self.downloadFolder)  # 创建保存文件夹用来保存截图和文件
-        self.existing_files = sorted([os.path.join(self.downloadFolder, file) for file in os.listdir(self.downloadFolder)], key=os.path.getmtime)
+        if not os.path.exists(self.downloadFolder + "/files"):
+            os.mkdir(self.downloadFolder + "/files")
+        if not os.path.exists(self.downloadFolder + "/images"):
+            os.mkdir(self.downloadFolder + "/images")
         self.getDataStep = 0
         self.startSteps = 0
         try:
@@ -146,11 +149,12 @@ class BrowserThread(Thread):
             'source': js})  # TMALL 反扒
         WebDriverWait(self.browser, 10)
         self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
-        path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName)
+        path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName, "files")
         self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
         self.browser.execute("send_command", self.paramss)  # 下载目录改变
-        monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, )) #path后面的逗号不能省略,是元组固定写法
-        monitor_thread.start()
+        self.monitor_event = threading.Event()
+        self.monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, self.monitor_event)) #path后面的逗号不能省略,是元组固定写法
+        self.monitor_thread.start()
         # self.browser.get('about:blank')
         self.procedure = service["graph"]  # 程序执行流程
         try:
@@ -554,6 +558,7 @@ class BrowserThread(Thread):
             shutil.rmtree(self.option["tmp_user_data_folder"])
         except:
             pass
+        self.monitor_event.set()
         self.print_and_log("清理完成!|Clean up completed!")
         self.print_and_log("您现在可以安全的关闭此窗口了。|You can safely close this window now.")
         
@@ -1811,12 +1816,6 @@ class BrowserThread(Thread):
                 self.history["index"] = 0
         self.scrollDown(param)  # 根据参数配置向下滚动
 
-        # 处理文件变化,新下载
-        files = os.listdir(self.downloadFolder)
-        latest_file = files[-1]
-        self.existing_files = files
-        # rt.end()
-
     def get_content(self, p, element):
         content = ""
         if p["contentType"] == 0:
@@ -1842,7 +1841,7 @@ class BrowserThread(Thread):
                     downloadPic = 0
                 if downloadPic == 1:
                     download_image(self, content, "Data/Task_" +
-                                   str(self.id) + "/" + self.saveName + "/", element)
+                                   str(self.id) + "/" + self.saveName + "/images", element)
             else:  # 普通节点
                 if p["splitLine"] == 1:
                     text = extract_text_from_html(element.get_attribute('outerHTML'))
@@ -1871,7 +1870,7 @@ class BrowserThread(Thread):
                     downloadPic = 0
                 if downloadPic == 1:
                     download_image(self, content, "Data/Task_" +
-                                   str(self.id) + "/" + self.saveName + "/", element)
+                                   str(self.id) + "/" + self.saveName + "/images", element)
             else:
                 command = 'var arr = [];\
                 var content = arguments[0];\

+ 5 - 4
ExecuteStage/utils.py

@@ -60,10 +60,10 @@ def send_email(config):
         except:
             pass
   
-def rename_downloaded_file(download_dir):
+def rename_downloaded_file(download_dir, stop_event):
     original_files = set(os.listdir(download_dir))
 
-    while True:
+    while not stop_event.is_set():
         files = os.listdir(download_dir)
         for file in files:
             if file in original_files:
@@ -71,8 +71,8 @@ def rename_downloaded_file(download_dir):
 
             full_path = os.path.join(download_dir, file)
 
-            if not full_path.endswith('.crdownload') and not full_path.endswith('.htm') and not full_path.endswith('.html'):
-                new_name = file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + file.split('/')[-1]
+            if not full_path.endswith('.crdownload') and not full_path.endswith('.htm') and not full_path.endswith('.html') and not full_path.startswith('esfile_'):
+                new_name = "esfile_" + file.split('/')[-1] + '_' + str(uuid.uuid4()) + '_' + file.split('/')[-1]
                 new_path = os.path.join(download_dir, new_name)
                 try:
                     os.rename(full_path, new_path)
@@ -83,6 +83,7 @@ def rename_downloaded_file(download_dir):
 
         time.sleep(1)  # 每一秒检查一次
         # print("下载文件重命名监控中,请等待...|Download file rename monitoring, please wait...")
+    print("下载文件重命名监控已停止。|Download file rename monitoring has stopped.")
 
 def is_valid_url(url):
     try:

Some files were not shown because too many files changed in this diff