преди 2 години · 528ae7a132
--- a/ElectronJS/src/taskGrid/FlowChart.html
+++ b/ElectronJS/src/taskGrid/FlowChart.html
@@ -639,6 +639,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
 
				                         <option value = "xlsx">XLSX (EXCEL, note that a single Excel cell can save up to 32767 characters)</option>
			
 
				                         <option value = "csv">CSV</option>
			
 
				                         <option value = "txt">TXT</option>
			
 
				+                        <option value = "json">JSON</option>
			
 
				                         <option value = "mysql">MySQL Database</option>
			
 
				                     </select>
			
 
				                     <label>Export File Name/Database Table Name (Can use ../ to represent relative path to change the file save location,the keyword "current_time" will be replaced with the timestamp when the task is executed):</label>
			
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@@ -640,6 +640,7 @@ print(emotlib.emoji()) # 使用其中的函数。
 
				                         <option value = "xlsx">XLSX（即EXCEL文件，注意Excel单个单元格最多可存储32767字符）</option>
			
 
				                         <option value = "csv">CSV</option>
			
 
				                         <option value = "txt">TXT</option>
			
 
				+                        <option value = "json">JSON</option>
			
 
				                         <option value = "mysql">MySQL数据库</option>
			
 
				                     </select>
			
 
				                     <label>导出文件名/数据库表格名称（可使用../表示相对路径以改变文件保存位置，名称中的“current_time”会被替换为执行任务时的时间戳）：</label>
			
--- a/ElectronJS/tasks/112.json
+++ b/ElectronJS/tasks/112.json
--- a/ElectronJS/tasks/200.json
+++ b/ElectronJS/tasks/200.json
--- a/ElectronJS/tasks/201.json
+++ b/ElectronJS/tasks/201.json
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
 
				             "justMyCode": false,
			
 
				             //  "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
			
 
				             // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
			
 
				-            "args": ["--id", "[8]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
			
 
				+            "args": ["--id", "[16]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
			
 
				         }
			
 
				     ]
			
 
				 }
			
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -1,6 +1,6 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 # import atexit
			
 
				-from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel
			
 
				+from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel, write_to_json
			
 
				 from myChrome import MyChrome
			
 
				 from threading import Thread, Event
			
 
				 from PIL import Image
			
@@ -152,27 +152,24 @@ class BrowserThread(Thread):
 
				             filter(isnotnull, service["links"].split("\n")))  # 要执行的link的列表
			
 
				         self.OUTPUT = []  # 采集的数据
			
 
				         self.writeMode = 1  # 写入模式，0为新建，1为追加
			
 
				-        if self.outputFormat == "csv" or self.outputFormat == "txt":
			
 
				+        if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
			
 
				             if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
			
 
				                 self.OUTPUT.append([])  # 添加表头
			
 
				                 self.writeMode = 0
			
 
				-        elif self.outputFormat == "xlsx":
			
 
				-            if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.xlsx'):
			
 
				-                self.OUTPUT.append([])  # 添加表头
			
 
				-                self.writeMode = 0
			
 
				+        elif self.outputFormat == "json":
			
 
				+            self.writeMode = 3 # JSON模式无需判断是否存在文件
			
 
				         elif self.outputFormat == "mysql":
			
 
				             self.mysql = myMySQL(config["mysql_config_path"])
			
 
				             self.mysql.create_table(self.saveName, service["outputParameters"])
			
 
				             self.writeMode = 2
			
 
				-        if self.writeMode == 1:
			
 
				-            self.print_and_log("追加模式")
			
 
				-            self.print_and_log("Append Mode")
			
 
				-        elif self.writeMode == 0:
			
 
				-            self.print_and_log("新建模式")
			
 
				-            self.print_and_log("New Mode")
			
 
				+        if self.writeMode == 0:
			
 
				+            self.print_and_log("新建模式|Create Mode")
			
 
				+        elif self.writeMode == 1:
			
 
				+            self.print_and_log("追加模式|Append Mode")
			
 
				         elif self.writeMode == 2:
			
 
				-            self.print_and_log("MySQL模式")
			
 
				-            self.print_and_log("MySQL Mode")
			
 
				+            self.print_and_log("MySQL模式|MySQL Mode")
			
 
				+        elif self.writeMode == 3:
			
 
				+            self.print_and_log("JSON模式|JSON Mode")
			
 
				         self.containJudge = service["containJudge"]  # 是否含有判断语句
			
 
				         self.outputParameters = {}
			
 
				         self.service = service
			
@@ -401,6 +398,10 @@ class BrowserThread(Thread):
 
				                     str(self.id) + "/" + self.saveName + '.xlsx'
			
 
				                 write_to_excel(
			
 
				                     file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
			
 
				+            elif self.outputFormat == "json":
			
 
				+                file_name = "Data/Task_" + \
			
 
				+                    str(self.id) + "/" + self.saveName + '.json'
			
 
				+                write_to_json(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord, self.outputParameters.keys())
			
 
				             elif self.outputFormat == "mysql":
			
 
				                 self.mysql.write_to_mysql(
			
 
				                     self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)
			
@@ -1395,7 +1396,7 @@ class BrowserThread(Thread):
 
				                 except:
			
 
				                     downloadPic = 0
			
 
				                 if downloadPic == 1:
			
 
				-                    download_image(content, "Data/Task_" +
			
 
				+                    download_image(self, content, "Data/Task_" +
			
 
				                                    str(self.id) + "/" + self.saveName + "/")
			
 
				             else:  # 普通节点
			
 
				                 content = element.text
			
@@ -1420,7 +1421,7 @@ class BrowserThread(Thread):
 
				                 except:
			
 
				                     downloadPic = 0
			
 
				                 if downloadPic == 1:
			
 
				-                    download_image(content, "Data/Task_" +
			
 
				+                    download_image(self, content, "Data/Task_" +
			
 
				                                    str(self.id) + "/" + self.saveName + "/")
			
 
				             else:
			
 
				                 command = 'var arr = [];\
			
--- a/ExecuteStage/utils.py
+++ b/ExecuteStage/utils.py
@@ -95,7 +95,7 @@ def on_release_creator(event, press_time):
 
				 #         time.sleep(1)  # 每秒检查一次
			
 
				 
			
 
				 
			
 
				-def download_image(url, save_directory):
			
 
				+def download_image(browser, url, save_directory):
			
 
				     # 定义浏览器头信息
			
 
				     headers = {
			
 
				         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
			
@@ -120,15 +120,15 @@ def download_image(url, save_directory):
 
				             with open(save_path, 'wb') as file:
			
 
				                 file.write(response.content)
			
 
				 
			
 
				-            print("图片已成功下载到:", save_path)
			
 
				-            print("The image has been successfully downloaded to:", save_path)
			
 
				+            browser.print_and_log("图片已成功下载到:", save_path)
			
 
				+            browser.print_and_log("The image has been successfully downloaded to:", save_path)
			
 
				         else:
			
 
				-            print("下载图片失败，请检查此图片链接是否有效:", url)
			
 
				-            print(
			
 
				+            browser.print_and_log("下载图片失败，请检查此图片链接是否有效:", url)
			
 
				+            browser.print_and_log(
			
 
				                 "Failed to download image, please check if this image link is valid:", url)
			
 
				     else:
			
 
				-        print("下载图片失败，请检查此图片链接是否有效:", url)
			
 
				-        print("Failed to download image, please check if this image link is valid:", url)
			
 
				+        browser.print_and_log("下载图片失败，请检查此图片链接是否有效:", url)
			
 
				+        browser.print_and_log("Failed to download image, please check if this image link is valid:", url)
			
 
				 
			
 
				 
			
 
				 def get_output_code(output):
			
@@ -182,6 +182,41 @@ def replace_field_values(orginal_text, outputParameters):
 
				     return replaced_text
			
 
				 
			
 
				 
			
 
				+def write_to_json(file_name, data, types, record, keys):
			
 
				+    keys = list(keys)
			
 
				+    # Prepare empty list for data
			
 
				+    data_to_write = []
			
 
				+    # Transform data and append to list
			
 
				+    for line in data:
			
 
				+        to_write = {}
			
 
				+        for i in range(len(line)):
			
 
				+            if types[i] == "int" or types[i] == "bigInt":
			
 
				+                try:
			
 
				+                    line[i] = int(line[i])
			
 
				+                except:
			
 
				+                    line[i] = 0
			
 
				+            elif types[i] == "double":
			
 
				+                try:
			
 
				+                    line[i] = float(line[i])
			
 
				+                except:
			
 
				+                    line[i] = 0.0
			
 
				+            if record[i]:
			
 
				+                 to_write.update({keys[i]: line[i]})
			
 
				+        data_to_write.append(to_write)
			
 
				+    
			
 
				+    try:
			
 
				+        # read data from JSON
			
 
				+        with open(file_name, 'r', encoding='utf-8') as f:
			
 
				+            json_data = json.load(f)
			
 
				+    except:
			
 
				+        json_data = []
			
 
				+
			
 
				+    json_data.extend(data_to_write)
			
 
				+    
			
 
				+    # write data to JSON
			
 
				+    with open(file_name, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(json_data, f, ensure_ascii=False)
			
 
				+
			
 
				 def write_to_excel(file_name, data, types, record):
			
 
				     first = False
			
 
				     if os.path.exists(file_name):