Просмотр исходного кода

Merge pull request #882 from yan-xiaoo/remote-access-dev

Support remote control with backend api
Naibo Wang 3 недель назад
Родитель
Коммит
a7fde17f71

+ 8 - 0
ElectronJS/main.js

@@ -115,6 +115,8 @@ let socket_window = null;
 let socket_start = null;
 let socket_flowchart = null;
 let socket_popup = null;
+// 存储后端(server.js)沟通的 socket
+let socket_backend = null;
 let invoke_window = null;
 
 // var ffi = require('ffi-napi');
@@ -1292,19 +1294,25 @@ wss.on("connection", function (ws) {
             } else if (msg.message.id == 3) {
                 socket_popup = ws;
                 console.log("set socket_popup at time: ", new Date());
+            } else if (msg.message.id == 4) {
+                socket_backend = ws;
+                console.log("set socket_backend at time: ", new Date());
             } else {
                 //其他的ID是用来标识不同的浏览器标签页的
                 // await new Promise(resolve => setTimeout(resolve, 200));
                 let handles = await driver.getAllWindowHandles();
+                // 获得所有位于新的 handles 而不存在于 old_handles 中的 handles,即获得所有新创建页面的句柄
                 if (arrayDifference(handles, old_handles).length > 0) {
                     old_handles = handles;
                     current_handle = handles[handles.length - 1];
+                    // 切换到新增加的窗口
                     await driver.switchTo().window(current_handle);
                     console.log(
                         "New tab opened, change current_handle to: ",
                         current_handle
                     );
                     // 调整浏览器窗口大小,不然扩展会白屏
+                    // 先调整为当前窗口大小加10px,然后再调整回原来的大小
                     let size = await driver.manage().window().getRect();
                     let width = size.width;
                     let height = size.height;

+ 19 - 6
ElectronJS/package-lock.json

@@ -10,6 +10,7 @@
             "license": "AGPL-3.0",
             "dependencies": {
                 "cors": "^2.8.5",
+                "cross-env": "^7.0.3",
                 "electron-squirrel-startup": "^1.0.0",
                 "express": "^4.21.2",
                 "formidable": "^3.5.0",
@@ -1774,9 +1775,26 @@
                 "node": ">=0.8"
             }
         },
+        "node_modules/cross-env": {
+            "version": "7.0.3",
+            "resolved": "https://registry.npmjs.org/cross-env/-/cross-env-7.0.3.tgz",
+            "integrity": "sha512-+/HKd6EgcQCJGh2PSjZuUitQBQynKor4wrFbRg4DtAgS1aWO+gU52xpH7M9ScGgXSYmAVS9bIJ8EzuaGw0oNAw==",
+            "license": "MIT",
+            "dependencies": {
+                "cross-spawn": "^7.0.1"
+            },
+            "bin": {
+                "cross-env": "src/bin/cross-env.js",
+                "cross-env-shell": "src/bin/cross-env-shell.js"
+            },
+            "engines": {
+                "node": ">=10.14",
+                "npm": ">=6",
+                "yarn": ">=1"
+            }
+        },
         "node_modules/cross-spawn": {
             "version": "7.0.3",
-            "dev": true,
             "license": "MIT",
             "dependencies": {
                 "path-key": "^3.1.0",
@@ -3422,7 +3440,6 @@
         },
         "node_modules/isexe": {
             "version": "2.0.0",
-            "dev": true,
             "license": "ISC"
         },
         "node_modules/jackspeak": {
@@ -4391,7 +4408,6 @@
         },
         "node_modules/path-key": {
             "version": "3.1.1",
-            "dev": true,
             "license": "MIT",
             "engines": {
                 "node": ">=8"
@@ -5076,7 +5092,6 @@
         },
         "node_modules/shebang-command": {
             "version": "2.0.0",
-            "dev": true,
             "license": "MIT",
             "dependencies": {
                 "shebang-regex": "^3.0.0"
@@ -5087,7 +5102,6 @@
         },
         "node_modules/shebang-regex": {
             "version": "3.0.0",
-            "dev": true,
             "license": "MIT",
             "engines": {
                 "node": ">=8"
@@ -5657,7 +5671,6 @@
         },
         "node_modules/which": {
             "version": "2.0.2",
-            "dev": true,
             "license": "ISC",
             "dependencies": {
                 "isexe": "^2.0.0"

+ 2 - 1
ElectronJS/package.json

@@ -6,7 +6,7 @@
     "description": "NoCode Visual Web Crawler",
     "main": "main.js",
     "scripts": {
-        "start_direct": "electron .",
+        "start_direct": "cross-env NODE_ENV=development electron .",
         "change_version": "node change_version.js",
         "start": "electron-forge start",
         "package": "electron-forge package",
@@ -32,6 +32,7 @@
     "repository": "https://github.com/NaiboWang/EasySpider",
     "dependencies": {
         "cors": "^2.8.5",
+        "cross-env": "^7.0.3",
         "electron-squirrel-startup": "^1.0.0",
         "express": "^4.21.2",
         "formidable": "^3.5.0",

+ 452 - 43
ElectronJS/server.js

@@ -9,6 +9,9 @@ const formidable = require("formidable");
 const express = require("express");
 const multer = require("multer");
 const cors = require("cors");
+const { param } = require("express/lib/router");
+const http_request = require('http'); // 用于发送请求
+const { generateKey } = require("crypto");
 
 function travel(dir, callback) {
   fs.readdirSync(dir).forEach((file) => {
@@ -76,6 +79,15 @@ if (!fs.existsSync(path.join(getDir(), "config.json"))) {
   );
 }
 
+let child_processes = {}; // execution instance id -> { pid, ipc_port, process, key } of the child started by /executeTask; entries are deleted when that child emits 'close'
+let child_logs = {}; // execution instance id -> stdout text accumulated from the child; reset to "" on each /executeTask (NOTE(review): not removed on 'close' in the code shown)
+
+let config = fs.readFileSync( // read synchronously once, when this module is loaded
+    path.join(getDir(), `config.json`),
+    "utf8"
+);
+config = JSON.parse(config); // replaces the raw text with the parsed object; throws here if config.json is not valid JSON
+
 exports.getDir = getDir;
 exports.getEasySpiderLocation = getEasySpiderLocation;
 FileMimes = JSON.parse(
@@ -115,6 +127,77 @@ fileServer.listen(8075, () => {
   console.log("Server listening on http://localhost:8075");
 });
 
+
+/**
+ * Send a complete response in one call: write the status line with a
+ * Content-Type header, write `data` as the body, then end the response.
+ *
+ * `data` is handed to `res.write` unchanged, so it must already be serialized;
+ * the callers in this file (writeSuccess / writeError) pass JSON strings.
+ * Calling this on a response that was already ended or whose headers were
+ * already sent is not guarded against here.
+ *
+ * @param {Response} res response object of the HTTP server callback (must expose writeHead/write/end)
+ * @param {any} data response body, presumably a string or Buffer as `res.write` expects
+ * @param {number} statusCode HTTP status code, defaults to 200
+ * @param {string} contentType value of the Content-Type header, defaults to 'application/json'
+ */
+function writeAndEnd(res, data, statusCode = 200, contentType = 'application/json') {
+  res.writeHead(statusCode, { 'Content-Type': contentType });
+  res.write(data);
+  res.end();
+}
+
+
+/**
+ * Send a 200 JSON response of the shape
+ * `{ success: successMessage, status: true, ...data }` and end the response.
+ *
+ * @param {Response} res response object of the HTTP server callback
+ * @param {any} data extra fields merged into the JSON body; expected to be a plain object
+ *                   (e.g. `{}` or `{ log: "..." }`)
+ * @param {string} successMessage text placed in the `success` field, defaults to ""
+ */
+function writeSuccess(res, data, successMessage = "") {
+  // `data` is spread last, so a `success` or `status` key inside it overrides the defaults set here
+  writeAndEnd(res, JSON.stringify({ success: successMessage, status: true, ...data}), 200, 'application/json');
+}
+
+
+/**
+ * Send a JSON error response of the shape
+ * `{ error: errorMessage, status: false }` and end the response.
+ *
+ * @param {Response} res response object of the HTTP server callback
+ * @param {number} errorCode HTTP status code to respond with (e.g. 400, 404, 500)
+ * @param {string} errorMessage text placed in the `error` field, defaults to "Internal Server Error"
+ */
+function writeError(res, errorCode, errorMessage="Internal Server Error") {
+  // `errorCode` is used directly as the HTTP status; it is not repeated inside the JSON body
+  writeAndEnd(res, JSON.stringify({ error: errorMessage, status: false }), errorCode, 'application/json');
+}
+/**
+ * Build a 36-character, UUID-v4-shaped string (upper-case hex digits with
+ * dashes at indexes 8, 13, 18 and 23, and "4" at index 14).
+ *
+ * NOTE(review): the digits come from Math.random(), which is not
+ * cryptographically secure, yet the result is used as the remote-control
+ * secret passed to the child process and sent as the Authorization header
+ * of the shutdown request. Consider crypto.randomUUID() instead.
+ *
+ * @returns {string} the generated identifier, e.g. "3F2A9C01-7B4E-4D1A-8C55-0E9A6B7D2F10"
+ */
+function generateUuid() {
+  var s = [];
+  var hexDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" // only the first 16 characters (0-9, A-F) are ever indexed below
+  for (var i = 0; i < 36; i++) {
+    s[i] = hexDigits.substr(Math.floor(Math.random() * 0x10), 1) // random index in 0..15
+  }
+  s[14] = "4" // version digit
+  s[19] = hexDigits.substr((s[19] & 0x3) | 0x8, 1) // forces one of 8/9/A/B; a letter in s[19] coerces to NaN, i.e. 0, so letters always map to "8"
+  s[8] = s[13] = s[18] = s[23] = "-"
+  let uuid = s.join("")
+  return uuid
+}
+
+// Wrap `handler` so that an exception thrown synchronously while it runs is logged and, if no headers have been sent on `res` yet, answered with a 500 text/plain response (the stack trace is included only when NODE_ENV is 'development').
+// This keeps a throwing request handler from crashing the server. NOTE(review): the try/catch only covers the synchronous call; errors raised later inside callbacks the handler registers (timers, fs callbacks, child-process events) are not caught here.
+function safeHandler(handler, res) {
+  return (...args) => { // the wrapper forwards all of its arguments to `handler`
+    try {
+      handler(...args); // the handler's return value is discarded
+    } catch (err) {
+      console.error("Error handling request:", err);
+      if (!res.headersSent) { // if a response was already started, only the log entry above is produced
+        res.writeHead(500, { 'Content-Type': 'text/plain' });
+        if (process.env.NODE_ENV === 'development') {
+          res.end(`Internal Server Error: \n${err.stack}`);
+        } else {
+          res.end("Internal Server Error");
+        }
+      }
+    }
+  };
+}
+
 exports.start = function (port = 8074) {
   http
     .createServer(function (req, res) {
@@ -143,9 +226,6 @@ exports.start = function (port = 8074) {
         //     console.log(data);
         //     res.end('File uploaded and read successfully.');
         // });
-      } else if (pathName.indexOf(".") < 0) {
-        //如果没有后缀名, 则为后台请求
-        res.writeHead(200, { "Content-Type": "application/json" });
       }
       // else if(pathName.indexOf("index.html") >= 0) {
       //     fs.readFile(path.join(__dirname,"src", pathName), async (err, data) => {
@@ -164,7 +244,7 @@ exports.start = function (port = 8074) {
       //         }
       //     })
       // }
-      else {
+      else if (pathName.indexOf(".") >= 0) {
         //如果有后缀名, 则为前端请求
         // console.log(path.join(__dirname,"src/taskGrid", pathName));
         const filePath = safeJoin(safeBase, pathName);
@@ -200,7 +280,7 @@ exports.start = function (port = 8074) {
       req.on("data", function (chunk) {
         body += chunk;
       });
-      req.on("end", function () {
+      req.on("end", safeHandler(() => {
         // 设置响应头部信息及编码
         if (pathName == "/queryTasks") {
           //查询所有服务信息,只包括id和服务名称
@@ -226,9 +306,12 @@ exports.start = function (port = 8074) {
             }
           });
           output.sort(compare("mtime"));
+          // 只修改外层为 {} 的响应增加 status 字段,其他响应不变,否则就和之前不兼容了
+          res.writeHead(200, { "Content-Type": "application/json" });
           res.write(JSON.stringify(output));
           res.end();
         } else if (pathName == "/queryOSVersion") {
+          res.writeHead(200, { "Content-Type": "application/json" });
           res.write(
             JSON.stringify({ version: process.platform, bit: process.arch })
           );
@@ -252,6 +335,7 @@ exports.start = function (port = 8074) {
               }
             }
           );
+          res.writeHead(200, { "Content-Type": "application/json" });
           res.write(JSON.stringify(output));
           res.end();
         } else if (pathName == "/queryTask") {
@@ -263,15 +347,11 @@ exports.start = function (port = 8074) {
               "utf8"
             );
             // parse JSON string to JSON object
+            res.writeHead(200, { "Content-Type": "application/json" });
             res.write(data);
             res.end();
           } catch (error) {
-            res.write(
-              JSON.stringify({
-                error: "Cannot find task based on specified task ID.",
-              })
-            );
-            res.end();
+            writeError(res, 404, "Cannot find task based on specified task ID.");
           }
         } else if (pathName == "/queryExecutionInstance") {
           let params = url.parse(req.url, true).query;
@@ -282,18 +362,14 @@ exports.start = function (port = 8074) {
               "utf8"
             );
             // parse JSON string to JSON object
+            res.writeHead(200, { "Content-Type": "application/json" });
             res.write(data);
             res.end();
           } catch (error) {
-            res.write(
-              JSON.stringify({
-                error:
-                  "Cannot find execution instance based on specified execution ID.",
-              })
-            );
-            res.end();
+            writeError(res, 404, "Cannot find execution instance based on specified execution ID.");
           }
         } else if (pathName == "/") {
+          res.writeHead(200, { "Content-Type": "text/plain" });
           res.write("Hello World!", "utf8");
           res.end();
         } else if (pathName == "/deleteTask") {
@@ -317,17 +393,9 @@ exports.start = function (port = 8074) {
                 }
               }
             );
-            res.write(
-              JSON.stringify({ success: "Task has been deleted successfully." })
-            );
-            res.end();
+            writeSuccess(res, {}, "Task has been deleted successfully.");
           } catch (error) {
-            res.write(
-              JSON.stringify({
-                error: "Cannot find task based on specified task ID.",
-              })
-            );
-            res.end();
+            writeError(res, 404, "Cannot find task based on specified task ID.")
           }
         } else if (pathName == "/manageTask") {
           body = querystring.parse(body);
@@ -384,25 +452,40 @@ exports.start = function (port = 8074) {
             data,
             (err) => {}
           );
-
+          
+          res.writeHead(200, { "Content-Type": "text/plain" });
           res.write(id.toString(), "utf8");
           res.end();
         } else if (pathName == "/invokeTask") {
           body = querystring.parse(body);
-          let data = JSON.parse(body.params);
+          let data;
+          if (body.params === undefined || body.params == "") {
+            data = {};
+          } else {
+            try{
+              data = JSON.parse(body.params);
+            } catch (error) {
+              console.error(error);
+              writeError(res, 400, "Fail to parse parameters from json string.");
+              return;
+            }
+          }
           let id = body.id;
+          if (id === undefined || id == "") {
+            writeError(res, 400, "Task ID is required.");
+            return;
+          }
           let task = fs.readFileSync(
             path.join(getDir(), `tasks/${id}.json`),
             "utf8"
           );
           task = JSON.parse(task);
-          try {
-            task["links"] = data["urlList_0"];
-            if (task["links"] == undefined) {
-              task["links"] = "about:blank";
+          // 允许不填写 urlList_0,此时采用任务中的默认值
+          if (data["urlList_0"] !== undefined && data["urlList_0"] != "") {
+              try {
+                task["links"] = data["urlList_0"];
+              } catch (error) {
             }
-          } catch (error) {
-            task["links"] = "about:blank";
           }
           for (const [key, value] of Object.entries(data)) {
             for (let i = 0; i < task["inputParameters"].length; i++) {
@@ -434,7 +517,9 @@ exports.start = function (port = 8074) {
                   file_names.push(parseInt(file.split(".")[0]));
                 }
                 console.log(file);
-              } catch (error) {}
+              } catch (error) {
+                console.error(error);
+              }
             }
           );
           let eid = 0;
@@ -452,6 +537,9 @@ exports.start = function (port = 8074) {
             task,
             (err) => {}
           );
+          console.log(`Task ${id} has been generated to file ${path.join(getDir(), `execution_instances/${eid}.json`)}`);
+          // res.writeHead
+          res.writeHead(200, { "Content-Type": "text/plain" });
           res.write(eid.toString(), "utf8");
           res.end();
         } else if (pathName == "/getConfig") {
@@ -464,6 +552,7 @@ exports.start = function (port = 8074) {
           if(lang == undefined){
             lang = "-";
           }
+          res.writeHead(200, { "Content-Type": "application/json" });
           res.write(JSON.stringify(config_file));
           res.end();
         } else if (pathName == "/setUserDataFolder") {
@@ -476,14 +565,334 @@ exports.start = function (port = 8074) {
           config["user_data_folder"] = body["user_data_folder"];
           config = JSON.stringify(config);
           fs.writeFile(path.join(getDir(), `config.json`), config, (err) => {});
-          res.write(
-            JSON.stringify({
-              success: "User data folder has been set successfully.",
-            })
+          writeSuccess(res, {}, "User data folder has been set successfully.");
+        } else if (pathName == "/executeTask") {
+          body = querystring.parse(body);
+          if (body === undefined || body.id === undefined || body.id == "") {
+            writeError(res, 400, "Execution instance ID is required.");
+            return;
+          }
+          let timeout = 10;
+          if (body.timeout !== undefined && body.timeout != "") {
+            try{
+              timeout = parseInt(body.timeout);
+            } catch (error) {
+              writeError(res, 400, "Timeout must be a number.");
+              return;
+            }
+          }
+          // 1. Find executable path
+          let platform_dir = "";
+          let executable_name = "easyspider_executestage";
+
+          if (process.platform === "win32" && process.arch === "x64") {
+              platform_dir = "chrome_win64";
+              executable_name += ".exe";
+          } else if (process.platform === "win32" && process.arch === "ia32") {
+              platform_dir = "chrome_win32";
+              executable_name += ".exe";
+          } else if (process.platform === "linux") {
+              platform_dir = "chrome_linux64";
+          } else if (process.platform === "darwin") {
+              writeError(res, 400, "Executing from remote control is not supported on macOS.");
+              return;
+          }
+
+          const dev_executable_path = path.join(__dirname, platform_dir, executable_name);
+          const packaged_executable_path = path.join(getEasySpiderLocation(), 'resources', 'app', platform_dir, executable_name);
+          let executable_path = "";
+
+          if (fs.existsSync(dev_executable_path)) {
+              executable_path = dev_executable_path;
+              console.log("Using development executable path:", executable_path);
+          } else if (fs.existsSync(packaged_executable_path)) {
+              executable_path = packaged_executable_path;
+          }
+
+          if (executable_path === "") {
+              writeError(res, 500, "Could not find the executable for this platform.");
+              return;
+          }
+
+          if (body.use_user_data == "true" || body.use_user_data == "1") {
+            body.use_user_data = 1;
+          } else {
+            body.use_user_data = 0;
+          }
+          try{
+            body.id = JSON.parse(body.id);
+          } catch (error) {
+            writeError(res, 400, "Fail to parse execution instance ID from json string.");
+          }
+          if (Array.isArray(body.id)) {
+            console.log("Multiple execution instances detected.");
+            let not_found = [];
+            for (let i = 0; i < body.id.length; i++) {
+              try{
+                // 尝试读取一次任务文件
+                let eid = parseInt(body.id[i]);
+                let file = fs.readFileSync(
+                  path.join(getDir(), `execution_instances/${eid}.json`),
+                  "utf8"
+                );
+                let task = JSON.parse(file);
+                // 忽略逻辑删除的任务
+                if (task == undefined || task.id == -2) {
+                  console.log(`${eid} not found.`)
+                  not_found.push(eid);
+                }
+              } catch (error) {
+                not_found.push(body.id[i]);
+              }
+            }
+            if (not_found.length > 0) {
+              writeError(res, 404, `Cannot find execution instances based on specified execution IDs: ${not_found.join(", ")}`);
+              return;
+            }
+            for (let i = 0; i < body.id.length; i++) {
+              if (child_processes[body.id[i]] != null) {
+                writeError(res, 400, `Execution instance ${body.id[i]} is already running. If you want to run it again, please stop the current execution instance first.`);
+                return;
+              }
+            }
+          } else {
+            try{
+              // 尝试读取一次任务文件
+              let eid = parseInt(body.id);
+              let file = fs.readFileSync(
+                path.join(getDir(), `execution_instances/${eid}.json`),
+                "utf8"
+              );
+              let task = JSON.parse(file);
+              // 忽略逻辑删除的任务
+              if (task == undefined || task.id == -2) {
+                writeError(res, 404, "Cannot find execution instance based on specified execution ID.");
+                return;
+              }
+            } catch (error) {
+              writeError(res, 404, "Cannot find execution instance based on specified execution ID.");
+              return;
+            }
+            if (child_processes[body.id] != null) {
+              writeError(res, 400, `Execution instance ${body.id} is already running. If you want to run it again, please stop the current execution instance first.`);
+              return;
+            }
+          }
+          let config;
+          try{
+            config = fs.readFileSync(
+              path.join(getDir(), `config.json`),
+              "utf8"
+            );
+            config = JSON.parse(config);
+          } catch (error) {
+            writeError(res, 500, "Fail to parse config.json.");
+            return;
+          }
+          let ids_string;
+          if (Array.isArray(body.id)) {
+            // 多个执行实例
+            ids_string = body.id.join(",");
+            for (let i = 0; i < body.id.length; i++) {
+              child_logs[body.id[i]] = ""; // 初始化日志
+            }
+          } else {
+            ids_string = body.id;
+            child_logs[body.id] = ""; // 初始化日志
+          }
+          console.log(`Executing task with IDs: ${ids_string}`);
+          let spawn = require("child_process").spawn;
+          let server_address = `${config.webserver_address}:${config.webserver_port}`;
+          let secret_key = generateUuid(); // 生成一个随机的密钥
+          let parameters = [
+              "--ids",
+              "[" + ids_string + "]",
+              "--server_address",
+              server_address,
+              "--user_data",
+              body.use_user_data.toString(),
+              "--remote_control",
+              "1",
+              "--remote_control_key",
+              secret_key,
+          ];
+          const child_process = spawn(
+            executable_path,
+            parameters,
+           { detached: false, env: { ...process.env, 'PYTHONUNBUFFERED': '1', 'PYTHONUTF8': '1'} } // 设置环境变量,强制 utf-8 输出
           );
-          res.end();
+          if (!child_process.pid) {
+            writeError(res, 500, "Failed to start the child process and get its PID.");
+            return;
+          }
+          console.log(`Started child process with PID: ${child_process.pid}`);
+
+          let ipc_port_captured = false;
+          child_process.stdout.on("data", (data) => {
+            const output = data.toString();
+            console.log(`[PID ${child_process.pid}] stdout: ${output}`);
+            if (Array.isArray(body.id)) {
+              for (let i = 0; i < body.id.length; i++) {
+                child_logs[body.id[i]] = (child_logs[body.id[i]] || "") + output;
+              }
+            } else {
+              child_logs[body.id] = (child_logs[body.id] || "") + output;
+            }
+            
+            const match = output.match(/IPC_SERVER_PORT:(\d+)/);
+            if (match && match[1]) {
+              const ipc_port = parseInt(match[1], 10);
+              console.log(`Captured IPC port ${ipc_port} for PID ${child_process.pid}`);
+              
+              // 存储进程信息
+              const process_info = {
+                pid: child_process.pid,
+                ipc_port: ipc_port,
+                process: child_process,
+                key: secret_key,
+              };
+              if (Array.isArray(body.id)) {
+                for (let i = 0; i < body.id.length; i++) {
+                  child_processes[body.id[i]] = process_info;
+                }
+              } else {
+                child_processes[body.id] = process_info;
+              }
+              
+              if (!ipc_port_captured) {
+                ipc_port_captured = true;
+                writeSuccess(res, {message: `Task execution started successfully for ID(s): ${ids_string}`});
+              }
+            }
+          });
+
+          child_process.stderr.on("data", (data) => {
+            console.error(`[PID ${child_process.pid}] stderr: ${data.toString()}`);
+          });
+         
+          child_process.on('close', (code) => {
+            console.log(`Child process with PID ${child_process.pid} exited with code ${code}`);
+            // 清理记录
+            for (const id in child_processes) {
+              if (child_processes[id].pid === child_process.pid) {
+                delete child_processes[id];
+              }
+            }
+          });
+          
+          // 添加一个超时,以防Python脚本未能成功启动IPC服务器
+          setTimeout(() => {
+            if (!ipc_port_captured) {
+              writeError(res, 500, "Failed to get IPC port from child process within timeout.");
+              child_process.kill('SIGKILL'); // 强制杀死没有响应的进程
+            }
+          },  timeout * 1000); // 5秒超时
+        } else if (pathName == "/stopTask") {
+        body = querystring.parse(body);
+          if (!body.id) {
+            writeError(res, 400, "Execution instance ID is required to stop a task.");
+            return;
+          }
+
+          const process_info = child_processes[body.id];
+          if (!process_info || !process_info.ipc_port) {
+            writeError(res, 404, `No running process found for execution instance ID: ${body.id}. It might have already finished.`);
+            return;
+          }
+
+          const options = {
+            hostname: '127.0.0.1',
+            port: process_info.ipc_port,
+            path: '/shutdown',
+            method: 'GET',
+            headers: {
+              'Authorization': process_info.key, // 使用之前生成的密钥进行身份验证
+            }
+          };
+
+          console.log(`Sending shutdown command to http://localhost:${process_info.ipc_port}/shutdown`);
+
+          const req = http_request.request(options, (api_res) => {
+            if (api_res.statusCode === 200) {
+              writeSuccess(res, { message: `Shutdown command sent successfully to task ID ${body.id}.` });
+            } else {
+              writeError(res, 500, `IPC server responded with status: ${api_res.statusCode}`);
+            }
+          });
+
+          req.on('error', (e) => {
+            console.error(`Problem with request to IPC server: ${e.stack}`);
+            writeError(res, 500, "Failed to send command to the task process. It might have crashed.");
+          });
+
+          req.end();
+
+        } else if (pathName == "/getTaskLog") {
+          let params = url.parse(req.url, true).query;
+          if (params === undefined || params.id === undefined || params.id == "") {
+            writeError(res, 400, "Execution instance ID is required.");
+            return;
+          }
+          let id = params.id;
+          const process_info = child_processes[id];
+          if (process_info && process_info.ipc_port) {
+            // 进程正在运行,直接读取日志
+            writeSuccess(res, { log: child_logs[id] || "" });
+            return;
+          }
+          // 进程没有运行,则读取日志文件
+          // 列出 Data/Task_${id} 目录下的所有文件
+          let logFileFolder = path.join(getDir(), `Data/Task_${id}`);
+          // 列出目录下的所有文件,返回名称为 *.log 的文件
+          let logFilePath = "";
+          let logFileName = "";
+          fs.readdir(logFileFolder, (err, files) => {
+            if (err) {
+              console.error(err);
+              writeError(res, 400, "Log file does not exist.");
+              return;
+            }
+            // 查找以 .log 结尾的文件
+            files.forEach((file) => {
+              if (file.endsWith(".log")) {
+                let p = path.join(logFileFolder, file);
+                if (file > logFileName) {
+                  // 取最新的日志文件
+                  logFilePath = p;
+                  logFileName = file;
+                }
+              }
+            });
+            if (logFilePath === "") {
+              writeError(res, 404, "Log file not found.");
+              return;
+            }
+            fs.readFile(logFilePath, "utf8", (err, data) => {
+              if (err) {
+                console.error(err);
+                writeError(res, 500, "Failed to read log file.");
+                return;
+              }
+              // 缓存日志
+              writeSuccess(res, { log: data });
+            });
+          })
+        } else if (pathName == "/getTaskStatus"){
+          let params = url.parse(req.url, true).query;
+          if (params === undefined || params.id === undefined || params.id == "") {
+            writeError(res, 400, "Execution instance ID is required.");
+            return;
+          }
+          let id = params.id;
+          const process_info = child_processes[id];
+          if (process_info && process_info.ipc_port) {
+            // 进程正在运行,直接读取日志
+            writeSuccess(res, { running: true });
+          } else {
+            writeSuccess(res, { running: false })
+          }
         }
-      });
+      }, res));
     })
     .listen(port);
   console.log("Server has started.");

+ 12 - 7
ElectronJS/src/taskGrid/FlowChart.js

@@ -4,7 +4,7 @@ let root = {
     index: 0, //在nodeList中的索引号
     id: 0,
     parentId: 0,
-    type: -1,
+    type: -1, // 0: 顺序结构;1:循环结构;2:分支结构
     option: 0,
     title: "root",
     sequence: [],
@@ -484,7 +484,7 @@ function operationChange(e, theNode) {
     vueData.nowNodeIndex = actionSequence[theNode.getAttribute("data")];
     theNode.style.borderColor = "blue";
     handleElement(); //处理元素
-    if (debuggable) {
+    if (debuggable && e.button == 0) {
         trailElement(app._data.nowNode, 0);
     } else {
         debuggable = true;
@@ -664,13 +664,14 @@ function toolBoxKernel(e, param = null) {
             isInLoop: false,
         };
         nodeList.push(t);
-        if (option == 8) //循环
+        if (option == 8) // 循环模式下 type 设置为 1
         {
             t["type"] = 1;
-        } else if (option == 9) //判断
+        } else if (option == 9) // 判断模式下 type 设置为 2
         {
             t["type"] = 2;
-            // 增加两个分支
+            // 增加两个分支节点
+            // 这两个分支节点需要放入 nodeList 中,并且需要在 t 的 sequence 中添加它们的 index 
             nt = {
                 id: 0,
                 parentId: 0,
@@ -894,6 +895,10 @@ function deleteElement() {
 }
 
 document.getElementById("flowchart_graph").oncontextmenu = function (e) {
+    let menu = document.getElementById("contextMenu")
+    if (menu) {
+        menu.remove(); //如果右键菜单已经存在,先删除它
+    }
     // 创建一个包含删除选项的右键菜单
     let contextMenu = document.createElement("div");
     contextMenu.id = "contextMenu";
@@ -937,8 +942,8 @@ document.getElementById("flowchart_graph").oncontextmenu = function (e) {
     // 设置右键菜单的样式
     contextMenu.style.position = "absolute";
     contextMenu.style.backgroundColor = "rgb(248, 249, 250)";
-    contextMenu.style.left = event.clientX + "px";
-    contextMenu.style.top = event.clientY + "px";
+    contextMenu.style.left = e.pageX + "px";
+    contextMenu.style.top = e.pageY + "px";
     contextMenu.style.width = LANG("180px", "250px");
 
     // 添加删除元素的功能

+ 183 - 29
ExecuteStage/easyspider_executestage.py

@@ -45,6 +45,7 @@ import sys
 # import hashlib
 import time
 import requests
+from http.server import BaseHTTPRequestHandler, HTTPServer
 from multiprocessing import freeze_support
 freeze_support()  # 防止无限死循环多开
 try:
@@ -73,9 +74,11 @@ desired_capabilities["pageLoadStrategy"] = "none"
 
 
 class BrowserThread(Thread):
-    def __init__(self, browser_t, id, service, version, event, saveName, config, option, commandline_config=""):
+    def __init__(self, browser_t, id, service, version, event, saveName, config, option, shutdown_event, commandline_config=""):
         Thread.__init__(self)
         self.logs = io.StringIO()
+        # 退出事件,用于远程执行时的中断
+        self.shutdown_event = shutdown_event
         self.log = bool(service.get("recordLog", True))
         self.browser = browser_t
         self.option = option
@@ -466,28 +469,41 @@ class BrowserThread(Thread):
     def run(self):
         # 挨个执行程序
         for i in range(len(self.links)):
-            self.print_and_log("正在执行第", i + 1, "/", len(self.links), "个链接")
-            self.print_and_log("Executing link", i + 1,
-                               "/", len(self.links))
-            self.executeNode(0)
-            self.urlId = self.urlId + 1
+            if self.shutdown_event.is_set():
+                self.print_and_log("接收到终止信号,正在中断任务... | Received termination signal, interrupting task...")
+                break
+            self.event.wait()  # 暂停/恢复
+            self.executeNode(self.startSteps, self.links[i], "", i)
         # files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
         # 如果目录为空,则删除该目录
         # if not files:
         #     os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName)
-        self.print_and_log("Done!")
-        self.print_and_log("执行完成!")
-        self.saveData(exit=True)
-        self.removeDuplicateData()
+        if self.shutdown_event.is_set():
+            self.print_and_log("任务已中断 | Task interrupted")
+        else:
+            self.print_and_log("Done!")
+            self.print_and_log("执行完成!")
+            self.saveData(exit=True)
+            self.removeDuplicateData()
+            
         if self.outputFormat == "mysql":
             self.mysql.close()
-        try:
-            quitWaitTime = self.service["quitWaitTime"]
-        except:
-            quitWaitTime = 60
-        self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。")
-        self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.")
-        time.sleep(quitWaitTime)
+        
+        if not self.shutdown_event.is_set():
+            try:
+                quitWaitTime = self.service["quitWaitTime"]
+            except:
+                quitWaitTime = 60
+            self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。")
+            self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.")
+            
+            # 使退出前的等待可被中断
+            wait_end_time = time.time() + quitWaitTime
+            while time.time() < wait_end_time:
+                if self.shutdown_event.is_set():
+                    break
+                time.sleep(0.1)
+
         try:
             self.browser.quit()
         except:
@@ -508,8 +524,8 @@ class BrowserThread(Thread):
     # 定义一个自定义的 print 函数,它将内容同时打印到屏幕和文件中
     def print_and_log(self, *args, **kwargs):
         now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
-        # 将内容打印到屏幕
-        print(*args, **kwargs)
+        # 将内容打印到屏幕,立刻输出
+        print(*args, **kwargs, flush=True)
 
         # 将内容写入文件
         print(now + ":", *args, file=self.logs, **kwargs)
@@ -914,6 +930,9 @@ class BrowserThread(Thread):
 
     # 执行节点关键函数部分
     def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         node = self.procedure[nodeId]
         # WebDriverWait(self.browser, 10).until
         # # 等待元素出现才进行操作,10秒内未出现则报错
@@ -982,6 +1001,7 @@ class BrowserThread(Thread):
             elif node["option"] == 9:  # 条件分支
                 self.judgeExecute(node, loopValue, loopPath, index)
         except Exception as e:
+            if self.shutdown_event.is_set(): return
             self.print_and_log("执行节点<" + node["title"] + ">时出错,将继续执行,错误为:", e)
             self.print_and_log("Error executing node <" + node["title"] + ">, will continue to execute, error is:", e)
         
@@ -1003,6 +1023,9 @@ class BrowserThread(Thread):
 
     # 对判断条件的处理
     def judgeExecute(self, node, loopElement, clickPath="", index=0):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         executeBranchId = 0  # 要执行的BranchId
         for i in node["sequence"]:
             cnode = self.procedure[i]  # 获得条件分支
@@ -1082,6 +1105,9 @@ class BrowserThread(Thread):
                 "判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met")
 
     def handleHistory(self, node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         try:
             changed_handle = self.browser.current_window_handle != thisHandle
         except:  # 如果网页被意外关闭了的情况下
@@ -1139,6 +1165,9 @@ class BrowserThread(Thread):
 
     # 对循环的处理
     def loopExecute(self, node, loopValue, loopPath="", index=0):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         time.sleep(0.1)  # Force a 0.1-second wait before the loop starts executing
         thisHandle = self.browser.current_window_handle  # 记录本次循环内的标签页的ID
         try:
@@ -1198,7 +1227,10 @@ class BrowserThread(Thread):
             # 无跳转标签页操作
             count = 0  # 执行次数
             bodyText = "-"
+
             while True:  # do while循环
+                if self.shutdown_event.is_set():
+                    break
                 try:
                     finished = False
                     if node["parameters"]["exitCount"] == 0:
@@ -1301,6 +1333,8 @@ class BrowserThread(Thread):
                 index = 0
                 skipCount = node["parameters"]["skipCount"]
                 while index < len(elements):
+                    if self.shutdown_event.is_set():
+                        break
                     if index < skipCount:
                         index += 1
                         self.print_and_log("跳过第" + str(index) + "个元素")
@@ -1345,6 +1379,8 @@ class BrowserThread(Thread):
             index = 0
             skipCount = node["parameters"]["skipCount"]
             while index < len(paths):
+                if self.shutdown_event.is_set():
+                    break
                 if index < skipCount:
                     index += 1
                     self.print_and_log("跳过第" + str(index) + "个元素")
@@ -1390,6 +1426,8 @@ class BrowserThread(Thread):
             skipCount = node["parameters"]["skipCount"]
             index = 0
             for text in textList:
+                if self.shutdown_event.is_set():
+                    break
                 if index < skipCount:
                     index += 1
                     self.print_and_log("跳过第" + str(index) + "个文本")
@@ -1424,6 +1462,8 @@ class BrowserThread(Thread):
             skipCount = node["parameters"]["skipCount"]
             index = 0
             for url in urlList:
+                if self.shutdown_event.is_set():
+                    break
                 if index < skipCount:
                     index += 1
                     self.print_and_log("跳过第" + str(index) + "个网址")
@@ -1449,6 +1489,8 @@ class BrowserThread(Thread):
                         break
         elif int(node["parameters"]["loopType"]) <= 7:  # 命令返回值
             while True:  # do while循环
+                if self.shutdown_event.is_set():
+                    break
                 if int(node["parameters"]["loopType"]) == 5:  # JS
                     output = self.execute_code(
                         0, node["parameters"]["code"], node["parameters"]["waitTime"],
@@ -1478,6 +1520,9 @@ class BrowserThread(Thread):
 
     # 打开网页操作
     def openPage(self, param, loopValue):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         time.sleep(1)  # 打开网页后强行等待至少1秒
         if len(self.browser.window_handles) > 1:
             self.browser.switch_to.window(
@@ -1543,6 +1588,9 @@ class BrowserThread(Thread):
 
     # 键盘输入操作
     def inputInfo(self, param, loopValue):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         time.sleep(0.1)  # 输入之前等待0.1秒
         try:
             xpath = replace_field_values(
@@ -1596,6 +1644,9 @@ class BrowserThread(Thread):
 
     # 点击元素操作
     def clickElement(self, param, loopElement=None, clickPath="", index=0):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         try:
             maxWaitTime = int(param["maxWaitTime"])
         except:
@@ -1784,6 +1835,10 @@ class BrowserThread(Thread):
         self.scrollDown(param)  # 根据参数配置向下滚动
 
     def get_content(self, p, element):
+        if self.shutdown_event.is_set():
+            return ""
+        self.event.wait()
+        # self.print_and_log(p)
         content = ""
         if p["contentType"] == 0:
             # 先处理特殊节点类型
@@ -1964,12 +2019,19 @@ class BrowserThread(Thread):
         return content
 
     def clearOutputParameters(self):
+        # Reset every entry of self.outputParameters to an empty string and
+        # record that through recordLog. Returns without touching anything
+        # once the shutdown event has been set.
+        if self.shutdown_event.is_set():
+            return
+        # Blocks while self.event is cleared (pause) and continues once it is set (resume).
+        self.event.wait()
+
         for key in self.outputParameters:
             self.outputParameters[key] = ""
         self.recordLog("清空输出参数|Clear output parameters")
 
     # 提取数据操作
     def getData(self, param, loopElement, isInLoop=True, parentPath="", index=0):
+        if self.shutdown_event.is_set():
+            return
+        self.event.wait()
         parentPath = replace_field_values(
             parentPath, self.outputParameters, self)
         if param["clear"] == 1:
@@ -2188,6 +2250,50 @@ class BrowserThread(Thread):
                             self.maxViewLength, self.outputParametersRecord)
             self.OUTPUT.append(line)
 
+
+def start_ipc_server(shutdown_event_to_set, key):
+    """在一个新线程中启动一个远程控制服务器。用于在 nodejs 父进程和 python 子进程之间进行通信。
+    :param shutdown_event_to_set: 用于设置主程序中的全局关闭事件
+    :param key: 用于验证远程控制请求的密钥
+    """
+    
+    class ShutdownHandler(BaseHTTPRequestHandler):
+        def do_GET(self):
+            request_key = self.headers.get('Authorization')
+            if request_key != key and request_key != "Bearer " + key:
+                self.send_response(403)
+                self.end_headers()
+                self.wfile.write(b'Forbidden: Invalid key.')
+                return
+
+            if self.path == '/shutdown':
+                print("IPC server received shutdown command.")
+                # 设置主程序中的全局关闭事件
+                shutdown_event_to_set.set()
+                self.send_response(200)
+                self.end_headers()
+                self.wfile.write(b'Shutdown signal received.')
+            else:
+                self.send_response(404)
+                self.end_headers()
+                self.wfile.write(b'Not Found.')
+        def log_message(self, format, *args):
+                # 覆盖此方法以禁止向 stderr 打印日志
+                return
+
+    # 绑定到 localhost 和一个随机可用端口 (port 0)
+    httpd = HTTPServer(("127.0.0.1", 0), ShutdownHandler)
+    
+    # 获取被分配的端口号
+    ipc_port = httpd.socket.getsockname()[1]
+
+     # 关键步骤:将端口号打印到 stdout,以便 Node.js 父进程捕获
+    print(f"IPC_SERVER_PORT:{ipc_port}", flush=True)
+    
+    # 在当前线程中运行服务器,直到被关闭
+    httpd.serve_forever()
+
+
 if __name__ == '__main__':
     # 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度
     # If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed
@@ -2200,6 +2306,8 @@ if __name__ == '__main__':
         "read_type": "remote",
         "headless": False,
         "server_address": "http://localhost:8074",
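+        "remote_control": False, # Whether to start the remote-control server. "Remote" here means control by the Electron main program: the server listens on 127.0.0.1 only, so it cannot be reached from other machines (its port is also chosen at random)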
+        "remote_control_key": "", # 如果开启远程控制,那么初始化时需要填写此 key,保证安全性
         "keyboard": True,  # 是否监听键盘输入
         "pause_key": "p",  # 暂停键
         "version": "0.6.3",
@@ -2207,7 +2315,10 @@ if __name__ == '__main__':
         "user_folder": "",
     }
     c = Config(commandline_config)
+    remote_key = c.remote_control_key
+    c.remote_control_key = "hidden for security"  # Mask the remote-control key while the config is printed so the real key does not appear in the output
     print(c)
+    c.remote_control_key = remote_key  # 恢复远程控制密钥
     options = webdriver.ChromeOptions()
     driver_path = "chromedriver.exe"
     print(sys.platform, platform.architecture())
@@ -2222,6 +2333,15 @@ if __name__ == '__main__':
         if c.config_folder == "":
             c.config_folder = os.path.expanduser(
                 "~/Library/Application Support/EasySpider/")
+    # 在 linux 里,EasySpider 打包版默认是在 EasySpider 文件夹内执行的,没有 windows 上那种外层的 25kb 启动器
+    # 因此需要特殊处理并判断此时 chrome, chromedriver 和扩展的路径。
+    elif sys.platform == "linux" and platform.architecture()[0] == "64bit" and os.path.exists(os.path.join(os.getcwd(), "resources")):
+        print("Finding chromedriver in EasySpider",
+              os.getcwd())
+        # 相对于下一条检查语句,这里去掉了 EasySpider 文件夹这一层
+        options.binary_location = "resources/app/chrome_linux64/chrome"
+        driver_path = "resources/app/chrome_linux64/chromedriver_linux64"
+        options.add_extension("resources/app/XPathHelper.crx")
     elif os.path.exists(os.getcwd() + "/EasySpider/resources"):  # 打包后的路径
         print("Finding chromedriver in EasySpider",
               os.getcwd() + "/EasySpider")
@@ -2250,9 +2370,23 @@ if __name__ == '__main__':
         # 软件dev用
         print("Finding chromedriver in EasySpider",
               os.getcwd() + "/ElectronJS")
-        options.binary_location = "../ElectronJS/chrome_win64/chrome.exe"  # 指定chrome位置
-        driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
-        options.add_extension("../ElectronJS/XPathHelper.crx")
+        if sys.platform == "win32" and platform.architecture()[0] == "32bit":
+            options.binary_location = os.path.join(
+                os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe")  # 指定chrome位置
+            driver_path = os.path.join(
+                os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
+            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
+        elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
+            options.binary_location = "../ElectronJS/chrome_win64/chrome.exe"  # 指定chrome位置
+            driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
+            options.add_extension("../ElectronJS/XPathHelper.crx")
+        elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
+            options.binary_location = "../ElectronJS/chrome_linux64/chrome"
+            driver_path = "../ElectronJS/chrome_linux64/chromedriver_linux64"
+            options.add_extension("../ElectronJS/XPathHelper.crx")
+        else:
+            print("Unsupported platform for automatic detection. You need to specify chrome executable path, chromedriver path in code.")
+            sys.exit()
     else:
         options.binary_location = "./chrome.exe"  # 指定chrome位置
         driver_path = "./chromedriver.exe"
@@ -2293,6 +2427,17 @@ if __name__ == '__main__':
     tmp_options = []
     for id in c.ids:
         tmp_options.append({"options": copy.deepcopy(options), "tmp_user_data_folder": ""})
+    
+    ipc_thread = None
+    shutdown_event = Event()
+    if c.remote_control:
+        if c.remote_control_key == "":
+            print("Remote control is enabled, but no remote control key is set, please set the --remote_control_key parameter to a non-empty value.")
+            print("远程控制已启用,但未设置远程控制密钥,请将--remote_control_key参数设置为非空值。")
+            sys.exit(1)
+        else:
+            ipc_thread = threading.Thread(target=start_ipc_server, args=(shutdown_event, c.remote_control_key), daemon=True)
+            ipc_thread.start() 
 
     if c.user_data:
         tmp_user_folder_parent = os.path.join(os.getcwd(), "TempUserDataFolder")
@@ -2450,7 +2595,7 @@ if __name__ == '__main__':
         event = Event()
         event.set()
         thread = BrowserThread(browser_t, id, service,
-                               c.version, event, c.saved_file_name, config=config, option=tmp_options[i], commandline_config=c)
+                               c.version, event, c.saved_file_name, config=config, option=tmp_options[i], shutdown_event=shutdown_event, commandline_config=c)
         print("Thread with task id: ", id, " is created")
         threads.append(thread)
         thread.start()
@@ -2478,17 +2623,26 @@ if __name__ == '__main__':
         #     print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
         #     print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
         # 使用监听器监听键盘输入
+    listener = None
     try:
         from pynput.keyboard import Key, Listener
         if c.keyboard:
-            with Listener(on_press=on_press_creator(press_time, event),
-                          on_release=on_release_creator(event, press_time)) as listener:
-                listener.join()
+            listener = Listener(on_press=on_press_creator(press_time, event),
+                          on_release=on_release_creator(event, press_time))
     except:
         pass
         # print("您的操作系统不支持暂停功能。")
         # print("Your operating system does not support the pause function.")
 
-    for thread in threads:
-        print()
-        thread.join()
+    try:
+        while (any(thread.is_alive() for thread in threads)):
+            for thread in threads:
+                thread.join(0.1)
+    except (KeyboardInterrupt, SystemExit):
+        print("程序被手动终止,正在关闭浏览器...")
+        print("The program is manually terminated, closing the browser...")
+        if not shutdown_event.is_set():
+            shutdown_event.set()
+    finally:
+        if listener is not None and listener.is_alive():
+            listener.stop()