naibo 1 year ago
parent
commit
1b6661afb8

+ 106 - 63
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py

@@ -5,9 +5,10 @@ import copy
 import platform
 import shutil
 import string
+import threading
 # import undetected_chromedriver as uc
 from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
-    on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
+    on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
 from myChrome import MyChrome
 from threading import Thread, Event
 from PIL import Image
@@ -112,9 +113,13 @@ class BrowserThread(Thread):
         self.print_and_log("Save Name for task ID", id, "is:", self.saveName)
         if not os.path.exists("Data/Task_" + str(id)):
             os.mkdir("Data/Task_" + str(id))
-        if not os.path.exists("Data/Task_" + str(id) + "/" + self.saveName):
-            os.mkdir("Data/Task_" + str(id) + "/" +
-                     self.saveName)  # 创建保存文件夹用来保存截图
+        self.downloadFolder = "Data/Task_" + str(id) + "/" + self.saveName
+        if not os.path.exists(self.downloadFolder):
+            os.mkdir(self.downloadFolder)  # 创建保存文件夹用来保存截图和文件
+        if not os.path.exists(self.downloadFolder + "/files"):
+            os.mkdir(self.downloadFolder + "/files")
+        if not os.path.exists(self.downloadFolder + "/images"):
+            os.mkdir(self.downloadFolder + "/images")
         self.getDataStep = 0
         self.startSteps = 0
         try:
@@ -142,12 +147,21 @@ class BrowserThread(Thread):
             self.print_and_log("Loading stealth.min.js")
         self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
             'source': js})  # TMALL 反扒
+        self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
+        "source": """
+            Object.defineProperty(navigator, 'webdriver', {
+            get: () => undefined
+            })
+        """
+        })
         WebDriverWait(self.browser, 10)
         self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
-        path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id))
+        path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName, "files")
         self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
-
-        self.browser.execute("send_command", self.paramss)  # 下载地址改变
+        self.browser.execute("send_command", self.paramss)  # 下载目录改变
+        self.monitor_event = threading.Event()
+        self.monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, self.monitor_event)) #path后面的逗号不能省略,是元组固定写法
+        self.monitor_thread.start()
         # self.browser.get('about:blank')
         self.procedure = service["graph"]  # 程序执行流程
         try:
@@ -187,12 +201,19 @@ class BrowserThread(Thread):
             self.links = list(filter(isnotnull, service["url"]))  # 要执行的link
         self.OUTPUT = []  # 采集的数据
         try:
-            self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖
+            self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖,3为重命名文件
         except:
             self.dataWriteMode = 1
         if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx" or self.outputFormat == "json":
-            if self.dataWriteMode == 2 and os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
-                os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
+            if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
+                if self.dataWriteMode == 2:
+                    os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
+                elif self.dataWriteMode == 3:
+                    i = 2
+                    while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat):
+                        i = i + 1
+                    self.saveName = self.saveName + '_' + str(i)
+                    self.print_and_log("文件已存在,已重命名为", self.saveName)
         self.writeMode = 1  # 写入模式,0为新建,1为追加
         if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
             if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
@@ -521,7 +542,7 @@ class BrowserThread(Thread):
                                "/", len(self.links))
             self.executeNode(0)
             self.urlId = self.urlId + 1
-        files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
+        # files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
         # 如果目录为空,则删除该目录
         # if not files:
         #     os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName)
@@ -544,6 +565,7 @@ class BrowserThread(Thread):
             shutil.rmtree(self.option["tmp_user_data_folder"])
         except:
             pass
+        self.monitor_event.set()
         self.print_and_log("清理完成!|Clean up completed!")
         self.print_and_log("您现在可以安全的关闭此窗口了。|You can safely close this window now.")
         
@@ -768,6 +790,8 @@ class BrowserThread(Thread):
         elif int(codeMode) == 5:
             try:
                 code = readCode(code)
+                # global_namespace = globals().copy()
+                # global_namespace["self"] = self
                 output = exec(code)
                 self.recordLog("执行下面的代码:" + code)
                 self.recordLog("Execute the following code:" + code)
@@ -847,6 +871,23 @@ class BrowserThread(Thread):
             self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation")
         elif codeMode == 9:  # 发送邮件
             send_email(node["parameters"]["emailConfig"])
+        elif codeMode == 10: # 清空所有字段值
+            self.clearOutputParameters()
+        elif codeMode == 11: # 生成新的数据行
+            line = new_line(self.outputParameters,
+                            self.maxViewLength, self.outputParametersRecord)
+            self.OUTPUT.append(line)
+        elif codeMode == 12: # 退出程序
+            self.print_and_log("根据设置的自定义操作,任务已退出|Task exited according to custom operation")
+            self.saveData(exit=True)
+            self.browser.quit()
+            self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...")
+            try:
+                shutil.rmtree(self.option["tmp_user_data_folder"])
+            except:
+                pass
+            self.print_and_log("清理完成!|Clean up completed!")
+            os._exit(0)
         else:  # 0 1 5 6
             output = self.execute_code(
                 codeMode, code, max_wait_time, iframe=params["iframe"])
@@ -1106,7 +1147,25 @@ class BrowserThread(Thread):
             self.recordLog(
                 "判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met")
 
-    def handleHistory(self, node, xpath, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
+    def handleHistory(self, node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
+        try:
+            changed_handle = self.browser.current_window_handle != thisHandle
+        except:  # 如果网页被意外关闭了的情况下
+            self.browser.switch_to.window(
+                self.browser.window_handles[-1])
+            changed_handle = self.browser.window_handles[-1] != thisHandle
+        if changed_handle:  # 如果执行完一次循环之后标签页的位置发生了变化
+            try:
+                while True:  # 一直关闭窗口直到当前标签页
+                    self.browser.close()  # 关闭使用完的标签页
+                    self.browser.switch_to.window(
+                        self.browser.window_handles[-1])
+                    if self.browser.current_window_handle == thisHandle:
+                        break
+            except Exception as e:
+                self.print_and_log("关闭标签页发生错误:", e)
+                self.print_and_log(
+                    "Error occurred while closing tab: ", e)
         if self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle:  # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断
             difference = thisHistoryLength - self.history["index"]  # 计算历史记录变化差值
             self.browser.execute_script('history.go(' + str(difference) + ')')  # 回退历史记录
@@ -1132,12 +1191,13 @@ class BrowserThread(Thread):
                 if self.browser.current_url == thisHistoryURL or ti > thisHistoryLength:  # 如果执行完一次循环之后网址发生了变化
                     break
             time.sleep(2)
-            if element == None: # 不固定元素列表
-                element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
-            else: # 固定元素列表
-                element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
-            # if index > 0:
-                # index -= 1  # 如果是data:开头的网址,就要重试一次
+            if xpath != "":
+                if element == None: # 不固定元素列表
+                    element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
+                else: # 固定元素列表
+                    element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
+                # if index > 0:
+                    # index -= 1  # 如果是data:开头的网址,就要重试一次
         else:
             if element == None:
                 element = elements
@@ -1321,25 +1381,7 @@ class BrowserThread(Thread):
                     if self.BREAK:
                         self.BREAK = False
                         break
-                    try:
-                        changed_handle = self.browser.current_window_handle != thisHandle
-                    except:  # 如果网页被意外关闭了的情况下
-                        self.browser.switch_to.window(
-                            self.browser.window_handles[-1])
-                        changed_handle = self.browser.window_handles[-1] != thisHandle
-                    if changed_handle:  # 如果执行完一次循环之后标签页的位置发生了变化
-                        try:
-                            while True:  # 一直关闭窗口直到当前标签页
-                                self.browser.close()  # 关闭使用完的标签页
-                                self.browser.switch_to.window(
-                                    self.browser.window_handles[-1])
-                                if self.browser.current_window_handle == thisHandle:
-                                    break
-                        except Exception as e:
-                            self.print_and_log("关闭标签页发生错误:", e)
-                            self.print_and_log(
-                                "Error occurred while closing tab: ", e)
-                    index, elements = self.handleHistory(node, xpath, thisHistoryURL, thisHistoryLength, index, elements=elements)
+                    index, elements = self.handleHistory(node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, elements=elements)
                     if int(node["parameters"]["breakMode"]) > 0:  # 如果设置了退出循环的脚本条件
                         output = self.execute_code(int(
                             node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"],
@@ -1381,25 +1423,7 @@ class BrowserThread(Thread):
                     if self.BREAK:
                         self.BREAK = False
                         break
-                    try:
-                        changed_handle = self.browser.current_window_handle != thisHandle
-                    except:  # 如果网页被意外关闭了的情况下
-                        self.browser.switch_to.window(
-                            self.browser.window_handles[-1])
-                        changed_handle = self.browser.window_handles[-1] != thisHandle
-                    if changed_handle:  # 如果执行完一次循环之后标签页的位置发生了变化
-                        try:
-                            while True:  # 一直关闭窗口直到当前标签页
-                                self.browser.close()  # 关闭使用完的标签页
-                                self.browser.switch_to.window(
-                                    self.browser.window_handles[-1])
-                                if self.browser.current_window_handle == thisHandle:
-                                    break
-                        except Exception as e:
-                            self.print_and_log("关闭标签页发生错误:", e)
-                            self.print_and_log(
-                                "Error occurred while closing tab: ", e)
-                    index, element = self.handleHistory(node, path, thisHistoryURL, thisHistoryLength, index, element=element)
+                    index, element = self.handleHistory(node, path, thisHandle, thisHistoryURL, thisHistoryLength, index, element=element)
                 except NoSuchElementException:
                     self.print_and_log("Loop element not found: ", path)
                     self.print_and_log("找不到循环元素:", path)
@@ -1447,6 +1471,7 @@ class BrowserThread(Thread):
                     code = get_output_code(output)
                     if code <= 0:
                         break
+                index, _ = self.handleHistory(node, "", thisHandle, thisHistoryURL, thisHistoryLength, index)
         elif int(node["parameters"]["loopType"]) == 4:  # 固定网址列表
             # tempList = node["parameters"]["textList"].split("\r\n")
             urlList = list(
@@ -1715,6 +1740,21 @@ class BrowserThread(Thread):
                 script = 'var result = document.evaluate(`' + path + \
                          '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
                 self.browser.execute_script(script, str(index))  # 用js的点击方法
+            elif click_way == 2: # 双击
+                try:
+                    actions = ActionChains(self.browser)  # 实例化一个action对象
+                    actions.double_click(element).perform()
+                except Exception as e:
+                    self.browser.execute_script("arguments[0].scrollIntoView();", element)
+                    try:
+                        actions = ActionChains(self.browser)  # 实例化一个action对象
+                        actions.double_click(element).perform()
+                    except Exception as e:
+                        self.print_and_log(f"Selenium双击元素{path}失败,将尝试使用JavaScript双击")
+                        self.print_and_log(f"Failed to double click element {path} with Selenium, will try to double click with JavaScript")
+                        script = 'var result = document.evaluate(`' + path + \
+                            '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
+                        self.browser.execute_script(script, str(index))  # 用js的点击方法
             self.recordLog("点击元素|Click element: " + path)
         except TimeoutException:
             self.print_and_log(
@@ -1797,7 +1837,6 @@ class BrowserThread(Thread):
                 self.print_and_log("History Length Error")
                 self.history["index"] = 0
         self.scrollDown(param)  # 根据参数配置向下滚动
-        # rt.end()
 
     def get_content(self, p, element):
         content = ""
@@ -1824,7 +1863,7 @@ class BrowserThread(Thread):
                     downloadPic = 0
                 if downloadPic == 1:
                     download_image(self, content, "Data/Task_" +
-                                   str(self.id) + "/" + self.saveName + "/", element)
+                                   str(self.id) + "/" + self.saveName + "/images", element)
             else:  # 普通节点
                 if p["splitLine"] == 1:
                     text = extract_text_from_html(element.get_attribute('outerHTML'))
@@ -1853,7 +1892,7 @@ class BrowserThread(Thread):
                     downloadPic = 0
                 if downloadPic == 1:
                     download_image(self, content, "Data/Task_" +
-                                   str(self.id) + "/" + self.saveName + "/", element)
+                                   str(self.id) + "/" + self.saveName + "/images", element)
             else:
                 command = 'var arr = [];\
                 var content = arguments[0];\
@@ -1965,6 +2004,8 @@ class BrowserThread(Thread):
                 content = element.get_attribute(attribute_name)
             except:
                 content = ""
+        elif p["contentType"] == 15:  # 常量值
+            content = p["JS"]
         if content == None:
             content = ""
         return content
@@ -2208,7 +2249,7 @@ if __name__ == '__main__':
         "server_address": "http://localhost:8074",
         "keyboard": True,  # 是否监听键盘输入
         "pause_key": "p",  # 暂停键
-        "version": "0.6.0",
+        "version": "0.6.2",
     }
     c = Config(config)
     print(c)
@@ -2283,7 +2324,9 @@ if __name__ == '__main__':
 
     options.add_argument(
         "--disable-blink-features=AutomationControlled")  # TMALL 反扒
-
+    # 阻止http -> https的重定向
+    options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process")
+    options.add_argument("--disable-web-security")  # 禁用同源策略
     options.add_argument('-ignore-certificate-errors')
     options.add_argument('-ignore -ssl-errors')
 
@@ -2370,8 +2413,8 @@ if __name__ == '__main__':
             cloudflare = 0
         if cloudflare == 0:
             options.add_argument('log-level=3')  # 隐藏日志
-            path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id))
-            print("Data path:", path)
+            path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id), "files")
+            print("文件下载路径|File Download path:", path)
             options.add_experimental_option("prefs", {
                 # 设置文件下载路径
                 "download.default_directory": path,

+ 34 - 3
.temp_to_pub/EasySpider_windows_x64/Code/utils.py

@@ -59,7 +59,31 @@ def send_email(config):
             smtp_server.quit()
         except:
             pass
+  
def rename_downloaded_file(download_dir, stop_event):
    """Poll ``download_dir`` and give every newly appearing file a unique name.

    Intended to run in a background thread. Files already present when the
    function starts are never touched. Each new file is renamed to
    ``esfile_<name>_<uuid4>_<name>`` so the original name (and extension)
    stays at the end of the new name.

    Files are skipped when their name ends in ``.crdownload``, ``.htm`` or
    ``.html``, or when the name already starts with ``esfile_``.

    Args:
        download_dir: Directory to watch; it must exist when the function starts.
        stop_event: Event-like object (e.g. ``threading.Event``) providing
            ``is_set()`` and ``wait(timeout)``; setting it ends the loop.
    """
    # Names seen at start-up plus every name produced here; none of them is renamed.
    known_names = set(os.listdir(download_dir))
    skipped_suffixes = ('.crdownload', '.htm', '.html')

    while not stop_event.is_set():
        for file in os.listdir(download_dir):
            if file in known_names:
                continue  # original file or one already renamed by this loop

            # Test the bare file name: the joined path starts with the
            # directory, so a prefix test on it could never match 'esfile_'.
            if file.startswith('esfile_') or file.endswith(skipped_suffixes):
                continue

            full_path = os.path.join(download_dir, file)
            new_name = "esfile_" + file + '_' + str(uuid.uuid4()) + '_' + file
            new_path = os.path.join(download_dir, new_name)
            try:
                os.rename(full_path, new_path)
            except OSError:
                # Best effort: the file may still be locked or already gone;
                # it is retried on the next pass because it stays unknown.
                print("文件重命名失败|File rename failed")
            else:
                known_names.add(new_name)  # remember it so it is not renamed again
                print(f"文件已重命名为|File has been renamed to: {new_path}")

        # Check once per second, but return at once when the stop event is set.
        stop_event.wait(1)
    print("下载文件重命名监控已停止。|Download file rename monitoring has stopped.")
 
 def is_valid_url(url):
     try:
@@ -505,10 +529,17 @@ def write_to_excel(file_name, data, types, record):
         for i in range(len(line)):
             if record[i]:
                 to_write.append(line[i])
-        ws.append(to_write)
+        try:
+            ws.append(to_write)
+        except:
+            print("写入Excel文件失败,请检查数据类型是否正确。")
+            print("Failed to write to Excel file, please check if the data type is correct.")
     # 保存工作簿
-    wb.save(file_name)
-
+    try:
+        wb.save(file_name)
+    except:
+        print("保存Excel文件失败,请检查文件是否被其他程序打开。")
+        print("Failed to save Excel file, please check if the file is opened by other programs.")
 
 class Time:
     def __init__(self, type1=""):

+ 18 - 1
.temp_to_pub/EasySpider_windows_x64/myCode.py

@@ -23,7 +23,7 @@ For more complex operations, please download the source code and compile it for
 """
 
 # 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
-
+print(globals())
 # 导包 | Import packages
 from selenium.common.exceptions import ElementClickInterceptedException
 
@@ -56,3 +56,20 @@ finally:
     print("All parameters:", self.outputParameters)
     print(test(3))
     print("执行完毕|Execution completed")
+
+import time
+time.sleep(3)
+
def new_line(outputParameters, maxViewLength, record):
    """Collect the current field values into a row and echo selected ones.

    Args:
        outputParameters: Mapping whose values form the row, in iteration order.
        maxViewLength: Maximum number of leading items of each value to print.
        record: Sequence of flags; the value at position ``i`` is printed
            only when ``record[i]`` is truthy.

    Returns:
        A list holding every value of ``outputParameters``.
    """
    print("Use this function to print a new line in the console")
    row = list(outputParameters.values())
    for position, cell in enumerate(row):
        if not record[position]:
            continue
        # Print a truncated preview and stay on the same console line.
        print(cell[:maxViewLength], " ", end="")
    print("")
    return row
+
+new_line(self.outputParameters, 10, [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True])

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/112.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/212.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/228.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/229.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/295.json


+ 1 - 1
.temp_to_pub/EasySpider_windows_x64/tasks/70.json

@@ -1 +1 @@
-{"id":70,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}
+{"id":-2,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/95.json


+ 1 - 1
.temp_to_pub/compress.py

@@ -64,7 +64,7 @@ def compress_folder_to_7z_split(folder_path, output_file):
         except:
             subprocess.call(["7zz", "a", "-v95m", output_file, folder_path])
 
-easyspider_version = "0.6.0"
+easyspider_version = "0.6.2"
 
 if __name__ == "__main__":
 

BIN
ElectronJS/EasySpider_en.crx


BIN
ElectronJS/EasySpider_zh.crx


+ 1 - 1
ElectronJS/change_version.py

@@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio
             file.write(line)
 
 
-version = "0.6.0"
+version = "0.6.2"
 
 # py html js
 

+ 17 - 1
ElectronJS/main.js

@@ -651,7 +651,11 @@ async function beginInvoke(msg, ws) {
                         if (parameters.xpath.includes("point(")) {
                             await click_element(element, point);
                         } else {
-                            await click_element(element);
+                            if (parameters.clickWay == 2){ //双击
+                                await click_element(element, "double");
+                            } else {
+                                await click_element(element); //单击
+                            }
                         }
                         let alertHandleType = parameters.alertHandleType;
                         if (alertHandleType == 1) {
@@ -1002,6 +1006,14 @@ async function beginInvoke(msg, ws) {
                                 "Attribute value obtained: " + result,
                                 "success"
                             );
+                        } else if(param.contentType == 15) {
+                            //元素的属性值
+                            let result = param.JS;
+                            notify_browser(
+                                "获取的常量值:" + result,
+                                "Constant value obtained: " + result,
+                                "success"
+                            );
                         } else {
                             //其他暂不支持
                             notify_browser(
@@ -1130,6 +1142,8 @@ async function click_element(element, type = "click") {
             // await actions.click().perform();
             let script = `document.elementFromPoint(${x}, ${y}).click();`;
             await driver.executeScript(script);
+        } else if (type == "double") {
+            await driver.actions().doubleClick(element).perform();
         } else {
             await element.click();
         }
@@ -1341,6 +1355,8 @@ async function runBrowser(lang = "en", user_data_folder = "", mobile = false) {
     let options = new chrome.Options();
     options.addArguments("--disable-blink-features=AutomationControlled");
     options.addArguments("--disable-infobars");
+    options.addArguments("--disable-web-security");
+    options.addArguments("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process");
     // 添加实验性选项以排除'enable-automation'开关
     options.set("excludeSwitches", ["enable-automation"]);
     options.excludeSwitches("enable-automation");

+ 3 - 3
ElectronJS/package.json

@@ -1,7 +1,7 @@
 {
     "name": "easy-spider",
     "productName": "EasySpider",
-    "version": "0.6.0",
+    "version": "0.6.2",
     "icon": "./favicon",
     "description": "NoCode Visual Web Crawler",
     "main": "main.js",
@@ -67,7 +67,7 @@
             ],
             "packagerConfig": {
                 "icon": "./favicon",
-                "appVersion": "0.6.0",
+                "appVersion": "0.6.2",
                 "name": "EasySpider",
                 "executableName": "EasySpider",
                 "appCopyright": "Naibo Wang ([email protected])",
@@ -80,4 +80,4 @@
             "publishers": []
         }
     }
-}
+}

+ 1 - 1
ElectronJS/src/index.html

@@ -40,7 +40,7 @@
 
         <p><a @click="changeLang('en')" class="btn btn-outline-primary btn-lg"
               style="margin-top: 15px; width: 300px;height:60px;padding-top:12px;">English</a></p>
-        <p style="font-size: 17px">当前版本/Current Version: <b>v0.6.0</b></p>
+        <p style="font-size: 17px">当前版本/Current Version: <b>v0.6.2</b></p>
         <p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases"
                                       target="_blank">Github</a>最新版本/Newest Version:<b>{{newest_version}}</b></p>
         <!--        <p>如发现新版本更新,可从以下Github仓库下载最新版本使用/If a new version is found, you can download the latest version from the following Github repository:</p>-->

+ 8 - 2
ElectronJS/src/taskGrid/FlowChart.html

@@ -170,10 +170,11 @@
                     </div>
                     <label>Maximum wait time for page load after clicking (in seconds):</label>
                     <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['maxWaitTime']" type="number" required></input>
-                    <label>Click Type:</label>
+                    <label>Click Type (including double-click):</label>
                     <select v-model='nowNode["parameters"]["clickWay"]' class="form-control">
                         <option :value = 0>Selenium</option>
                         <option :value = 1>JavaScript</option>
+                        <option :value = 2>Double-click</option>
                     </select>
                     <label>Open link in new tab:</label>
                     <select v-model='nowNode["parameters"]["newTab"]' class="form-control">
@@ -271,6 +272,7 @@
                             <option :value = 4>Background Image Address</option>
                             <option :value = 5>Webpage URL</option>
                             <option :value = 6>Webpage Title</option>
+                            <option :value = 15>Constant String</option>
                             <option :value = 7>Element Screenshot</option>
                             <option :value = 8>OCR Results</option>
                             <option :value = 14>Properties of elements</option>
@@ -280,7 +282,11 @@
                             <option :value = 10>Selected value of the current select box</option>
                             <option :value = 11>Selected text of the current select box</option>
                         </select>
-                        <div v-if='params.parameters[paraIndex]["contentType"] == 14'>
+                        <div v-if='params.parameters[paraIndex]["contentType"] == 15'>
+                            <label>Constant String:</label>
+                            <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="This field type is usually used for remarks"></input>
+                        </div>
+                        <div v-else-if='params.parameters[paraIndex]["contentType"] == 14'>
                             <label>Attribute Name:</label>
                             <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="Attribute names, such as href to represent the href attribute of the current element, that is, the link address."></input>
                         </div>

+ 8 - 2
ElectronJS/src/taskGrid/FlowChart_CN.html

@@ -170,10 +170,11 @@
                     </div>
                     <label>点击后页面加载最长等待时间(秒):</label>
                     <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['maxWaitTime']" type="number" required></input>
-                    <label>点击类型:</label>
+                    <label>点击类型(如是否双击):</label>
                     <select v-model='nowNode["parameters"]["clickWay"]' class="form-control">
                         <option :value = 0>Selenium点击</option>
                         <option :value = 1>JavaScript点击</option>
+                        <option :value = 2>双击</option>
                     </select>
                     <label>在新标签页打开超链接:</label>
                     <select v-model='nowNode["parameters"]["newTab"]' class="form-control">
@@ -271,6 +272,7 @@
                             <option :value = 4>背景图片地址</option>
                             <option :value = 5>页面网址</option>
                             <option :value = 6>页面标题</option>
+                            <option :value = 15>常量字符串</option>
                             <option :value = 7>元素截图</option>
                             <option :value = 8>OCR识别文字</option>
                             <option :value = 14>元素的属性值</option>
@@ -280,7 +282,11 @@
                             <option :value = 10>当前选择框选中的选项值</option>
                             <option :value = 11>当前选择框选中的选项文本</option>
                         </select>
-                        <div v-if='params.parameters[paraIndex]["contentType"] == 14'>
+                        <div v-if='params.parameters[paraIndex]["contentType"] == 15'>
+                            <label>常量字符串:</label>
+                            <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="此字段类型通常作为备注使用"></input>
+                        </div>
+                        <div v-else-if='params.parameters[paraIndex]["contentType"] == 14'>
                             <label>属性名称:</label>
                             <input spellcheck=false onkeydown="inputDelete(event)" class="form-control" v-model='params.parameters[paraIndex]["JS"]' placeholder="属性名称,如class表示当前元素的class属性值,即元素所拥有的类名。"></input>
                         </div>

+ 3 - 3
ElectronJS/src/taskGrid/logic.js

@@ -446,7 +446,7 @@ function modifyParameters(t, param) {
     }
 }
 
-function showSuccess(msg, time = 4000) {
+function showSuccess(msg, time = 1000) {
     $("#tip").text(msg);
     $("#tip").slideDown(); //提示框
     let fadeout = setTimeout(function () {
@@ -491,7 +491,7 @@ if (mobile == "true") {
 }
 
 let serviceInfo = {
-    "version": "0.6.0"
+    "version": "0.6.2"
 };
 
 function saveService(type) {
@@ -625,7 +625,7 @@ function saveService(type) {
         "links": links,
         "create_time": $("#create_time").val(),
         "update_time": formatDateTime(new Date()),
-        "version": "0.6.0",
+        "version": "0.6.2",
         "saveThreshold": saveThreshold,
         // "cloudflare": cloudflare,
         "quitWaitTime": parseInt($("#quitWaitTime").val()),

+ 1 - 1
ElectronJS/src/taskGrid/newTask.html

@@ -33,7 +33,7 @@
             <h4 style="text-align: center;">{{"New Task~新任务" | lang}}</h4>
             <div class="form-group">
                 <label>{{"Please Input URL (http or https):~请输入网页网址(以http或https开头):" | lang}}  </label>
-                <textarea class="form-control" id="links" placeholder="links" style="min-height: 100px;">{{"https://www.ebay.com~https://www.jd.com" | lang}}</textarea>
+                <textarea class="form-control" id="links" placeholder="links" style="min-height: 100px;">{{"https://www.ebay.com~https://www.baidu.com" | lang}}</textarea>
             </div>
             <button type="submit" id="send" class="btn btn-primary">{{"Start Design~开始设计" | lang}}</button>
 <!--            <div class="form-group" style="margin-top: 10px">-->

File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/318.json


+ 1 - 0
ElectronJS/tasks/319.json

@@ -0,0 +1 @@
+{"id":-2,"name":"百度一下,你就知道","url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","create_time":"2024-04-22 05:45:12","update_time":"2024-04-22 05:45:20","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com?id=1","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}}]}

File diff suppressed because it is too large
+ 0 - 0
ElectronJS/tasks/320.json


+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "justMyCode": false,
             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--ids", "[79]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
+            "args": ["--ids", "[83]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
         "--read_type", "remote"]
             // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
         }

+ 21 - 2
ExecuteStage/easyspider_executestage.py

@@ -1740,6 +1740,21 @@ class BrowserThread(Thread):
                 script = 'var result = document.evaluate(`' + path + \
                          '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
                 self.browser.execute_script(script, str(index))  # 用js的点击方法
+            elif click_way == 2: # 双击
+                try:
+                    actions = ActionChains(self.browser)  # 实例化一个action对象
+                    actions.double_click(element).perform()
+                except Exception as e:
+                    self.browser.execute_script("arguments[0].scrollIntoView();", element)
+                    try:
+                        actions = ActionChains(self.browser)  # 实例化一个action对象
+                        actions.double_click(element).perform()
+                    except Exception as e:
+                        self.print_and_log(f"Selenium双击元素{path}失败,将尝试使用JavaScript双击")
+                        self.print_and_log(f"Failed to double click element {path} with Selenium, will try to double click with JavaScript")
+                        script = 'var result = document.evaluate(`' + path + \
+                            '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
+                        self.browser.execute_script(script, str(index))  # 用js的点击方法
             self.recordLog("点击元素|Click element: " + path)
         except TimeoutException:
             self.print_and_log(
@@ -1989,6 +2004,8 @@ class BrowserThread(Thread):
                 content = element.get_attribute(attribute_name)
             except:
                 content = ""
+        elif p["contentType"] == 15:  # 常量值
+            content = p["JS"]
         if content == None:
             content = ""
         return content
@@ -2232,7 +2249,7 @@ if __name__ == '__main__':
         "server_address": "http://localhost:8074",
         "keyboard": True,  # 是否监听键盘输入
         "pause_key": "p",  # 暂停键
-        "version": "0.6.0",
+        "version": "0.6.2",
     }
     c = Config(config)
     print(c)
@@ -2307,7 +2324,9 @@ if __name__ == '__main__':
 
     options.add_argument(
         "--disable-blink-features=AutomationControlled")  # TMALL 反扒
-
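+    # Intended to stop http -> https redirects. NOTE(review): these flags disable site isolation and cross-site document blocking; confirm they actually prevent the redirect.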
+    options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process")
+    options.add_argument("--disable-web-security")  # 禁用同源策略
     options.add_argument('-ignore-certificate-errors')
     options.add_argument('-ignore -ssl-errors')
 

+ 1 - 1
Extension/manifest_v3/package.json

@@ -1,6 +1,6 @@
 {
     "name": "EasySpider",
-    "version": "0.6.0",
+    "version": "0.6.2",
     "type": "module",
     "scripts": {
         "build": "rollup -c",

+ 1 - 1
Extension/manifest_v3/src/manifest.json

@@ -1,6 +1,6 @@
 {
     "name": "EasySpider",
-    "version": "0.6.0",
+    "version": "0.6.2",
     "description": "EasySpider's chrome extension",
     "author": "Naibo Wang",
     "manifest_version": 3,

Some files were not shown because too many files changed in this diff