Selaa lähdekoodia

UC Update Iframe

naibo 2 vuotta sitten
vanhempi
sitoutus
751fa6e055
32 muutettua tiedostoa jossa 190 lisäystä ja 51 poistoa
  1. 11 16
      .temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
  2. 32 5
      .temp_to_pub/EasySpider_windows_x64/Code/utils.py
  3. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
  4. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
  5. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/10.json
  6. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/11.json
  7. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
  8. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
  9. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/4.json
  10. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/5.json
  11. 1 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/6.json
  12. 1 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/7.json
  13. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/8.json
  14. 0 0
      .temp_to_pub/EasySpider_windows_x64/execution_instances/9.json
  15. 0 0
      .temp_to_pub/EasySpider_windows_x64/tasks/108.json
  16. 0 0
      .temp_to_pub/EasySpider_windows_x64/tasks/109.json
  17. 0 0
      .temp_to_pub/EasySpider_windows_x64/tasks/110.json
  18. 1 0
      .temp_to_pub/EasySpider_windows_x64/tasks/111.json
  19. 0 0
      .temp_to_pub/EasySpider_windows_x64/tasks/112.json
  20. 0 0
      .temp_to_pub/EasySpider_windows_x64/tasks/113.json
  21. 2 0
      ElectronJS/.gitignore
  22. BIN
      ElectronJS/EasySpider_en.crx
  23. BIN
      ElectronJS/EasySpider_zh.crx
  24. 4 5
      ElectronJS/src/taskGrid/FlowChart.html
  25. 1 1
      ElectronJS/src/taskGrid/FlowChart.js
  26. 6 7
      ElectronJS/src/taskGrid/FlowChart_CN.html
  27. 2 2
      ElectronJS/src/taskGrid/invokeTask.html
  28. 1 1
      ExecuteStage/.vscode/launch.json
  29. 3 3
      ExecuteStage/easyspider_executestage.py
  30. 76 2
      ExecuteStage/myChrome.py
  31. 32 5
      ExecuteStage/utils.py
  32. 17 4
      Extension/manifest_v3/package.js

+ 11 - 16
.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py

@@ -242,7 +242,7 @@ class BrowserThread(Thread):
                     str(self.id) + "/" + self.saveName + '.xlsx'
                 write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
             elif self.outputFormat == "mysql":
-                self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord)
+                self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)
                 
             self.OUTPUT = []
             self.log = ""
@@ -1345,19 +1345,19 @@ if __name__ == '__main__':
         if sys.platform == "win32" and platform.architecture()[0] == "32bit":
             options.binary_location = os.path.join(
                 os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe")  # 指定chrome位置
-            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
             driver_path = os.path.join(
                 os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
+            option.add_extension("EasySpider/resources/app/XPathHelper.crx")
         elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
             options.binary_location = os.path.join(
                 os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
             driver_path = os.path.join(
                 os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
-            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
+            option.add_extension("EasySpider/resources/app/XPathHelper.crx")
         elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
             options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
-            options.add_extension("EasySpider/resources/app/XPathHelper.crx")
             driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
+            option.add_extension("EasySpider/resources/app/XPathHelper.crx")
         else:
             print("Unsupported platform")
             sys.exit()
@@ -1370,21 +1370,16 @@ if __name__ == '__main__':
     #     # option.binary_location = "C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe"
     #     driver_path = "./Chrome/chromedriver.exe"
     elif os.path.exists(os.getcwd()+"/../ElectronJS"):
-        if os.getcwd().find("ElectronJS") >= 0:  # 软件dev用
-            print("Finding chromedriver in EasySpider",
-                  os.getcwd())
-            options.binary_location = "chrome_win64/chrome.exe"
-            driver_path = "chrome_win64/chromedriver_win64.exe"
-            options.add_extension("../ElectronJS/XPathHelper.crx")
-        else:  # 直接在executeStage文件夹内使用python easyspider_executestage.py时的路径
-            print("Finding chromedriver in EasySpider",
-                  os.getcwd()+"/ElectronJS")
-            option.binary_location = "../ElectronJS/chrome_win64/chrome.exe"  # 指定chrome位置
-            driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
-            option.add_extension("../ElectronJS/XPathHelper.crx")
+        # 软件dev用
+        print("Finding chromedriver in EasySpider",
+               os.getcwd()+"/ElectronJS")
+        option.binary_location = "../ElectronJS/chrome_win64/chrome.exe"  # 指定chrome位置
+        driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
+        option.add_extension("../ElectronJS/XPathHelper.crx")
     else:
         options.binary_location = "./chrome.exe"  # 指定chrome位置
         driver_path = "./chromedriver.exe"
+        option.add_extension("XPathHelper.crx")
 
     option.add_experimental_option(
         'excludeSwitches', ['enable-automation'])  # 以开发者模式

+ 32 - 5
.temp_to_pub/EasySpider_windows_x64/Code/utils.py

@@ -1,6 +1,7 @@
 # 控制流程的暂停和继续
 
 import csv
+import datetime
 import json
 import os
 import time
@@ -228,15 +229,41 @@ class myMySQL:
             print("The data table " + table_name + " already exists.")
         cursor.close()
 
-    def write_to_mysql(self, OUTPUT, record):
+    def write_to_mysql(self, OUTPUT, record, types):
         # 创建一个游标对象
         cursor = self.conn.cursor()
 
-        for row in OUTPUT:
+        for line in OUTPUT:
+            for i in range(len(line)):
+                if types[i] == "int" or types[i] == "bigInt":
+                    try:
+                        line[i] = int(line[i])
+                    except:
+                        line[i] = 0
+                elif types[i] == "double":
+                    try:
+                        line[i] = float(line[i])
+                    except:
+                        line[i] = 0.0
+                elif types[i] == "datetime":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d %H:%M:%S')
+                    except:
+                        line[i] = datetime.datetime.strptime("1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')
+                elif types[i] == "date":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d')
+                    except:
+                        line[i] = datetime.datetime.strptime("1970-01-01", '%Y-%m-%d')
+                elif types[i] == "time":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%H:%M:%S')
+                    except:
+                        line[i] = datetime.datetime.strptime("00:00:00", '%H:%M:%S')
             to_write = []
-            for i in range(len(row)):
+            for i in range(len(line)):
                 if record[i]:
-                    to_write.append(row[i])
+                    to_write.append(line[i])
             # 构造插入数据的 SQL 语句
             sql = f"INSERT INTO "+ self.table_name +" "+self.field_sql+" VALUES ("
             for item in to_write:
@@ -248,7 +275,7 @@ class myMySQL:
                 cursor.execute(sql, to_write)
             except Exception as e:
                 print("Error:", e)
-                # print("Error SQL:", sql)
+                print("Error SQL:", sql, to_write)
                 print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
                 print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
                 print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/10.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/11.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/4.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/5.json


+ 1 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/6.json

@@ -0,0 +1 @@
+{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:54:10 AM","update_time":"7/8/2023, 7:54:10 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"sadf<enter>","value":"sadf<enter>"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"sadf<enter>","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}}]}

+ 1 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/7.json

@@ -0,0 +1 @@
+{"id":7,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:54:10 AM","update_time":"7/8/2023, 7:54:46 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"sadf<enter>","value":"sadf<enter>"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":5,"waitType":"1","beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"sadf<enter>","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}}]}

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/8.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/9.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/108.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/109.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/110.json


+ 1 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/111.json

@@ -0,0 +1 @@
+{"id":111,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:54:10 AM","update_time":"7/8/2023, 7:54:46 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"sadf<enter>","value":"sadf<enter>"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":5,"waitType":"1","beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"sadf<enter>","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}}]}

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/112.json


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/113.json


+ 2 - 0
ElectronJS/.gitignore

@@ -15,3 +15,5 @@ execution_instances/*
 info.log
 npminstall-debug.log
 mysql_config.json
+EasySpider_en/
+EasySpider_zh/

BIN
ElectronJS/EasySpider_en.crx


BIN
ElectronJS/EasySpider_zh.crx


+ 4 - 5
ElectronJS/src/taskGrid/FlowChart.html

@@ -265,6 +265,8 @@
                             <option :value = 10>Selected value of the current select box</option>
                             <option :value = 11>Selected text of the current select box</option>
                         </select>
+                        <label>Default value when cannot find this element:</label>
+                        <input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["default"]'></textarea>
                         <div v-if='paras.parameters[paraIndex]["contentType"] == 9'>
                             <label>JavaScript Code (Use Field["FieldName"] to input the last extracted value of a field): </label>
                             <textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
@@ -299,9 +301,6 @@
                         </select>
                         <label>Parameter Description:</label>
                         <textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 60px" v-model='paras.parameters[paraIndex]["desc"]'></textarea>
-                        <label>Default value when cannot find this element:</label>
-                        <input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["default"]'></textarea>
-
                     </div>
 
                 </div>
@@ -578,8 +577,8 @@
 
                 </div>
                 <div class="modal-footer">
-                    <button type="button" id="saveAsButton" class="btn btn-outline-primary">Save as</button>
-                    <button type="button" id="saveButton" class="btn btn-primary">Save</button>
+                    <button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">Save as</button>
+                    <button type="button" id="saveButton" style="width: 100px" class="btn btn-primary">Save</button>
                 </div>
             </div>
             <!-- /.modal-content -->

+ 1 - 1
ElectronJS/src/taskGrid/FlowChart.js

@@ -101,7 +101,7 @@ let app = new Vue({
                 if(newVal == 3){
                     this.nowNode["title"] = LANG("退出循环", "Exit Loop");
                 } else {
-                    this.nowNode["title"] = LANG("自定义操作", "Custom Operation");
+                    this.nowNode["title"] = LANG("自定义操作", "Custom Action");
                 }
             }
         }

+ 6 - 7
ElectronJS/src/taskGrid/FlowChart_CN.html

@@ -249,6 +249,8 @@
                             <option value = "longText">超大文本(单个值长度超过100万)</option>
                             <option value = "bigInt">大整数(位数超过9位)</option>
                         </select>
+                        <label>元素找不到时的值:</label>
+                        <input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["default"]'></input>
                         <label>采集内容类型</label>
                         <select v-model='paras.parameters[paraIndex]["contentType"]' class="form-control">
                             <option :value = 0>文本(包括子元素)</option>
@@ -287,7 +289,7 @@
                                 <option :value = 1>是</option>
                             </select>
                         </div>
-<!--                        <label>提取方式</label>-->
+                        <!--                        <label>提取方式</label>-->
 <!--                        <select v-model='paras.parameters[paraIndex]["extractType"]' class="form-control">-->
 <!--                            <option :value = 0>普通提取</option>-->
 <!--                            <option :value = 1>OCR提取</option>-->
@@ -298,10 +300,7 @@
                             <option :value = 0>否</option>
                         </select>
                         <label>参数描述:</label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 60px" v-model='paras.parameters[paraIndex]["desc"]'></textarea>
-                        <label>元素找不到时的值:</label>
-                        <input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["default"]'></textarea>
-
+                        <input onkeydown="inputDelete(event)" class="form-control" style="min-height: 60px" v-model='paras.parameters[paraIndex]["desc"]'></input>
                     </div>
 
                 </div>
@@ -578,8 +577,8 @@
 
                 </div>
                 <div class="modal-footer">
-                    <button type="button" id="saveAsButton" class="btn btn-outline-primary">另存为</button>
-                    <button type="button" id="saveButton" class="btn btn-primary">保存</button>
+                    <button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">另存为</button>
+                    <button type="button" id="saveButton" style="width: 100px" class="btn btn-primary">保存</button>
                 </div>
             </div>
             <!-- /.modal-content -->

+ 2 - 2
ElectronJS/src/taskGrid/invokeTask.html

@@ -213,7 +213,7 @@
                 <input type="text" class="form-control" v-model="mysql_config_path"></input>
             </div>
         </form>
-        <label>{{"Click the button below to execute the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha (Note: set a waiting time in the task flow for manual intervention): ~如手动输入密码,验证码等(注意任务流程中设定好操作后的等待时间以用来手工干预):" | lang}}</label>
+        <label style="display: block">{{"Click the button below to execute the task. Click p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码,验证码等。" | lang}}</label>
         <button class="btn btn-primary" v-on:click="localExecuteInstant(false)">{{"Directly Run Locally (Clean Mode)~本地直接执行(纯净模式)" |
             lang}}
         </button>
@@ -221,7 +221,7 @@
             lang}}
         </button>
         <!-- <button style="margin-left: 5px;" v-on:click="remoteExcuteInstant" class="btn btn-primary">Directly Run Remotely</button> -->
-        <label style="margin-top: 15px">{{"When running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.:~任务执行过程中,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。" | lang}}</label>
+        <label style="margin-top: 15px;display: block">{{"You can also use the XPath Helper extension to test XPaths when executing the task:~执行任务的过程中也可以随时使用XPath Helper扩展来调试XPath。" | lang}}</label>
         <div style="margin-bottom: 10px;">
             <label style="margin-top: 10px;">{{"Execution ID (EID):~执行ID:" | lang}}</label>
             <input class="form-control" v-model="ID"></input>

+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "justMyCode": true,
             //  "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--id", "[0]", "--headless", "0", "--user_data", "1"]
+            "args": ["--id", "[11]", "--headless", "0", "--user_data", "1"]
         }
     ]
 }

+ 3 - 3
ExecuteStage/easyspider_executestage.py

@@ -41,7 +41,7 @@ import pytesseract
 from PIL import Image
 # import uuid
 from threading import Thread, Event
-from myChrome import MyChrome
+from myChrome import MyChrome, MyUCChrome
 from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"
@@ -242,7 +242,7 @@ class BrowserThread(Thread):
                     str(self.id) + "/" + self.saveName + '.xlsx'
                 write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
             elif self.outputFormat == "mysql":
-                self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord)
+                self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)
                 
             self.OUTPUT = []
             self.log = ""
@@ -1473,7 +1473,7 @@ if __name__ == '__main__':
             browser_t = MyChrome(
                 options=options, chrome_options=option, executable_path=driver_path)
         elif cloudflare == 1:
-            browser_t = uc.Chrome(
+            browser_t = MyUCChrome(
                 options=options, chrome_options=option, executable_path=driver_path)
             print("Pass Cloudflare Mode")
             print("过Cloudflare验证模式")

+ 76 - 2
ExecuteStage/myChrome.py

@@ -1,5 +1,3 @@
-
-
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.action_chains import ActionChains
@@ -14,10 +12,12 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver import ActionChains
 from selenium.webdriver.common.by import By
+import undetected_chromedriver as uc
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"
 
 
+
 class MyChrome(webdriver.Chrome):
 
     def __init__(self, *args, **kwargs):
@@ -89,3 +89,77 @@ class MyChrome(webdriver.Chrome):
                 raise NoSuchElementException
         else:
             return super().find_elements(by=by, value=value)
+        
+
+class MyUCChrome(uc.Chrome):
+
+    def __init__(self, *args, **kwargs):
+        self.iframe_env = False  # 现在的环境是root还是iframe
+        super().__init__(*args, **kwargs)  # 调用父类的 __init__
+
+    def find_element(self, by=By.ID, value=None, iframe=False):
+        # 在这里改变查找元素的行为
+        if self.iframe_env:
+            super().switch_to.default_content()
+            self.iframe_env = False
+        if iframe:
+            # 获取所有的 iframe
+            try:
+                iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+            except Exception as e:
+                print(e)
+            find_element = False
+            # 遍历所有的 iframe 并点击里面的元素
+            for iframe in iframes:
+                # 切换到 iframe
+                super().switch_to.default_content()
+                super().switch_to.frame(iframe)
+                self.iframe_env = True
+                try:
+                    # 在 iframe 中查找并点击元素
+                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+                    element = super().find_element(by=by, value=value)
+                    find_element = True
+                except:
+                    print("No such element found in the iframe")
+                # 完成操作后切回主文档
+                # super().switch_to.default_content()
+                if find_element:
+                    return element
+            if not find_element:
+                raise NoSuchElementException
+        else:
+            return super().find_element(by=by, value=value)
+
+    def find_elements(self, by=By.ID, value=None, iframe=False):
+        # 在这里改变查找元素的行为
+        if self.iframe_env:
+            super().switch_to.default_content()
+            self.iframe_env = False
+        if iframe:
+            # 获取所有的 iframe
+            iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
+            find_element = False
+            # 遍历所有的 iframe 并点击里面的元素
+            for iframe in iframes:
+                # 切换到 iframe
+                try:
+                    super().switch_to.default_content()
+                    super().switch_to.frame(iframe)
+                    self.iframe_env = True
+                    # 在 iframe 中查找并点击元素
+                    # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
+                    elements = super().find_elements(by=by, value=value)
+                    if len(elements) > 0:
+                        find_element = True
+                    # 完成操作后切回主文档
+                    # super().switch_to.default_content()
+                    if find_element:
+                        return elements
+                except:
+                    print("No such element found in the iframe")
+            if not find_element:
+                raise NoSuchElementException
+        else:
+            return super().find_elements(by=by, value=value)
+

+ 32 - 5
ExecuteStage/utils.py

@@ -1,6 +1,7 @@
 # 控制流程的暂停和继续
 
 import csv
+import datetime
 import json
 import os
 import time
@@ -228,15 +229,41 @@ class myMySQL:
             print("The data table " + table_name + " already exists.")
         cursor.close()
 
-    def write_to_mysql(self, OUTPUT, record):
+    def write_to_mysql(self, OUTPUT, record, types):
         # 创建一个游标对象
         cursor = self.conn.cursor()
 
-        for row in OUTPUT:
+        for line in OUTPUT:
+            for i in range(len(line)):
+                if types[i] == "int" or types[i] == "bigInt":
+                    try:
+                        line[i] = int(line[i])
+                    except:
+                        line[i] = 0
+                elif types[i] == "double":
+                    try:
+                        line[i] = float(line[i])
+                    except:
+                        line[i] = 0.0
+                elif types[i] == "datetime":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d %H:%M:%S')
+                    except:
+                        line[i] = datetime.datetime.strptime("1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')
+                elif types[i] == "date":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d')
+                    except:
+                        line[i] = datetime.datetime.strptime("1970-01-01", '%Y-%m-%d')
+                elif types[i] == "time":
+                    try:
+                        line[i] = datetime.datetime.strptime(line[i], '%H:%M:%S')
+                    except:
+                        line[i] = datetime.datetime.strptime("00:00:00", '%H:%M:%S')
             to_write = []
-            for i in range(len(row)):
+            for i in range(len(line)):
                 if record[i]:
-                    to_write.append(row[i])
+                    to_write.append(line[i])
             # 构造插入数据的 SQL 语句
             sql = f"INSERT INTO "+ self.table_name +" "+self.field_sql+" VALUES ("
             for item in to_write:
@@ -248,7 +275,7 @@ class myMySQL:
                 cursor.execute(sql, to_write)
             except Exception as e:
                 print("Error:", e)
-                # print("Error SQL:", sql)
+                print("Error SQL:", sql, to_write)
                 print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
                 print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
                 print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")

+ 17 - 4
Extension/manifest_v3/package.js

@@ -1,4 +1,4 @@
-import fs from 'fs';
+import fs from 'fs-extra';
 import path from 'path';
 import { fileURLToPath } from 'url';
 import readline from 'readline';
@@ -49,8 +49,7 @@ execSync(`npm run crx EasySpider_en`, (error, stdout, stderr) => {
     console.log(`stdout: ${stdout}`);
 });
 fs.copyFileSync(path.join(__dirname, './EasySpider_en.crx'), path.join(__dirname, '../../ElectronJS/EasySpider_en.crx'));
-
-
+copyFolderSync(path.join(__dirname, './EasySpider_en'), path.join(__dirname, '../../ElectronJS/EasySpider_en'));
 // 生成中文插件
 try{
     removeDir(path.join(__dirname, `EasySpider_zh`));
@@ -90,7 +89,7 @@ execSync(`npm run crx EasySpider_zh`, (error, stdout, stderr) => {
     console.log(`stdout: ${stdout}`);
 });
 fs.copyFileSync(path.join(__dirname, './EasySpider_zh.crx'), path.join(__dirname, '../../ElectronJS/EasySpider_zh.crx'));
-
+copyFolderSync(path.join(__dirname, './EasySpider_zh'), path.join(__dirname, '../../ElectronJS/EasySpider_zh'));
 
 function removeDir(dir) {
     let files = fs.readdirSync(dir)
@@ -108,3 +107,17 @@ function removeDir(dir) {
     fs.rmdirSync(dir)//如果文件夹是空的,就将自己删除掉
 }
 
+function copyFolderSync(source, target) {
+    try {
+        // 如果目标文件夹已存在,则先删除
+        if (fs.existsSync(target)) {
+            fs.removeSync(target);
+        }
+
+        // 复制文件夹
+        fs.copySync(source, target);
+        console.log('文件夹复制完成!');
+    } catch (err) {
+        console.error('复制文件夹时出错:', err);
+    }
+}

Kaikkia tiedostoja ei voida näyttää, sillä liian monta tiedostoa muuttui tässä diffissä