Browse Source

相对URL转绝对

naibo 2 years ago
parent
commit
0b29cfc371

File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/4.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/5.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/execution_instances/6.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/108.json


File diff suppressed because it is too large
+ 0 - 0
.temp_to_pub/EasySpider_windows_x64/tasks/34.json


+ 1 - 1
ExecuteStage/.vscode/launch.json

@@ -12,7 +12,7 @@
             "justMyCode": true,
             //  "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--id", "[3]", "--headless", "0", "--user_data", "1"]
+            "args": ["--id", "[6]", "--headless", "0", "--user_data", "1"]
         }
     ]
 }

+ 4 - 0
ExecuteStage/easyspider_executestage.py

@@ -14,6 +14,7 @@ import sys
 import time
 # import keyboard
 import requests
+from urllib.parse import urljoin
 from lxml import etree
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
@@ -1203,6 +1204,9 @@ class BrowserThread(Thread):
                         # 拼接所有文本内容并去掉两边的空白
                         content = ' '.join(result.strip()
                                            for result in content if result.strip())
+                        if p["nodeType"] == 2:
+                            base_url = self.browser.current_url
+                            content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
                     else:
                         content = p["default"]
                         if not self.dataNotFoundKeys[p["name"]]:

Some files were not shown because too many files changed in this diff