|
@@ -335,7 +335,10 @@ class BrowserThread(Thread):
|
|
|
node["parameters"]["quickExtractable"] = False # 是否可以快速提取
|
|
|
# 如果(不)固定元素列表循环中只有一个提取数据操作,且提取数据操作的提取内容为元素截图,那么可以快速提取
|
|
|
if len(node["sequence"]) == 1 and self.procedure[node["sequence"][0]]["option"] == 3 and (int(node["parameters"]["loopType"]) == 1 or int(node["parameters"]["loopType"]) == 2):
|
|
|
- params = self.procedure[node["sequence"][0]]["parameters"]["params"]
|
|
|
+ try:
|
|
|
+ params = self.procedure[node["sequence"][0]]["parameters"]["params"]
|
|
|
+ except:
|
|
|
+ params = self.procedure[node["sequence"][0]]["parameters"]["paras"] # 兼容0.5.0及以下版本的EasySpider
|
|
|
try:
|
|
|
waitElement = self.procedure[node["sequence"][0]]["parameters"]["waitElement"]
|
|
|
except:
|
|
@@ -1031,7 +1034,8 @@ class BrowserThread(Thread):
|
|
|
ti = 0
|
|
|
# print("CURRENT URL:", self.browser.current_url)
|
|
|
# time.sleep(2)
|
|
|
- if self.browser.current_url.startswith("data:") or self.browser.current_url.startswith("chrome:"):
|
|
|
+ # if self.browser.current_url.startswith("data:") or self.browser.current_url.startswith("chrome:"):
|
|
|
+ if self.browser.current_url != thisHistoryURL and self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle:
|
|
|
while self.browser.current_url != thisHistoryURL: # 如果执行完一次循环之后网址发生了变化
|
|
|
try:
|
|
|
self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步
|
|
@@ -1045,8 +1049,8 @@ class BrowserThread(Thread):
|
|
|
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
|
|
|
else: # 固定元素列表
|
|
|
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
|
|
|
- if index > 0:
|
|
|
- index -= 1 # 如果是data:开头的网址,就要重试一次
|
|
|
+ # if index > 0:
|
|
|
+ # index -= 1 # 如果是data:开头的网址,就要重试一次
|
|
|
else:
|
|
|
if element == None:
|
|
|
element = elements
|
|
@@ -1199,8 +1203,16 @@ class BrowserThread(Thread):
|
|
|
self.print_and_log("找不到循环元素: ", xpath)
|
|
|
index = 0
|
|
|
while index < len(elements):
|
|
|
+ try:
|
|
|
+ element = elements[index]
|
|
|
+ element_text = element.text
|
|
|
+ except StaleElementReferenceException: # 如果元素已经失效,重试
|
|
|
+ self.print_and_log("元素已失效,重新获取元素|Element has expired, reacquiring element")
|
|
|
+ elements = self.browser.find_elements(By.XPATH,
|
|
|
+ xpath, iframe=node["parameters"]["iframe"])
|
|
|
+ element = elements[index]
|
|
|
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
|
|
|
- self.executeNode(i, elements[index],
|
|
|
+ self.executeNode(i, element,
|
|
|
xpath, index)
|
|
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
|
|
self.CONTINUE = False
|