|
@@ -89,7 +89,6 @@ class BrowserThread(Thread):
|
|
filter(isnull, service["links"].split("\n"))) # 要执行的link的列表
|
|
filter(isnull, service["links"].split("\n"))) # 要执行的link的列表
|
|
self.OUTPUT = [] # 采集的数据
|
|
self.OUTPUT = [] # 采集的数据
|
|
self.containJudge = service["containJudge"] # 是否含有判断语句
|
|
self.containJudge = service["containJudge"] # 是否含有判断语句
|
|
- self.bodyText = "" # 记录bodyText
|
|
|
|
tOut = service["outputParameters"] # 生成输出参数对象
|
|
tOut = service["outputParameters"] # 生成输出参数对象
|
|
self.outputParameters = {}
|
|
self.outputParameters = {}
|
|
self.dataNotFoundKeys = {} # 记录没有找到数据的key
|
|
self.dataNotFoundKeys = {} # 记录没有找到数据的key
|
|
@@ -123,10 +122,6 @@ class BrowserThread(Thread):
|
|
except:
|
|
except:
|
|
para["iframe"] = False
|
|
para["iframe"] = False
|
|
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
|
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
|
- # # iframe中提取数据的绝对寻址操作不可优化
|
|
|
|
- # if para["relative"] == False and para["iframe"] == True:
|
|
|
|
- # para["optimizable"] = False
|
|
|
|
- # else:
|
|
|
|
para["optimizable"] = True
|
|
para["optimizable"] = True
|
|
else:
|
|
else:
|
|
para["optimizable"] = False
|
|
para["optimizable"] = False
|
|
@@ -206,7 +201,7 @@ class BrowserThread(Thread):
|
|
if rt != "":
|
|
if rt != "":
|
|
rt.end()
|
|
rt.end()
|
|
|
|
|
|
- def execute_code(self, codeMode, code, max_wait_time, element=None):
|
|
|
|
|
|
+ def execute_code(self, codeMode, code, max_wait_time, element=None, iframe=False):
|
|
output = ""
|
|
output = ""
|
|
if code == "":
|
|
if code == "":
|
|
return ""
|
|
return ""
|
|
@@ -221,6 +216,24 @@ class BrowserThread(Thread):
|
|
except:
|
|
except:
|
|
replaced_text = code
|
|
replaced_text = code
|
|
code = replaced_text
|
|
code = replaced_text
|
|
|
|
+ if iframe and self.browser.iframe_env == False:
|
|
|
|
+ # 获取所有的 iframe
|
|
|
|
+ self.browser.switch_to.default_content()
|
|
|
|
+ iframes = self.browser.find_elements(
|
|
|
|
+ By.CSS_SELECTOR, "iframe", iframe=False)
|
|
|
|
+ # 遍历所有的 iframe 并点击里面的元素
|
|
|
|
+ for iframe in iframes:
|
|
|
|
+ # 切换到 iframe
|
|
|
|
+ try:
|
|
|
|
+ self.browser.switch_to.default_content()
|
|
|
|
+ self.browser.switch_to.frame(iframe)
|
|
|
|
+ self.browser.iframe_env = True
|
|
|
|
+ break
|
|
|
|
+ except:
|
|
|
|
+ print("Iframe switch failed")
|
|
|
|
+ elif not iframe and self.browser.iframe_env == True:
|
|
|
|
+ self.browser.switch_to.default_content()
|
|
|
|
+ self.browser.iframe_env = False
|
|
if int(codeMode) == 0:
|
|
if int(codeMode) == 0:
|
|
self.recordLog("Execute JavaScript:" + code)
|
|
self.recordLog("Execute JavaScript:" + code)
|
|
self.recordLog("执行JavaScript:" + code)
|
|
self.recordLog("执行JavaScript:" + code)
|
|
@@ -271,12 +284,13 @@ class BrowserThread(Thread):
|
|
By.XPATH, loopPath, iframe=paras["iframe"])
|
|
By.XPATH, loopPath, iframe=paras["iframe"])
|
|
element = elements[index]
|
|
element = elements[index]
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- codeMode, code, max_wait_time, element)
|
|
|
|
|
|
+ codeMode, code, max_wait_time, element, iframe=paras["iframe"])
|
|
except:
|
|
except:
|
|
output = ""
|
|
output = ""
|
|
print("JavaScript execution failed")
|
|
print("JavaScript execution failed")
|
|
else:
|
|
else:
|
|
- output = self.execute_code(codeMode, code, max_wait_time)
|
|
|
|
|
|
+ output = self.execute_code(
|
|
|
|
+ codeMode, code, max_wait_time, iframe=paras["iframe"])
|
|
recordASField = int(paras["recordASField"])
|
|
recordASField = int(paras["recordASField"])
|
|
if recordASField:
|
|
if recordASField:
|
|
self.outputParameters[node["title"]] = output
|
|
self.outputParameters[node["title"]] = output
|
|
@@ -321,9 +335,12 @@ class BrowserThread(Thread):
|
|
time.sleep(0.1) # 移动之前等待0.1秒
|
|
time.sleep(0.1) # 移动之前等待0.1秒
|
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
|
path = loopPath
|
|
path = loopPath
|
|
|
|
+ # element = loopElement
|
|
else:
|
|
else:
|
|
index = 0
|
|
index = 0
|
|
path = para["xpath"] # 不然使用元素定义的xpath
|
|
path = para["xpath"] # 不然使用元素定义的xpath
|
|
|
|
+ # element = self.browser.find_element(
|
|
|
|
+ # By.XPATH, path, iframe=para["iframe"])
|
|
try:
|
|
try:
|
|
elements = self.browser.find_elements(
|
|
elements = self.browser.find_elements(
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
@@ -405,7 +422,9 @@ class BrowserThread(Thread):
|
|
break
|
|
break
|
|
elif tType == 1: # 当前页面包含文本
|
|
elif tType == 1: # 当前页面包含文本
|
|
try:
|
|
try:
|
|
- if self.bodyText.find(cnode["parameters"]["value"]) >= 0:
|
|
|
|
|
|
+ bodyText = self.browser.find_element(
|
|
|
|
+ By.CSS_SELECTOR, "body", iframe=cnode["parameters"]["iframe"]).text
|
|
|
|
+ if bodyText.find(cnode["parameters"]["value"]) >= 0:
|
|
executeBranchId = i
|
|
executeBranchId = i
|
|
break
|
|
break
|
|
except: # 找不到元素下一个条件
|
|
except: # 找不到元素下一个条件
|
|
@@ -426,7 +445,7 @@ class BrowserThread(Thread):
|
|
continue
|
|
continue
|
|
elif tType == 4: # 当前循环元素包括元素
|
|
elif tType == 4: # 当前循环元素包括元素
|
|
try:
|
|
try:
|
|
- if loopElement.find_element(By.XPATH, cnode["parameters"]["value"][1:], iframe=cnode["parameters"]["iframe"]):
|
|
|
|
|
|
+ if loopElement.find_element(By.XPATH, cnode["parameters"]["value"][1:]):
|
|
executeBranchId = i
|
|
executeBranchId = i
|
|
break
|
|
break
|
|
except: # 找不到元素或者xpath写错了,下一个条件
|
|
except: # 找不到元素或者xpath写错了,下一个条件
|
|
@@ -434,13 +453,13 @@ class BrowserThread(Thread):
|
|
elif tType <= 7: # JS命令返回值
|
|
elif tType <= 7: # JS命令返回值
|
|
if tType == 5: # JS命令返回值等于
|
|
if tType == 5: # JS命令返回值等于
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- 0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
|
|
|
|
|
|
+ 0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"])
|
|
elif tType == 6: # System
|
|
elif tType == 6: # System
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- 1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
|
|
|
|
|
|
+ 1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"])
|
|
elif tType == 7: # 针对当前循环项的JS命令返回值
|
|
elif tType == 7: # 针对当前循环项的JS命令返回值
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- 2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement)
|
|
|
|
|
|
+ 2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, iframe=cnode["parameters"]["iframe"])
|
|
try:
|
|
try:
|
|
if output.find("rue") != -1: # 如果返回值中包含true
|
|
if output.find("rue") != -1: # 如果返回值中包含true
|
|
code = 1
|
|
code = 1
|
|
@@ -512,7 +531,7 @@ class BrowserThread(Thread):
|
|
break
|
|
break
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
output = self.execute_code(int(
|
|
output = self.execute_code(int(
|
|
- node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
|
|
|
|
|
|
+ node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -547,13 +566,13 @@ class BrowserThread(Thread):
|
|
time.sleep(node["parameters"]["historyWait"])
|
|
time.sleep(node["parameters"]["historyWait"])
|
|
# else:
|
|
# else:
|
|
# time.sleep(2)
|
|
# time.sleep(2)
|
|
- # 切换历史记录等待2秒或者:
|
|
|
|
|
|
+ # 切换历史记录等待:
|
|
self.Log("Change history back time or:",
|
|
self.Log("Change history back time or:",
|
|
node["parameters"]["historyWait"])
|
|
node["parameters"]["historyWait"])
|
|
self.browser.execute_script('window.stop()')
|
|
self.browser.execute_script('window.stop()')
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
output = self.execute_code(int(
|
|
output = self.execute_code(int(
|
|
- node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
|
|
|
|
|
|
+ node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -600,7 +619,7 @@ class BrowserThread(Thread):
|
|
raise
|
|
raise
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
output = self.execute_code(int(
|
|
output = self.execute_code(int(
|
|
- node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
|
|
|
|
|
|
+ node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -612,7 +631,7 @@ class BrowserThread(Thread):
|
|
self.executeNode(i, text, "", 0)
|
|
self.executeNode(i, text, "", 0)
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
output = self.execute_code(int(
|
|
output = self.execute_code(int(
|
|
- node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
|
|
|
|
|
|
+ node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -630,7 +649,7 @@ class BrowserThread(Thread):
|
|
self.executeNode(i, url, "", 0)
|
|
self.executeNode(i, url, "", 0)
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
|
output = self.execute_code(int(
|
|
output = self.execute_code(int(
|
|
- node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
|
|
|
|
|
|
+ node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -638,10 +657,10 @@ class BrowserThread(Thread):
|
|
while True: # do while循环
|
|
while True: # do while循环
|
|
if int(node["parameters"]["loopType"]) == 5: # JS
|
|
if int(node["parameters"]["loopType"]) == 5: # JS
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- 0, node["parameters"]["code"], node["parameters"]["waitTime"])
|
|
|
|
|
|
+ 0, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"])
|
|
elif int(node["parameters"]["loopType"]) == 6: # System
|
|
elif int(node["parameters"]["loopType"]) == 6: # System
|
|
output = self.execute_code(
|
|
output = self.execute_code(
|
|
- 1, node["parameters"]["code"], node["parameters"]["waitTime"])
|
|
|
|
|
|
+ 1, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"])
|
|
code = get_output_code(output)
|
|
code = get_output_code(output)
|
|
if code <= 0:
|
|
if code <= 0:
|
|
break
|
|
break
|
|
@@ -707,29 +726,8 @@ class BrowserThread(Thread):
|
|
except:
|
|
except:
|
|
self.history["index"] = 0
|
|
self.history["index"] = 0
|
|
self.scrollDown(para) # 控制屏幕向下滚动
|
|
self.scrollDown(para) # 控制屏幕向下滚动
|
|
- if self.containJudge:
|
|
|
|
- try:
|
|
|
|
- self.bodyText = self.browser.find_element(
|
|
|
|
- By.CSS_SELECTOR, "body", iframe=False).text
|
|
|
|
- self.Log('URL Page: ' + url)
|
|
|
|
- self.recordLog('URL Page: ' + url)
|
|
|
|
- except TimeoutException:
|
|
|
|
- self.Log(
|
|
|
|
- 'Time out after set seconds when getting body text: ' + url)
|
|
|
|
- self.recordLog(
|
|
|
|
- 'Time out after set seconds when getting body text:: ' + url)
|
|
|
|
- self.browser.execute_script('window.stop()')
|
|
|
|
- time.sleep(1)
|
|
|
|
- self.Log("Need to wait 1 second to get body text")
|
|
|
|
- # 再执行一遍
|
|
|
|
- self.bodyText = self.browser.find_element(
|
|
|
|
- By.CSS_SELECTOR, "body", iframe=False).text
|
|
|
|
- except Exception as e:
|
|
|
|
- self.Log(e)
|
|
|
|
- self.recordLog(str(e))
|
|
|
|
|
|
|
|
# 键盘输入事件
|
|
# 键盘输入事件
|
|
-
|
|
|
|
def inputInfo(self, para, loopValue):
|
|
def inputInfo(self, para, loopValue):
|
|
time.sleep(0.1) # 输入之前等待0.1秒
|
|
time.sleep(0.1) # 输入之前等待0.1秒
|
|
self.Log("Wait 0.1 second before input")
|
|
self.Log("Wait 0.1 second before input")
|
|
@@ -739,7 +737,7 @@ class BrowserThread(Thread):
|
|
# textbox.send_keys(Keys.CONTROL, 'a')
|
|
# textbox.send_keys(Keys.CONTROL, 'a')
|
|
# textbox.send_keys(Keys.BACKSPACE)
|
|
# textbox.send_keys(Keys.BACKSPACE)
|
|
self.execute_code(
|
|
self.execute_code(
|
|
- 2, para["beforeJS"], para["beforeJSWaitTime"], textbox) # 执行前置JS
|
|
|
|
|
|
+ 2, para["beforeJS"], para["beforeJSWaitTime"], textbox, iframe=para["iframe"]) # 执行前置JS
|
|
# Send the HOME key
|
|
# Send the HOME key
|
|
textbox.send_keys(Keys.HOME)
|
|
textbox.send_keys(Keys.HOME)
|
|
# Send the SHIFT + END key combination
|
|
# Send the SHIFT + END key combination
|
|
@@ -764,10 +762,7 @@ class BrowserThread(Thread):
|
|
if value.lower().find("<enter>") >= 0:
|
|
if value.lower().find("<enter>") >= 0:
|
|
textbox.send_keys(Keys.ENTER)
|
|
textbox.send_keys(Keys.ENTER)
|
|
self.execute_code(
|
|
self.execute_code(
|
|
- 2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
|
|
|
|
- # global bodyText # 每次执行点击,输入元素和打开网页操作后,需要更新bodyText
|
|
|
|
- self.bodyText = self.browser.find_element(
|
|
|
|
- By.CSS_SELECTOR, "body").text
|
|
|
|
|
|
+ 2, para["afterJS"], para["afterJSWaitTime"], textbox, iframe=para["iframe"]) # 执行后置js
|
|
except:
|
|
except:
|
|
print("Cannot find input box element:" +
|
|
print("Cannot find input box element:" +
|
|
para["xpath"] + ", please try to set the wait time before executing this operation")
|
|
para["xpath"] + ", please try to set the wait time before executing this operation")
|
|
@@ -780,10 +775,6 @@ class BrowserThread(Thread):
|
|
def clickElement(self, para, loopElement=None, clickPath="", index=0):
|
|
def clickElement(self, para, loopElement=None, clickPath="", index=0):
|
|
time.sleep(0.1) # 点击之前等待0.1秒
|
|
time.sleep(0.1) # 点击之前等待0.1秒
|
|
self.Log("Wait 0.1 second before clicking element")
|
|
self.Log("Wait 0.1 second before clicking element")
|
|
- if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
|
|
|
- path = clickPath
|
|
|
|
- else:
|
|
|
|
- path = para["xpath"] # 不然使用元素定义的xpath
|
|
|
|
try:
|
|
try:
|
|
maxWaitTime = int(para["maxWaitTime"])
|
|
maxWaitTime = int(para["maxWaitTime"])
|
|
except:
|
|
except:
|
|
@@ -792,11 +783,22 @@ class BrowserThread(Thread):
|
|
self.browser.set_script_timeout(maxWaitTime)
|
|
self.browser.set_script_timeout(maxWaitTime)
|
|
# 点击前对该元素执行一段JavaScript代码
|
|
# 点击前对该元素执行一段JavaScript代码
|
|
try:
|
|
try:
|
|
- element = self.browser.find_element(
|
|
|
|
|
|
+ # element = self.browser.find_element(
|
|
|
|
+ # By.XPATH, path, iframe=para["iframe"])
|
|
|
|
+ if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
|
|
|
+ path = clickPath
|
|
|
|
+ # element = loopElement
|
|
|
|
+ else:
|
|
|
|
+ index = 0
|
|
|
|
+ path = para["xpath"] # 不然使用元素定义的xpath
|
|
|
|
+ # element = self.browser.find_element(
|
|
|
|
+ # By.XPATH, path, iframe=para["iframe"])
|
|
|
|
+ elements = self.browser.find_elements(
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
|
|
+ element = elements[index]
|
|
if para["beforeJS"] != "":
|
|
if para["beforeJS"] != "":
|
|
self.execute_code(2, para["beforeJS"],
|
|
self.execute_code(2, para["beforeJS"],
|
|
- para["beforeJSWaitTime"], element)
|
|
|
|
|
|
+ para["beforeJSWaitTime"], element, iframe=para["iframe"])
|
|
except:
|
|
except:
|
|
print("Cannot find element:" +
|
|
print("Cannot find element:" +
|
|
path + ", please try to set the wait time before executing this operation")
|
|
path + ", please try to set the wait time before executing this operation")
|
|
@@ -809,7 +811,7 @@ class BrowserThread(Thread):
|
|
except:
|
|
except:
|
|
click_way = 0
|
|
click_way = 0
|
|
try:
|
|
try:
|
|
- if click_way == 0 or para["iframe"]: # 用selenium的点击方法
|
|
|
|
|
|
+ if click_way == 0: # 用selenium的点击方法
|
|
actions = ActionChains(self.browser) # 实例化一个action对象
|
|
actions = ActionChains(self.browser) # 实例化一个action对象
|
|
actions.click(element).perform()
|
|
actions.click(element).perform()
|
|
elif click_way == 1: # 用js的点击方法
|
|
elif click_way == 1: # 用js的点击方法
|
|
@@ -824,13 +826,13 @@ class BrowserThread(Thread):
|
|
except Exception as e:
|
|
except Exception as e:
|
|
self.Log(e)
|
|
self.Log(e)
|
|
self.recordLog(str(e))
|
|
self.recordLog(str(e))
|
|
- # 点击前对该元素执行一段JavaScript代码
|
|
|
|
|
|
+ # 点击后对该元素执行一段JavaScript代码
|
|
try:
|
|
try:
|
|
if para["afterJS"] != "":
|
|
if para["afterJS"] != "":
|
|
element = self.browser.find_element(
|
|
element = self.browser.find_element(
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
By.XPATH, path, iframe=para["iframe"])
|
|
self.execute_code(2, para["afterJS"],
|
|
self.execute_code(2, para["afterJS"],
|
|
- para["afterJSWaitTime"], element)
|
|
|
|
|
|
+ para["afterJSWaitTime"], element, iframe=para["iframe"])
|
|
except:
|
|
except:
|
|
print("Cannot find element:" + path)
|
|
print("Cannot find element:" + path)
|
|
self.recordLog("Cannot find element:" +
|
|
self.recordLog("Cannot find element:" +
|
|
@@ -866,25 +868,6 @@ class BrowserThread(Thread):
|
|
"return history.length")
|
|
"return history.length")
|
|
# 如果打开了新窗口,切换到新窗口
|
|
# 如果打开了新窗口,切换到新窗口
|
|
self.scrollDown(para) # 根据参数配置向下滚动
|
|
self.scrollDown(para) # 根据参数配置向下滚动
|
|
- if self.containJudge: # 有判断语句才执行以下操作
|
|
|
|
- # global bodyText # 每次执行点击,输入元素和打开网页操作后,需要更新bodyText
|
|
|
|
- try:
|
|
|
|
- self.bodyText = self.browser.find_element(
|
|
|
|
- By.CSS_SELECTOR, "body").text
|
|
|
|
- except TimeoutException:
|
|
|
|
- self.Log('Time out after 10 seconds when getting body text')
|
|
|
|
- self.recordLog(
|
|
|
|
- 'Time out after 10 seconds when getting body text')
|
|
|
|
- self.browser.execute_script('window.stop()')
|
|
|
|
- time.sleep(1)
|
|
|
|
- self.Log("wait one second after get body text")
|
|
|
|
- # 再执行一遍
|
|
|
|
- self.bodyText = self.browser.find_element(
|
|
|
|
- By.CSS_SELECTOR, "body").text
|
|
|
|
- # rt.end()
|
|
|
|
- except Exception as e:
|
|
|
|
- self.Log(e)
|
|
|
|
- self.recordLog(str(e))
|
|
|
|
# rt.end()
|
|
# rt.end()
|
|
|
|
|
|
def get_content(self, p, element):
|
|
def get_content(self, p, element):
|
|
@@ -1000,7 +983,8 @@ class BrowserThread(Thread):
|
|
print(e)
|
|
print(e)
|
|
print("注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
|
|
print("注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
|
|
elif p["contentType"] == 9:
|
|
elif p["contentType"] == 9:
|
|
- content = self.execute_code(2, p["JS"], p["JSWaitTime"], element)
|
|
|
|
|
|
+ content = self.execute_code(
|
|
|
|
+ 2, p["JS"], p["JSWaitTime"], element, iframe=p["iframe"])
|
|
elif p["contentType"] == 10: # 下拉框选中的值
|
|
elif p["contentType"] == 10: # 下拉框选中的值
|
|
try:
|
|
try:
|
|
select_element = Select(element)
|
|
select_element = Select(element)
|
|
@@ -1020,9 +1004,19 @@ class BrowserThread(Thread):
|
|
|
|
|
|
def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
|
|
def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
|
|
pageHTML = etree.HTML(self.browser.page_source)
|
|
pageHTML = etree.HTML(self.browser.page_source)
|
|
- try:
|
|
|
|
- loopElementOuterHTML = loopElement.get_attribute('outerHTML')
|
|
|
|
- except:
|
|
|
|
|
|
+ if loopElement != "": # 只在数据在循环中提取时才需要获取循环元素
|
|
|
|
+ try:
|
|
|
|
+ loopElementOuterHTML = loopElement.get_attribute('outerHTML')
|
|
|
|
+ except:
|
|
|
|
+ try: # 循环点击每个链接如果没有新标签页打开,loopElement会丢失,此时需要重新获取
|
|
|
|
+ elements = self.browser.find_elements(
|
|
|
|
+ By.XPATH, parentPath, iframe=para["paras"][0]["iframe"])
|
|
|
|
+ loopElement = elements[index]
|
|
|
|
+ loopElementOuterHTML = loopElement.get_attribute(
|
|
|
|
+ 'outerHTML')
|
|
|
|
+ except:
|
|
|
|
+ loopElementOuterHTML = ""
|
|
|
|
+ else:
|
|
loopElementOuterHTML = ""
|
|
loopElementOuterHTML = ""
|
|
loopElementHTML = etree.HTML(loopElementOuterHTML)
|
|
loopElementHTML = etree.HTML(loopElementOuterHTML)
|
|
for p in para["paras"]:
|
|
for p in para["paras"]:
|
|
@@ -1053,7 +1047,7 @@ class BrowserThread(Thread):
|
|
content = loopElementHTML.xpath(
|
|
content = loopElementHTML.xpath(
|
|
"/html/body/" + loopElementHTML[0][0].tag + xpath)
|
|
"/html/body/" + loopElementHTML[0][0].tag + xpath)
|
|
else:
|
|
else:
|
|
- if xpath.find("/html/body") < 0:
|
|
|
|
|
|
+ if xpath.find("/body") < 0:
|
|
xpath = "/html/body" + xpath
|
|
xpath = "/html/body" + xpath
|
|
content = pageHTML.xpath(xpath)
|
|
content = pageHTML.xpath(xpath)
|
|
if len(content) > 0:
|
|
if len(content) > 0:
|
|
@@ -1145,7 +1139,7 @@ class BrowserThread(Thread):
|
|
By.XPATH, "//body", iframe=p["iframe"])
|
|
By.XPATH, "//body", iframe=p["iframe"])
|
|
try:
|
|
try:
|
|
self.execute_code(
|
|
self.execute_code(
|
|
- 2, p["beforeJS"], p["beforeJSWaitTime"], element) # 执行前置js
|
|
|
|
|
|
+ 2, p["beforeJS"], p["beforeJSWaitTime"], element, iframe=p["iframe"]) # 执行前置js
|
|
content = self.get_content(p, element)
|
|
content = self.get_content(p, element)
|
|
except StaleElementReferenceException: # 发生找不到元素的异常后,等待几秒重新查找
|
|
except StaleElementReferenceException: # 发生找不到元素的异常后,等待几秒重新查找
|
|
self.recordLog(
|
|
self.recordLog(
|
|
@@ -1174,7 +1168,7 @@ class BrowserThread(Thread):
|
|
continue # 再出现类似问题直接跳过
|
|
continue # 再出现类似问题直接跳过
|
|
self.outputParameters[p["name"]] = content
|
|
self.outputParameters[p["name"]] = content
|
|
self.execute_code(
|
|
self.execute_code(
|
|
- 2, p["afterJS"], p["afterJSWaitTime"], element) # 执行后置JS
|
|
|
|
|
|
+ 2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
|
|
line = []
|
|
line = []
|
|
for value in self.outputParameters.values():
|
|
for value in self.outputParameters.values():
|
|
line.append(value)
|
|
line.append(value)
|