|  | @@ -6,8 +6,8 @@ import platform
 | 
	
		
			
				|  |  |  import shutil
 | 
	
		
			
				|  |  |  import string
 | 
	
		
			
				|  |  |  import undetected_chromedriver as uc
 | 
	
		
			
				|  |  | -from utils import detect_optimizable, download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
 | 
	
		
			
				|  |  | -    on_press_creator, on_release_creator, readCode, replace_field_values, send_email, write_to_csv, write_to_excel, write_to_json
 | 
	
		
			
				|  |  | +from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
 | 
	
		
			
				|  |  | +    on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
 | 
	
		
			
				|  |  |  from myChrome import MyChrome
 | 
	
		
			
				|  |  |  from threading import Thread, Event
 | 
	
		
			
				|  |  |  from PIL import Image
 | 
	
	
		
			
				|  | @@ -295,9 +295,13 @@ class BrowserThread(Thread):
 | 
	
		
			
				|  |  |                      except:
 | 
	
		
			
				|  |  |                          pass
 | 
	
		
			
				|  |  |                      try:
 | 
	
		
			
				|  |  | -                        node["parameters"]["recordASField"] += param["recordASField"]
 | 
	
		
			
				|  |  | +                        node["parameters"]["recordASField"] = param["recordASField"]
 | 
	
		
			
				|  |  |                      except:
 | 
	
		
			
				|  |  | -                        node["parameters"]["recordASField"] += 1
 | 
	
		
			
				|  |  | +                        node["parameters"]["recordASField"] = 1
 | 
	
		
			
				|  |  | +                    try:
 | 
	
		
			
				|  |  | +                        splitLine = int(param["splitLine"])
 | 
	
		
			
				|  |  | +                    except:
 | 
	
		
			
				|  |  | +                        param["splitLine"] = 0
 | 
	
		
			
				|  |  |                      if param["contentType"] == 8:
 | 
	
		
			
				|  |  |                          self.print_and_log(
 | 
	
		
			
				|  |  |                              "默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType == 8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。")
 | 
	
	
		
			
				|  | @@ -1754,7 +1758,11 @@ class BrowserThread(Thread):
 | 
	
		
			
				|  |  |                      download_image(self, content, "Data/Task_" +
 | 
	
		
			
				|  |  |                                     str(self.id) + "/" + self.saveName + "/", element)
 | 
	
		
			
				|  |  |              else:  # 普通节点
 | 
	
		
			
				|  |  | -                content = element.text
 | 
	
		
			
				|  |  | +                if p["splitLine"] == 1:
 | 
	
		
			
				|  |  | +                    text = extract_text_from_html(element.get_attribute('outerHTML'))
 | 
	
		
			
				|  |  | +                    content = split_text_by_lines(text)
 | 
	
		
			
				|  |  | +                else:
 | 
	
		
			
				|  |  | +                    content = element.text
 | 
	
		
			
				|  |  |          elif p["contentType"] == 1:  # 只采集当期元素下的文本,不包括子元素
 | 
	
		
			
				|  |  |              if p["nodeType"] == 2:
 | 
	
		
			
				|  |  |                  if element.get_attribute("href") != None:
 |