| 
					
				 | 
			
			
				@@ -1,6 +1,6 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-using HtmlAgilityPack; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-using System.Collections.Generic; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+using System.Collections.Generic; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 using System.Linq; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+using System.Text.RegularExpressions; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 namespace Masuit.LuceneEFCore.SearchEngine.Extensions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -22,45 +22,15 @@ namespace Masuit.LuceneEFCore.SearchEngine.Extensions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         /// <summary> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        /// 移除html标签 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        /// 去除html标签后并截取字符串 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         /// </summary> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        /// <param name="html"></param> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        /// <param name="html">源html</param> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         /// <returns></returns> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        public static string RemoveUnwantedTags(this string html) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        public static string RemoveHtmlTag(this string html) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if (string.IsNullOrEmpty(html)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return string.Empty; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            var document = new HtmlDocument(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            document.LoadHtml(html); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            var nodes = new Queue<HtmlNode>(document.DocumentNode.SelectNodes("./*|./text()")); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            while (nodes.Count > 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                var node = nodes.Dequeue(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                var parentNode = node.ParentNode; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if (node.Name != "#text") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    var childNodes = node.SelectNodes("./*|./text()"); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if (childNodes != null) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        foreach (var child in childNodes) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            nodes.Enqueue(child); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            parentNode.InsertBefore(child, node); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    parentNode.RemoveChild(node); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return document.DocumentNode.InnerHtml; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            string strText = Regex.Replace(html, "<[^>]+>", ""); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            strText = Regex.Replace(strText, "&[^;]+;", ""); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return strText; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 |