| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926 | 
							- using System;
 
- using System.IO;
 
- using System.IO.Compression;
 
- using System.Net;
 
- using System.Text;
 
- using System.Text.RegularExpressions;
 
- using System.Threading;
 
- using System.Web;
 
- using Masuit.Tools.Win32;
 
- namespace Masuit.Tools.Html
 
- {
 
-     /// <summary>
 
-     ///1、获取HTML<br/>
 
-     ///1.1获取指定页面的HTML代码 GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)<br/>
 
-     ///1.2获取HTMLGetHtml(string url, CookieContainer cookieContainer)<br/>
 
-     ///2、获取字符流<br/>
 
-     ///2.1获取字符流GetStream(string url, CookieContainer cookieContainer)<br/>
 
-     ///3、清除HTML标记 <br/>
 
-     ///3.1清除HTML标记  NoHTML(string Htmlstring)<br/>
 
-     ///4、匹配页面的链接 <br/>
 
-     ///4.1获取页面的链接正则 GetHref(string HtmlCode)<br/>
 
-     ///5、匹配页面的图片地址<br/>
 
-     /// 5.1匹配页面的图片地址 GetImgSrc(string HtmlCode, string imgHttp)<br/>
 
-     ///5.2匹配<img src="" />中的图片路径实际链接  GetImg(string ImgString, string imgHttp)<br/>
 
-     ///6、抓取远程页面内容<br/>
 
-     /// 6.1以GET方式抓取远程页面内容 Get_Http(string tUrl)<br/>
 
-     /// 6.2以POST方式抓取远程页面内容 Post_Http(string url, string postData, string encodeType)<br/>
 
-     ///7、压缩HTML输出<br/>
 
-     ///7.1压缩HTML输出 ZipHtml(string Html)<br/>
 
-     ///8、过滤HTML标签<br/>
 
-     /// 8.1过滤指定HTML标签 DelHtml(string s_TextStr, string html_Str)  <br/>
 
-     /// 8.2过滤HTML中的不安全标签 RemoveUnsafeHtml(string content)<br/>
 
-     /// HTML转行成TEXT HtmlToTxt(string strHtml)<br/>
 
-     /// 字符串转换为 HtmlStringToHtml(string str)<br/>
 
-     /// html转换成字符串HtmlToString(string strHtml)<br/>
 
-     /// 获取URL编码<br/>
 
-     /// 判断URL是否有效<br/>
 
-     /// 返回 HTML 字符串的编码解码结果
 
-     /// </summary>
 
-     public static partial class HtmlTools
 
-     {
 
-         #region 私有字段
 
// Cookie jar handed out through the CookieContainer property.
private static CookieContainer cc = new CookieContainer();

// Content-Type header applied to the requests built by GetHtml/GetStream.
private static string contentType = "application/x-www-form-urlencoded";

// Accept header applied to the requests built by GetHtml/GetStream.
private static string accept = string.Join(", ",
    "image/gif",
    "image/x-xbitmap",
    "image/jpeg",
    "image/pjpeg",
    "application/x-shockwave-flash",
    "application/x-silverlight",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/msword",
    "application/x-ms-application",
    "application/x-ms-xbap",
    "application/vnd.ms-xpsdocument",
    "application/xaml+xml",
    "application/x-silverlight-2-b1",
    "*/*");

// User-Agent header applied to the requests built by GetHtml/GetStream.
private static string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

// Base delay in milliseconds; NetworkDelay randomizes around it.
private static int delay = 1000;

// Shared attempt counter used by the retrying request helpers (starts at zero).
private static int currentTry;
 
-         #endregion
 
-         #region 公有属性
 
/// <summary>
/// Gets the shared cookie container.
/// </summary>
public static CookieContainer CookieContainer => cc;

/// <summary>
/// Gets or sets the encoding used to decode page source returned by the GetHtml helpers
/// (UTF-8 unless changed).
/// </summary>
public static Encoding Encoding { get; set; } = Encoding.GetEncoding("utf-8");

/// <summary>
/// Gets a randomized pause in milliseconds, at least the stored base delay and below
/// twice that value; setting the property stores a new base delay.
/// </summary>
public static int NetworkDelay
{
    get
    {
        var jitter = new Random();
        int lower = delay;
        int upper = delay * 2;
        return jitter.Next(lower, upper);
    }
    set { delay = value; }
}

/// <summary>
/// Gets or sets the maximum number of retries; the request helpers also use it as the
/// connection limit of the service point.
/// </summary>
public static int MaxTry { get; set; } = 300;
 
-         #endregion
 
-         #region 1、获取HTML
 
/// <summary>
/// Removes HTML tags and character entities from a string and optionally truncates the result.
/// </summary>
/// <param name="html">Source HTML; <c>null</c> or empty input yields an empty string.</param>
/// <param name="length">Maximum number of characters to keep; zero or negative disables truncation.</param>
/// <returns>The remaining text, cut to <paramref name="length"/> characters when that limit is positive.</returns>
public static string RemoveHtmlTag(this string html, int length = 0)
{
    // Extension methods can be invoked on a null receiver; Regex.Replace would throw on it.
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    string text = Regex.Replace(html, "<[^>]+>", "");
    text = Regex.Replace(text, "&[^;]+;", "");
    return length > 0 && text.Length > length ? text.Substring(0, length) : text;
}
 
/// <summary>
/// Downloads the HTML of a page, sending <paramref name="postData"/> as the request body.
/// </summary>
/// <remarks>
/// Each attempt is preceded by a <see cref="NetworkDelay"/> pause. A failed attempt is retried
/// up to <see cref="MaxTry"/> times and the first successful body is returned. Redirects are not
/// followed, and the body of an HTTP error response is returned like any other body.
/// </remarks>
/// <param name="_">Unused; only enables extension-method syntax.</param>
/// <param name="url">Address of the page.</param>
/// <param name="postData">Body to send; when null or empty the GET overload is used instead.</param>
/// <param name="isPost">True to send a POST with the body; false to send a GET without a body.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response text, or an empty string when every attempt failed.</returns>
public static string GetHtml(this HttpWebRequest _, string url, string postData, bool isPost, CookieContainer cookieContainer)
{
    if (string.IsNullOrEmpty(postData))
    {
        return GetHtml(null, url, cookieContainer);
    }

    byte[] payload = Encoding.Default.GetBytes(postData);

    // One initial attempt plus up to MaxTry retries. A local counter replaces the shared
    // static one so a success after retries cannot leave that counter unbalanced.
    for (int attempt = 0; attempt <= MaxTry; attempt++)
    {
        Thread.Sleep(NetworkDelay);
        HttpWebRequest request = null;
        HttpWebResponse response = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = contentType;
            request.ServicePoint.ConnectionLimit = MaxTry;
            request.Referer = url;
            request.Accept = accept;
            request.UserAgent = userAgent;
            request.Method = isPost ? "POST" : "GET";
            request.AllowAutoRedirect = false;

            // HttpWebRequest rejects a request body on GET, so the payload is only written for POST.
            if (isPost)
            {
                request.ContentLength = payload.Length;
                using (Stream body = request.GetRequestStream())
                {
                    body.Write(payload, 0, payload.Length);
                }
            }

            try
            {
                response = (HttpWebResponse)request.GetResponse();
            }
            catch (WebException ex)
            {
                // Non-success status codes surface as WebException; keep their response body.
                response = ex.Response as HttpWebResponse;
                if (response == null)
                {
                    throw;
                }
            }

            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best effort: fall through to the next attempt.
        }
        finally
        {
            if (request != null) request.Abort();
            if (response != null) response.Close();
        }
    }

    return string.Empty;
}
 
/// <summary>
/// Downloads the HTML of a page with a GET request.
/// </summary>
/// <remarks>
/// Each attempt is preceded by a <see cref="NetworkDelay"/> pause. A failed attempt is retried
/// up to <see cref="MaxTry"/> times and the first successful body is returned.
/// </remarks>
/// <param name="_">Unused; only enables extension-method syntax.</param>
/// <param name="url">Address of the page.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response text, or an empty string when every attempt failed.</returns>
public static string GetHtml(this HttpWebRequest _, string url, CookieContainer cookieContainer)
{
    // One initial attempt plus up to MaxTry retries; the retried result is what gets returned.
    for (int attempt = 0; attempt <= MaxTry; attempt++)
    {
        Thread.Sleep(NetworkDelay);
        HttpWebRequest request = null;
        HttpWebResponse response = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = contentType;
            request.ServicePoint.ConnectionLimit = MaxTry;
            request.Referer = url;
            request.Accept = accept;
            request.UserAgent = userAgent;
            request.Method = "GET";

            response = (HttpWebResponse)request.GetResponse();
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best effort: fall through to the next attempt.
        }
        finally
        {
            if (request != null) request.Abort();
            if (response != null) response.Close();
        }
    }

    return string.Empty;
}
 
-         #endregion
 
-         #region 2、获取字符流
 
/// <summary>
/// Opens the response stream of a GET request, for example to load a remote image.
/// </summary>
/// <remarks>
/// A failed attempt is retried up to <see cref="MaxTry"/> times, pausing for
/// <see cref="NetworkDelay"/> before each retry. The caller owns the returned stream and
/// should dispose it when done.
/// <example>
/// var cookies = new System.Net.CookieContainer();
/// Stream s = ((HttpWebRequest)null).GetStream("http://www.baidu.com", cookies);
/// picVerify.Image = Image.FromStream(s);
/// </example>
/// </remarks>
/// <param name="_">Unused; only enables extension-method syntax.</param>
/// <param name="url">Address to request.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response stream, or <c>null</c> when every attempt failed.</returns>
public static Stream GetStream(this HttpWebRequest _, string url, CookieContainer cookieContainer)
{
    for (int attempt = 0; attempt <= MaxTry; attempt++)
    {
        if (attempt > 0)
        {
            Thread.Sleep(NetworkDelay);
        }

        HttpWebRequest request = null;
        HttpWebResponse response = null;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = contentType;
            request.ServicePoint.ConnectionLimit = MaxTry;
            request.Referer = url;
            request.Accept = accept;
            request.UserAgent = userAgent;
            request.Method = "GET";

            response = (HttpWebResponse)request.GetResponse();
            // The response is intentionally left open: the stream handed back depends on it.
            return response.GetResponseStream();
        }
        catch (Exception)
        {
            // Release what was acquired, then retry the stream request itself.
            if (request != null) request.Abort();
            if (response != null) response.Close();
        }
    }

    return null;
}
 
-         #endregion
 
-         #region 3、清除HTML标记
 
/// <summary>
/// Cleans up redundant attributes in HTML that was converted from a Word document.
/// </summary>
/// <remarks>
/// Keeps only the content of the <c>body</c> element, then drops background-color and
/// font-family declarations, non-breaking spaces, zero-valued style declarations,
/// <c>alt</c> attributes and <c>-aw…</c> attributes.
/// </remarks>
/// <param name="html">Full HTML document text.</param>
/// <returns>The cleaned body markup; empty when no <c>body</c> element is found.</returns>
public static string ClearHtml(this string html)
{
    string withoutFonts = Regex.Replace(html, @"background-color:#?\w{3,7}|font-family:'?[\w|\(|\)]*'?;?", string.Empty);
    string body = Regex.Match(withoutFonts, @"<body[^>]*>([\s\S]*)<\/body>").Groups[1].Value;

    // Only non-breaking spaces are removed (entity form or literal U+00A0). Removing ordinary
    // spaces would glue tag names to their attributes and corrupt the markup.
    body = body.Replace("&#xa0;", string.Empty).Replace("\u00A0", string.Empty);

    body = Regex.Replace(body, @"\w+-?\w+:0\w+;?", string.Empty); // zero-valued declarations such as margin-top:0pt;
    body = Regex.Replace(body, "alt=\"(.+?)\"", string.Empty);     // alt attributes
    body = Regex.Replace(body, @"-aw.+?\s", string.Empty);         // -aw attributes emitted by the Word conversion
    return body;
}
 
/// <summary>
/// Strips scripts, tags and comment markers from HTML and decodes a handful of common entities.
/// </summary>
/// <param name="htmlstring">Source text containing HTML.</param>
/// <returns>The text with markup removed.</returns>
public static string RemoveHtml(this string htmlstring)
{
    // Singleline lets ".*?" cross line breaks; without it multi-line script bodies survived
    // and their code leaked into the output as text.
    htmlstring = Regex.Replace(htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove tags.
    htmlstring = Regex.Replace(htmlstring, "<.+?>", "", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Collapse a line break together with the whitespace that follows it.
    htmlstring = Regex.Replace(htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Leftover comment delimiters.
    htmlstring = Regex.Replace(htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode common entities.
    htmlstring = Regex.Replace(htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(nbsp|#160);", "   ", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Any other numeric entity is dropped.
    htmlstring = Regex.Replace(htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // The three trailing string.Replace calls were removed: their results were never assigned,
    // so they had no effect. Assigning them would delete the "<" and ">" just decoded above.
    return htmlstring;
}
 
-         #endregion
 
-         #region 4、匹配页面的链接
 
-         #region 4.1获取页面的链接正则
 
/// <summary>
/// Collects every <c>href</c> attribute value found in the given HTML.
/// </summary>
/// <param name="HtmlCode">HTML source to scan.</param>
/// <returns>
/// The lower-cased matches with the "href=" prefix removed, each followed by a "|" separator;
/// empty when nothing matches.
/// </returns>
public static string GetHref(this string HtmlCode)
{
    const string hrefPattern = @"(h|H)(r|R)(e|E)(f|F) *= *('|"")?((\w|\\|\/|\.|:|-|_)+)[\S]*";
    var joined = new StringBuilder();
    foreach (Match hit in Regex.Matches(HtmlCode, hrefPattern))
    {
        string cleaned = hit.Value.ToLower().Replace("href=", "").Trim();
        joined.Append(cleaned).Append("|");
    }
    return joined.ToString();
}
 
-         #endregion
 
-         #region  4.2取得所有链接URL
 
/// <summary>
/// Extracts the target of every <c>&lt;a href=…&gt;…&lt;/a&gt;</c> link in the HTML.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>The captured href values, one per line, in their original casing.</returns>
public static string GetAllUrl(this string html)
{
    var urls = new StringBuilder();

    // Matching case-insensitively instead of lower-casing the input keeps URL casing intact.
    // NextMatch returns a new Match, so it must be reassigned or the loop never advances.
    for (Match m = Regex.Match(html, "<a href=(.*?)>.*?</a>", RegexOptions.IgnoreCase); m.Success; m = m.NextMatch())
    {
        urls.AppendLine(m.Groups[1].Value);
    }

    return urls.ToString();
}
 
-         #endregion
 
-         #region 4.3获取所有连接文本
 
/// <summary>
/// Extracts the inner text of every <c>&lt;a href=…&gt;…&lt;/a&gt;</c> link in the HTML.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>The link texts (1 to 100 characters each), one per line, in their original casing.</returns>
public static string GetAllLinkText(this string html)
{
    var texts = new StringBuilder();

    // The capture group was garbled as "(1,100})"; it is meant to be a bounded wildcard.
    // The lazy quantifier stops at the first closing tag so adjacent links are not merged.
    // NextMatch returns a new Match, so it must be reassigned or the loop never advances.
    for (Match m = Regex.Match(html, "<a href=.*?>(.{1,100}?)</a>", RegexOptions.IgnoreCase); m.Success; m = m.NextMatch())
    {
        texts.AppendLine(m.Groups[1].Value);
    }

    return texts.ToString();
}
 
-         #endregion
 
-         #endregion
 
-         #region  5、匹配页面的图片地址
 
/// <summary>
/// Prefixes the <c>src</c> of every <c>&lt;img src="…</c> occurrence with a base address,
/// turning relative image paths into absolute ones.
/// </summary>
/// <param name="html">HTML source.</param>
/// <param name="imgDest">Base address placed, followed by "/", in front of each image path.</param>
/// <returns>The HTML with rewritten image sources.</returns>
public static string ReplaceHtmlImgSource(this string html, string imgDest)
{
    const string srcOpening = "<img src=\"";
    string rebased = srcOpening + imgDest + "/";
    return html.Replace(srcOpening, rebased);
}
 
/// <summary>
/// Finds the image addresses of all <c>img</c> tags in a page.
/// </summary>
/// <param name="htmlCode">HTML source; it is lower-cased before matching.</param>
/// <param name="imgHttp">Address prefix passed on to <c>GetImg</c> for each tag.</param>
/// <returns>The result of <c>GetImg</c> for every tag, each followed by a "|" separator.</returns>
public static string GetImgSrc(this string htmlCode, string imgHttp)
{
    var sources = new StringBuilder();
    foreach (Match tag in Regex.Matches(htmlCode.ToLower(), @"<img.+?>"))
    {
        string normalized = tag.Value.ToLower().Trim();
        sources.Append(GetImg(normalized, imgHttp)).Append("|");
    }
    return sources.ToString();
}
 
/// <summary>
/// Converts absolute image sources into site-relative ones by dropping the scheme and host.
/// </summary>
/// <param name="s">HTML source.</param>
/// <returns>
/// The HTML where each <c>&lt;img src="http(s)://host/…</c> has become <c>&lt;img src="/…</c>.
/// </returns>
public static string ConvertImgSrcToRelativePath(this string s)
{
    // "https?" covers both schemes; previously https sources were left absolute.
    return Regex.Replace(s, @"<img src=""(https?:\/\/.+?)/", @"<img src=""/");
}
 
/// <summary>
/// Finds all <c>img</c> tags whose first attribute is <c>src</c>.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>All matches; the named group <c>src</c> of each match holds the image address.</returns>
public static MatchCollection MatchImgTags(this string html)
{
    const string imgPattern = @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))";
    MatchCollection found = Regex.Matches(html, imgPattern);
    return found;
}
 
/// <summary>
/// Finds the first <c>img</c> tag whose first attribute is <c>src</c>.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>The first match (unsuccessful when none exists); its named group <c>src</c> holds the image address.</returns>
public static Match MatchImgTag(this string html)
{
    const string imgPattern = @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))";
    Match first = Regex.Match(html, imgPattern);
    return first;
}
 
/// <summary>
/// Returns the <c>src</c> of the first <c>img</c> tag whose address ends in a 3-4 character extension.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>The image address, or an empty string when no tag matches.</returns>
public static string MatchFirstImgSrc(this string html)
{
    const string imgPattern = @"<img\s+[^>]*\s*src\s*=\s*['""]?(\S+\.\w{3,4})['""]?[^>]*>";
    Match first = Regex.Match(html, imgPattern);
    return first.Groups[1].Value;
}
 
/// <summary>
/// Returns the <c>src</c> of a randomly chosen <c>img</c> tag in the HTML.
/// </summary>
/// <param name="html">HTML source to scan.</param>
/// <returns>The chosen image address, or an empty string when no tag matches.</returns>
public static string MatchRandomImgSrc(this string html)
{
    const string imgPattern = @"<img\s+[^>]*\s*src\s*=\s*['""]?(\S+\.\w{3,4})['""]?[^>]*>";
    MatchCollection images = Regex.Matches(html, imgPattern);
    if (images.Count <= 0)
    {
        return String.Empty;
    }

    // StrictNext is defined elsewhere in the project; presumably it yields an index below Count (TODO confirm).
    int pick = new Random().StrictNext(images.Count);
    return images[pick].Groups[1].Value;
}
 
/// <summary>
/// Extracts the image path from an <c>&lt;img src="" /&gt;</c> string.
/// </summary>
/// <param name="imgString">The <c>img</c> tag text; it is lower-cased before matching.</param>
/// <param name="imgHttp">Prefix prepended when the path contains none of the known domain suffixes.</param>
/// <returns>
/// The matched path (text after "src=" up to a bmp/jpg/gif/png extension), returned as-is when it
/// contains a known domain suffix and otherwise prefixed with <paramref name="imgHttp"/>.
/// </returns>
public static string GetImg(this string imgString, string imgHttp)
{
    const string srcPattern = @"src=.+\.(bmp|jpg|gif|png|)";
    string location = "";
    foreach (Match hit in Regex.Matches(imgString.ToLower(), srcPattern))
    {
        location += hit.Value.ToLower().Trim().Replace("src=", "");
    }

    // A path that already mentions one of these suffixes is treated as absolute.
    string[] domainMarkers = { ".net", ".com", ".org", ".cn", ".cc", ".info", ".biz", ".tv" };
    foreach (string marker in domainMarkers)
    {
        if (location.IndexOf(marker) != -1)
        {
            return location;
        }
    }

    return imgHttp + location;
}
 
-         #endregion
 
-         #region 6、抓取远程页面内容
 
/// <summary>
/// Fetches the content of a remote page with a GET request.
/// </summary>
/// <param name="_">Unused; only enables extension-method syntax.</param>
/// <param name="tUrl">Address to request.</param>
/// <returns>
/// The response text decoded with the system default encoding, every line terminated by CRLF;
/// when the request fails, the exception message is returned instead.
/// </returns>
public static string Get_Http(this HttpWebRequest _, string tUrl)
{
    try
    {
        var request = (HttpWebRequest)WebRequest.Create(tUrl);
        request.Timeout = 19600;

        // using blocks release the connection even when reading throws.
        using (var response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (var reader = new StreamReader(body, Encoding.Default))
        {
            var text = new StringBuilder();
            string line;

            // ReadLine returns null only at the real end of the stream, whereas Peek can
            // report -1 early on a network stream and truncate the page.
            while ((line = reader.ReadLine()) != null)
            {
                text.Append(line).Append("\r\n");
            }

            return text.ToString();
        }
    }
    catch (Exception ee)
    {
        return ee.Message;
    }
}
 
/// <summary>
/// Fetches the content of a remote page with a form-encoded POST request.
/// </summary>
/// <param name="_">Unused; only enables extension-method syntax.</param>
/// <param name="url">Address to request.</param>
/// <param name="postData">Form data to send as the request body.</param>
/// <param name="encodeType">Name of the encoding used to turn <paramref name="postData"/> into bytes.</param>
/// <returns>
/// The response text decoded with the system default encoding; when the request fails,
/// the exception message is returned instead.
/// </returns>
public static string Post_Http(this HttpWebRequest _, string url, string postData, string encodeType)
{
    try
    {
        Encoding encoding = Encoding.GetEncoding(encodeType);
        byte[] payload = encoding.GetBytes(postData);

        var request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = payload.Length;

        using (Stream body = request.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }

        // The response and reader were previously never closed, leaking the connection.
        // NOTE(review): the response is decoded with Encoding.Default, not encodeType - confirm intent.
        using (var response = (HttpWebResponse)request.GetResponse())
        using (var reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        return ex.Message;
    }
}
 
-         #endregion
 
-         #region 7、压缩HTML输出
 
/// <summary>
/// Compacts HTML output by removing whitespace between tags and line breaks with their indentation.
/// </summary>
/// <param name="html">HTML source.</param>
/// <returns>The compacted HTML; the <c>body</c> tag pair is rewritten in lower case.</returns>
public static string ZipHtml(this string html)
{
    // Whitespace that sits between a closing ">" and the next "<".
    string collapsed = Regex.Replace(html, @">\s+?<", "><");

    // CRLF line breaks plus any indentation that follows them.
    string singleLine = Regex.Replace(collapsed, @"\r\n\s*", "");

    return Regex.Replace(singleLine, @"<body([\s|\S]*?)>([\s|\S]*?)</body>", @"<body$1>$2</body>", RegexOptions.IgnoreCase);
}
 
-         #endregion
 
-         #region 8、过滤HTML标签
 
-         #region 8.1过滤指定HTML标签
 
/// <summary>
/// Removes the opening and closing tags of one HTML element, keeping its content.
/// </summary>
/// <param name="sTextStr">Text to filter; null or empty yields an empty string.</param>
/// <param name="htmlStr">
/// Tag name to remove, e.g. <c>a</c>, <c>img</c>, <c>p</c> or <c>div</c>. It is inserted into a
/// regular expression unescaped. When null or empty, every tag is removed.
/// </param>
/// <returns>The text without the specified tags.</returns>
public static string DelHtml(this string sTextStr, string htmlStr)
{
    if (string.IsNullOrEmpty(sTextStr))
    {
        return "";
    }

    if (string.IsNullOrEmpty(htmlStr))
    {
        // With no tag name the original patterns degenerate to "strip every tag".
        return Regex.Replace(sTextStr, "<[^>]*>", "", RegexOptions.IgnoreCase);
    }

    // The lookahead demands that the tag name ends here, so "p" no longer strips <pre>
    // and "a" no longer strips <abbr>.
    string opening = "<(?:" + htmlStr + @")(?=[\s/>])[^>]*>";
    string closing = "</(?:" + htmlStr + @")\s*>";

    string stripped = Regex.Replace(sTextStr, opening, "", RegexOptions.IgnoreCase);
    return Regex.Replace(stripped, closing, "", RegexOptions.IgnoreCase);
}
 
-         #endregion
 
-         #region 8.2过滤HTML中的不安全标签
 
/// <summary>
/// Defuses potentially unsafe HTML: drops the leading "o" of <c>on…=</c> event attributes and
/// inserts a dot after the keywords script, frame, form, meta, behavior and style.
/// </summary>
/// <param name="content">HTML source.</param>
/// <returns>The filtered content.</returns>
public static string RemoveUnsafeHtml(this string content)
{
    var eventAttribute = new Regex(@"(\<|\s+)o([a-z]+\s?=)", RegexOptions.IgnoreCase);
    var riskyKeyword = new Regex(@"(script|frame|form|meta|behavior|style)([\s|:|>])+", RegexOptions.IgnoreCase);

    string defused = eventAttribute.Replace(content, "$1$2");
    return riskyKeyword.Replace(defused, "$1.$2");
}
 
-         #endregion
 
-         #endregion
 
-         #region 转换HTML操作
 
-         #region HTML转行成TEXT
 
/// <summary>
/// Converts HTML to plain text by deleting scripts, tags, common entities and comment markers.
/// </summary>
/// <param name="strHtml">HTML source.</param>
/// <returns>The remaining plain text.</returns>
public static string HtmlToTxt(this string strHtml)
{
    string[] patterns =
    {
        // (?s) lets ".*?" cross line breaks so multi-line script bodies are removed as well.
        @"(?s)<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    string text = strHtml;
    foreach (string pattern in patterns)
    {
        text = Regex.Replace(text, pattern, string.Empty, RegexOptions.IgnoreCase);
    }

    // The three trailing string.Replace calls were removed: their results were never assigned,
    // so they had no effect on the returned text.
    return text;
}
 
-         #endregion
 
-         #region 字符串转换为 Html
 
/// <summary>
/// Encodes plain text for display as HTML.
/// </summary>
/// <remarks>
/// Ampersands, spaces, double quotes and angle brackets become their named entities, single
/// quotes are doubled, and every line break becomes <c>&lt;br /&gt;</c>.
/// </remarks>
/// <param name="str">Plain text.</param>
/// <returns>The HTML-encoded text.</returns>
public static string StringToHtml(this string str)
{
    // The ampersand goes first so the entities produced below are not escaped a second time.
    str = str.Replace("&", "&amp;");
    str = str.Replace(" ", "&nbsp;");
    // NOTE(review): doubling single quotes looks like SQL-style escaping rather than HTML - confirm intent.
    str = str.Replace("'", "''");
    str = str.Replace("\"", "&quot;");
    str = str.Replace("<", "&lt;");
    str = str.Replace(">", "&gt;");
    // CRLF is handled before the single characters so one line break yields one <br />.
    str = str.Replace("\r\n", "<br />");
    str = str.Replace("\n", "<br />");
    str = str.Replace("\r", "<br />");
    return str;
}
 
-         #endregion
 
-         #region Html转换成字符串
 
/// <summary>
/// Converts HTML to a plain string: <c>br</c> tags become CRLF, a few named entities are
/// decoded, and all tags are then removed.
/// </summary>
/// <param name="strHtml">HTML source.</param>
/// <returns>The text without markup.</returns>
public static string HtmlToString(this string strHtml)
{
    strHtml = strHtml.Replace("<br>", "\r\n");
    strHtml = strHtml.Replace(@"<br />", "\r\n");
    strHtml = strHtml.Replace(@"<br/>", "\r\n");
    strHtml = strHtml.Replace("&gt;", ">");
    strHtml = strHtml.Replace("&lt;", "<");
    strHtml = strHtml.Replace("&nbsp;", " ");
    strHtml = strHtml.Replace("&quot;", "\"");
    // Tags are stripped after decoding, so markup that arrived entity-encoded is removed too.
    strHtml = Regex.Replace(strHtml, @"<\/?[^>]+>", "", RegexOptions.IgnoreCase);
    return strHtml;
}
 
-         #endregion
 
-         #endregion
 
-         #region 获取URL编码
 
-         /// <summary>
-         /// Downloads the resource at <paramref name="url"/> and reports the character set it declares.
-         /// </summary>
-         /// <remarks>
-         /// The body is only inspected when the status is 200 and the reported content length is
-         /// below 1 MiB. The charset is taken from the first <c>charset=</c> declaration found in
-         /// the body, then from the response's <c>CharacterSet</c>, and finally from
-         /// <c>Encoding.Default.BodyName</c>. Redirects are not followed and the request times out
-         /// after 20 seconds. Exceptions raised by the request are not caught here.
-         /// </remarks>
-         /// <param name="_">Unused; only present so the method can be called as an extension.</param>
-         /// <param name="url">The address to probe.</param>
-         /// <returns>The name of the detected character set.</returns>
-         public static string GetEncoding(this HttpWebRequest _, string url)
-         {
-             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
-             request.Timeout = 20000;
-             request.AllowAutoRedirect = false;
-             // The using blocks release the reader before the response, and do so on every exit path.
-             using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
-             {
-                 if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
-                 {
-                     return Encoding.Default.BodyName;
-                 }
-                 bool isGzip = string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);
-                 string html;
-                 using (Stream body = response.GetResponseStream())
-                 using (StreamReader reader = isGzip
-                     ? new StreamReader(new GZipStream(body, CompressionMode.Decompress))
-                     : new StreamReader(body, Encoding.ASCII))
-                 {
-                     html = reader.ReadToEnd();
-                 }
-                 // Accept an optional opening quote and capture only characters that can appear in a
-                 // charset name, so charset="utf-8" no longer yields an empty value and an unquoted
-                 // declaration no longer swallows the markup that follows it.
-                 Match declared = Regex.Match(html, @"charset\b\s*=\s*[""']?(?<charset>[A-Za-z0-9_\-.:]+)");
-                 if (declared.Success)
-                 {
-                     return declared.Groups["charset"].Value;
-                 }
-                 // CharacterSet may be null as well as empty when no content type was sent.
-                 if (!string.IsNullOrEmpty(response.CharacterSet))
-                 {
-                     return response.CharacterSet;
-                 }
-                 return Encoding.Default.BodyName;
-             }
-         }
 
-         #endregion
 
-         #region 判断URL是否有效
 
-         /// <summary>
-         /// Requests <paramref name="url"/> and reports whether it could be fetched.
-         /// </summary>
-         /// <remarks>
-         /// The status is read from the error response itself rather than parsed out of the
-         /// exception message, whose text is localised and does not contain the "500 " / "401 "
-         /// fragments that were previously searched for.
-         /// </remarks>
-         /// <param name="_">Unused; only present so the method can be called as an extension.</param>
-         /// <param name="url">The address to check; may point at a page, an image, or any other resource.</param>
-         /// <returns>
-         /// 200 when the request succeeds; the HTTP status code returned by the server when it
-         /// answers with a protocol error; 401 when any other kind of exception is raised.
-         /// </returns>
-         public static int GetUrlError(this HttpWebRequest _, string url)
-         {
-             try
-             {
-                 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
-                 // NOTE(review): this changes a process-wide setting, not just this request.
-                 ServicePointManager.Expect100Continue = false;
-                 using (request.GetResponse())
-                 {
-                     return 200;
-                 }
-             }
-             catch (WebException exception)
-             {
-                 // NOTE(review): non-protocol failures (DNS, timeout, ...) are still reported as 200,
-                 // as before; confirm whether callers rely on that.
-                 if (exception.Status != WebExceptionStatus.ProtocolError)
-                 {
-                     return 200;
-                 }
-                 // Dispose the error response so its connection is released.
-                 using (HttpWebResponse errorResponse = exception.Response as HttpWebResponse)
-                 {
-                     if (errorResponse == null)
-                     {
-                         return 200;
-                     }
-                     return (int)errorResponse.StatusCode;
-                 }
-             }
-             catch (Exception)
-             {
-                 // Anything else (for example a malformed URL) keeps the historical 401 result.
-                 return 401;
-             }
-         }
 
-         #endregion
 
-         #region 返回 HTML 字符串的编码解码结果
 
-         /// <summary>
-         /// HTML-encodes a string by delegating to <c>HttpUtility.HtmlEncode</c>.
-         /// </summary>
-         /// <param name="inputData">The text to encode.</param>
-         /// <returns>The encoded text as produced by <c>HttpUtility.HtmlEncode</c>.</returns>
-         public static string HtmlEncode(string inputData)
-         {
-             string encoded = HttpUtility.HtmlEncode(inputData);
-             return encoded;
-         }
 
-         /// <summary>
-         /// HTML-decodes a string by delegating to <c>HttpUtility.HtmlDecode</c>.
-         /// </summary>
-         /// <param name="str">The text to decode.</param>
-         /// <returns>The decoded text as produced by <c>HttpUtility.HtmlDecode</c>.</returns>
-         public static string HtmlDecode(string str)
-         {
-             string decoded = HttpUtility.HtmlDecode(str);
-             return decoded;
-         }
 
-         #endregion
 
-         /// <summary>
-         /// Parses cookies out of a string and adds them to the collection.
-         /// </summary>
-         /// <remarks>
-         /// Each cookie must have the form <c>name=value;Domain=domain;Path=path</c>; several
-         /// cookies are separated by commas, for example
-         /// <c>SID=abc;Domain=.google.com;Path=/,LSID=def;Domain=www.google.com;Path=/accounts</c>.
-         /// Text that does not match this form is ignored.
-         /// </remarks>
-         /// <param name="cookie">The collection that receives the parsed cookies.</param>
-         /// <param name="cookieString">The text to parse; <c>null</c> or empty adds nothing.</param>
-         /// <returns>The same <paramref name="cookie"/> instance, for chaining.</returns>
-         public static CookieCollection GetCookieCollection(this CookieCollection cookie, string cookieString)
-         {
-             if (string.IsNullOrEmpty(cookieString))
-             {
-                 return cookie;
-             }
-             Regex re = new Regex("([^;,]+)=([^;,]+);Domain=([^;,]+);Path=([^;,]+)", RegexOptions.IgnoreCase);
-             foreach (Match m in re.Matches(cookieString))
-             {
-                 string name = m.Groups[1].Value;
-                 string value = m.Groups[2].Value;
-                 string domain = m.Groups[3].Value;
-                 string path = m.Groups[4].Value;
-                 // Cookie's constructor takes (name, value, path, domain); the path is capture group 4,
-                 // not the domain group that used to be passed for both arguments.
-                 cookie.Add(new Cookie(name, value, path, domain));
-             }
-             return cookie;
-         }
 
-         #region 从HTML中获取文本,保留br,p,img
 
-         /// <summary>
-         /// Strips HTML tags from the input while keeping <c>br</c>, <c>p</c> and <c>img</c> tags.
-         /// </summary>
-         /// <remarks>
-         /// Matching is case-insensitive. A tag is kept whenever its name merely begins with
-         /// <c>br</c>, <c>p</c> or <c>img</c>, so tags such as <c>pre</c> are kept too.
-         /// </remarks>
-         /// <param name="HTML">The HTML source.</param>
-         /// <returns>The input with every other tag removed.</returns>
-         public static string GetTextFromHTML(this string HTML)
-         {
-             const string removableTagPattern = @"</?(?!br|/?p|img)[^>]*>";
-             return Regex.Replace(HTML, removableTagPattern, "", RegexOptions.IgnoreCase);
-         }
 
-         #endregion
 
-         #region 获取HTML页面内制定Key的Value内容
 
-         /// <summary>
-         /// Reads the value of a hidden input field from HTML source.
-         /// </summary>
-         /// <remarks>
-         /// Only the first input whose markup has <c>type="hidden"</c> directly after the tag name,
-         /// followed later by the <c>name</c> and then the <c>value</c> attribute, is considered.
-         /// Matching is case-insensitive.
-         /// </remarks>
-         /// <param name="html">The HTML source; <c>null</c> or empty yields an empty string.</param>
-         /// <param name="key">The name of the hidden field; it is matched literally.</param>
-         /// <returns>The field's value, or an empty string when no such field is found.</returns>
-         public static string GetHiddenKeyValue(this string html, string key)
-         {
-             if (string.IsNullOrEmpty(html))
-             {
-                 return "";
-             }
-             // Escape the key so names such as "ctl00$Main" are matched literally; under
-             // IgnorePatternWhitespace an unescaped space or '#' would otherwise be dropped
-             // or start a pattern comment.
-             string escapedKey = Regex.Escape(key ?? string.Empty);
-             string sRegex = string.Format("<input\\s*type=\"hidden\".*?name=\"{0}\".*?\\s*value=[\"|'](?<value>.*?)[\"|'^/]", escapedKey);
-             Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
-             Match mc = re.Match(html);
-             return mc.Success ? mc.Groups["value"].Value : "";
-         }
 
-         #endregion
 
-         /// <summary>
-         /// Replaces CRLF and LF line breaks with the HTML line break <c>br</c> element.
-         /// </summary>
-         /// <param name="str">The text to convert.</param>
-         /// <returns>The converted text, or an empty string when <paramref name="str"/> is <c>null</c>.</returns>
-         public static string StrFormat(this string str)
-         {
-             if (str == null)
-             {
-                 return "";
-             }
-             // CRLF is replaced before the lone LF so that a CRLF pair produces one break.
-             return str.Replace("\r\n", "<br />").Replace("\n", "<br />");
-         }
 
-         /// <summary>
-         /// Replaces commas, apostrophes and semicolons with the placeholder tokens
-         /// <c>&amp;def</c>, <c>&amp;dot</c> and <c>&amp;dec</c>.
-         /// </summary>
-         /// <param name="strHtml">The text to encode.</param>
-         /// <returns>
-         /// The encoded text, or an empty string when <paramref name="strHtml"/> is <c>null</c> or empty.
-         /// </returns>
-         public static string EncodeHtml(this string strHtml)
-         {
-             // A null input used to pass the != "" check and then fail on Replace.
-             if (string.IsNullOrEmpty(strHtml))
-             {
-                 return "";
-             }
-             return strHtml.Replace(",", "&def").Replace("'", "&dot").Replace(";", "&dec");
-         }
 
-         /// <summary>
-         /// Escapes backslashes, apostrophes and double quotes with a leading backslash so the
-         /// text can be placed inside a script string literal.
-         /// </summary>
-         /// <param name="str">The text to escape.</param>
-         /// <returns>The escaped text.</returns>
-         [Obsolete("不建议使用", true)]
-         public static string ReplaceStrToScript(string str)
-         {
-             // Backslashes go first so the backslashes added for the quotes are not doubled again.
-             return str.Replace("\\", "\\\\")
-                       .Replace("'", "\\'")
-                       .Replace("\"", "\\\"");
-         }
 
-     }
 
- }
 
 
  |