C#获取页面的HTML
C#提取HTML代码的方法:
/// <summary>
/// Fetches the HTML of a page over HTTP/HTTPS with cookie support, a delay
/// before every attempt and a bounded number of retries.
/// </summary>
/// <remarks>
/// Both <c>GetHtml</c> overloads are best-effort: they never throw for
/// network or protocol failures and return <see cref="string.Empty"/> when
/// the page could not be retrieved.
/// </remarks>
public class HTMLHelper
{
    // NOTE(review): the original listing replaced its private fields with the
    // placeholder text "私有字段" ("private fields"). The declarations below
    // are the members the methods rely on; the VALUES are assumptions and
    // must be confirmed against the original project.

    // Content-Type header sent with every request.
    private static readonly string contentType = "application/x-www-form-urlencoded";

    // Accept header sent with every request.
    private static readonly string accept = "*/*";

    // User-Agent header sent with every request.
    private static readonly string userAgent = "Mozilla/5.0 (compatible; HTMLHelper)";

    // Encoding used to decode the response body.
    private static readonly Encoding encoding = Encoding.UTF8;

    // Number of retries after the first failed attempt; also used as the
    // connection limit of the request's service point.
    private static readonly int maxTry = 3;

    // Delay in milliseconds applied before every attempt.
    private static readonly int NetworkDelay = 1000;

    /// <summary>
    /// Gets the HTML of a page, optionally submitting data.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="postData">
    /// Form-encoded string to submit. When null or empty, a plain GET is issued.
    /// </param>
    /// <param name="isPost">
    /// True to send <paramref name="postData"/> as a POST body; false to send
    /// it as the query string of a GET request.
    /// </param>
    /// <param name="cookieContainer">Cookie container attached to the request.</param>
    /// <returns>The response body, or <see cref="string.Empty"/> on failure.</returns>
    public static string GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
    {
        if (string.IsNullOrEmpty(postData))
        {
            return GetHtml(url, cookieContainer);
        }

        if (!isPost)
        {
            // A GET request cannot carry a body: HttpWebRequest.GetRequestStream
            // throws ProtocolViolationException for it, so the previous code
            // always failed on this path. Send the data as a query string.
            return Execute(AppendQuery(url, postData), null, cookieContainer);
        }

        // NOTE(review): the body is encoded with Encoding.Default while the
        // response is decoded with `encoding`; kept as in the original listing,
        // confirm this asymmetry is intended.
        byte[] body = Encoding.Default.GetBytes(postData);
        return Execute(url, body, cookieContainer);
    }

    /// <summary>
    /// Gets the HTML of a page with a GET request.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="cookieContainer">Cookie container attached to the request.</param>
    /// <returns>The response body, or <see cref="string.Empty"/> on failure.</returns>
    public static string GetHtml(string url, CookieContainer cookieContainer)
    {
        return Execute(url, null, cookieContainer);
    }

    // Appends `query` to `url` as a query string, keeping any #fragment last.
    private static string AppendQuery(string url, string query)
    {
        if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(query))
        {
            return url;
        }

        string fragment = string.Empty;
        int hashIndex = url.IndexOf('#');
        if (hashIndex >= 0)
        {
            fragment = url.Substring(hashIndex);
            url = url.Substring(0, hashIndex);
        }

        string separator;
        if (url.IndexOf('?') < 0)
        {
            separator = "?";
        }
        else if (url.EndsWith("?", StringComparison.Ordinal) || url.EndsWith("&", StringComparison.Ordinal))
        {
            separator = string.Empty;
        }
        else
        {
            separator = "&";
        }

        return url + separator + query + fragment;
    }

    // Performs the request (POST when `body` is non-null, GET otherwise) with
    // one initial attempt plus up to `maxTry` retries, and returns the body of
    // the first successful response.
    private static string Execute(string url, byte[] body, CookieContainer cookieContainer)
    {
        // A missing, malformed or non-HTTP address can never succeed, so fail
        // fast instead of sleeping through the whole retry budget.
        Uri target;
        if (string.IsNullOrEmpty(url)
            || !Uri.TryCreate(url, UriKind.Absolute, out target)
            || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
        {
            return string.Empty;
        }

        // The attempt counter is local: a shared static counter made the retry
        // budget depend on unrelated calls and on other threads.
        for (int attempt = 0; attempt <= maxTry; attempt++)
        {
            Thread.Sleep(NetworkDelay);

            HttpWebRequest request = null;
            try
            {
                request = (HttpWebRequest)WebRequest.Create(target);
                request.CookieContainer = cookieContainer;
                request.ContentType = contentType;
                // ConnectionLimit must be positive.
                request.ServicePoint.ConnectionLimit = Math.Max(1, maxTry);
                request.Referer = url;
                request.Accept = accept;
                request.UserAgent = userAgent;
                request.Method = body != null ? "POST" : "GET";

                if (body != null)
                {
                    request.ContentLength = body.Length;
                    using (Stream requestStream = request.GetRequestStream())
                    {
                        requestStream.Write(body, 0, body.Length);
                    }
                }

                // `using` releases the connection even if reading the body fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberately best-effort: callers rely on an empty string
                // rather than an exception. Abort the failed request so its
                // connection is released, then fall through to the next attempt.
                if (request != null)
                {
                    request.Abort();
                }
            }
        }

        return string.Empty;
    }
}