网页地址" /// 返回网页源文件 public static string GetHtmlSource2(string url">
 | 注册
请输入搜索内容

热门搜索

Java Linux MySQL PHP JavaScript Hibernate jQuery Nginx
mb78
10年前发布

C#抓取网页的几种方法

/// <summary>
/// Fetches the resource at <paramref name="url"/> with an <see cref="HttpWebRequest"/>
/// and returns the response body as text.
/// </summary>
/// <remarks>
/// The body is read through a <see cref="StreamReader"/> constructed with
/// <see cref="Encoding.Default"/>. That constructor looks for a byte order mark, so a
/// page that starts with a BOM is decoded with the encoding the BOM announces; a page
/// without a BOM is decoded with <see cref="Encoding.Default"/>.
/// The response, its stream and the reader are all disposed before returning, even when
/// reading fails, so the underlying connection is always released.
/// </remarks>
/// <param name="url">Address of the page to fetch; it is also sent as the Referer header.</param>
/// <returns>The page source.</returns>
public static string GetHtmlSource2(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Accept = "*/*"; // accept any content type
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)"; // present ourselves as Internet Explorer
    request.AllowAutoRedirect = true; // follow redirect responses
    request.Referer = url; // report the requested page itself as the referrer
    // No CookieContainer is attached to the request.

    // Dispose in reverse order of acquisition; the original closed only the stream,
    // and only on the success path, leaking the response/connection on errors.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
    {
        return reader.ReadToEnd();
    }
}