先上软件效果图
代码如下
// 1. Fetch the HTML source of a web page from its URL.

/// <summary>
/// Requests <paramref name="Url"/> over HTTP and returns the response body decoded as UTF-8 text.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <returns>
/// The response body, or an empty string when the request or the read fails
/// (in which case an error dialog is shown).
/// </returns>
public static string GetWebContent(string Url)
{
    string strResult = "";
    try
    {
        // Create the HttpWebRequest for the target address.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
        // Timeout for the request, in milliseconds.
        request.Timeout = 30000;
        request.Headers.Set("Pragma", "no-cache");

        // The response, its stream and the reader all hold the underlying connection;
        // dispose them deterministically so repeated calls do not exhaust the connection pool.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream streamReceive = response.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
        {
            // The text has always been decoded as UTF-8 here; an unused GB2312
            // encoding local that never took part in decoding has been removed.
            strResult = streamReader.ReadToEnd();
        }
    }
    catch
    {
        // Deliberately best-effort: any failure is reported to the user and
        // the caller receives the empty string.
        MessageBox.Show("出错");
    }
    return strResult;
}
2.根据元素判断是否使用了指定名字的样式,此方法有不妥之处,请大师指点
// This helper was written because, per the author, li.GetAttribute("class") returns nothing
// for unquoted markup such as <a class=style1>; the search below is admittedly loose.
// NOTE(review): WinForms HtmlElement.GetAttribute may expect "className" instead of "class" — confirm.

/// <summary>
/// Reports whether the opening tag of <paramref name="li"/> contains the text
/// <paramref name="ClassName"/>.
/// </summary>
/// <param name="li">Element whose OuterHtml is inspected.</param>
/// <param name="ClassName">Text to look for inside the opening tag.</param>
/// <returns>
/// <c>true</c> when the text between the first character of OuterHtml and the first
/// '&gt;' contains <paramref name="ClassName"/>; <c>false</c> otherwise, including when
/// the element, its markup, or the class name is missing.
/// </returns>
bool ContaintClass(HtmlElement li, string ClassName)
{
    if (li == null || ClassName == null)
    {
        return false;
    }

    string html = li.OuterHtml;
    if (string.IsNullOrEmpty(html))
    {
        return false;
    }

    // IndexOf returns -1 when there is no '>' at all; Substring would throw on the
    // resulting negative length, so treat malformed markup as "no match".
    int tagEnd = html.IndexOf('>');
    if (tagEnd < 1)
    {
        return false;
    }

    // Skip the first character (normally '<') and keep everything up to the first '>'.
    string openingTag = html.Substring(1, tagEnd - 1);

    // Plain ordinal substring match: this also matches the text inside other
    // attributes or inside a longer class name, exactly as before.
    return openingTag.IndexOf(ClassName, StringComparison.Ordinal) >= 0;
}
3.使用
/// <summary>
/// Click handler: downloads the page selected in txt_DataURL, loads a slice of it into an
/// in-memory HtmlDocument, and fills dataGridView1 with one row per &lt;li&gt; element
/// (columns OrderID, MusicName, Singer).
/// </summary>
/// <param name="sender">Event source (not used in the body).</param>
/// <param name="e">Event data (not used in the body).</param>
private void btn_GetList_Click(object sender, EventArgs e)
{
    // Result table bound to the grid at the end.
    DataTable dt = new DataTable();
    dt.Columns.Add("OrderID");
    dt.Columns.Add("MusicName");
    dt.Columns.Add("Singer");
    string DataURL = (string)txt_DataURL.SelectedValue;
    if (String.IsNullOrEmpty(DataURL))
    {
        // NOTE(review): there is no return after this warning, so execution continues
        // with a null/empty URL — confirm whether an early exit was intended.
        MessageBox.Show("数据来源不能为空!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Warning);
    }
    string StrWebContent = GetHTMLContent.GetWebContent(DataURL);
    // NOTE(review): the search strings below are empty, ULStart is used inside its own
    // initializer, and ULEnd is not declared anywhere in the visible source. This looks
    // like markup literals (and a statement) lost when the snippet was published —
    // restore from the original project before use.
    int ListStart = StrWebContent.IndexOf("");
    int ULStart = StrWebContent.IndexOf("", ULStart);
    // NOTE(review): Substring's second argument is a length, not an end index — verify
    // that ULEnd is meant as a length.
    string strweb = StrWebContent.Substring(ULStart, ULEnd);
    // Build an HtmlDocument from the extracted markup using an off-screen WebBrowser.
    WebBrowser webb = new WebBrowser();
    webb.Navigate("about:blank");
    HtmlDocument htmldoc = webb.Document.OpenNew(true);
    htmldoc.Write(strweb);
    HtmlElementCollection htmlli = htmldoc.GetElementsByTagName("li");
    // Class markers to look for, in the order the fields are collected.
    string ClassName_OrderID = "index-num";
    string ClassName_SontTitle = "song-title";
    string ClassName_Singer = "singer";
    string[] ClassNames = new string[] { ClassName_OrderID, ClassName_SontTitle, ClassName_Singer };
    foreach (HtmlElement li in htmlli)
    {
        HtmlElementCollection htmlSpan = li.GetElementsByTagName("span");
        // musicInfo[0..2] = order id, song title, singer; entries stay null when not found.
        string[] musicInfo = new string[3];
        int i = 0;
        int InfoIndex = 0;
        // Single forward pass over the spans: each span is tested only against the
        // next class still wanted, so fields are matched in ClassNames order.
        while (i >= 0 && i < htmlSpan.Count)
        {
            if (ContaintClass(htmlSpan[i], ClassNames[InfoIndex]))
            {
                musicInfo[InfoIndex] = htmlSpan[i].InnerText;
                InfoIndex++;
                // All three fields collected; stop scanning this <li>.
                if (InfoIndex > 2) break;
            }
            i++;
        }
        // A row is added for every <li>, even when some fields were not found.
        DataRow dr = dt.NewRow();
        dr["OrderID"] = musicInfo[0];
        dr["MusicName"] = musicInfo[1];
        dr["Singer"] = musicInfo[2];
        dt.Rows.Add(dr);
    }
    dataGridView1.DataSource = dt;
}
代码都很简单