网页采集程序(超级简单版)
/// <summary>
/// Button click handler: downloads http://www.baidu.com, extracts the markup found
/// between <c>&lt;p id="lg"&gt;</c> and the next <c>&lt;/p&gt;</c>, and writes it to
/// the current response.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// If either marker is missing from the downloaded page, nothing is written.
/// Network failures raised by <c>WebClient</c> are not caught here.
/// </remarks>
protected void btn_click(object sender, EventArgs e)
{
    // Approach 1 (kept for reference): download the raw bytes and decode them
    // with an explicit encoding.
    //System.Net.WebClient wc = new System.Net.WebClient();
    //byte[] b = wc.DownloadData("http://www.baidu.com");
    //string html = System.Text.Encoding.GetEncoding("gb2312").GetString(b);
    //html = html.Substring(html.IndexOf("<p id=\"lg\">") + "<p id=\"lg\">".Length);
    //html = html.Substring(0, html.IndexOf("</p>"));
    //Response.Write(html);

    // Approach 2: fetch the whole page through a stream.
    // The using blocks guarantee the client, stream and reader are released
    // even if the download or the read throws.
    string html;
    using (System.Net.WebClient wc = new System.Net.WebClient())
    using (System.IO.Stream sm = wc.OpenRead("http://www.baidu.com"))
    using (System.IO.StreamReader sr = new System.IO.StreamReader(sm, System.Text.Encoding.Default, true, 256000))
    {
        html = sr.ReadToEnd();
    }

    // Pull out only the wanted fragment according to the extraction rule.
    string fragment = ExtractBetween(html, "<p id=\"lg\">", "</p>");
    if (fragment != null)
    {
        // NOTE(review): the fragment is remote markup written without encoding;
        // confirm that echoing it verbatim is intended.
        Response.Write(fragment);
    }
}

/// <summary>
/// Returns the text located between the first <paramref name="startMarker"/> and the
/// first <paramref name="endMarker"/> that follows it, or <c>null</c> when either
/// marker cannot be found.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="startMarker">Marker that precedes the wanted text.</param>
/// <param name="endMarker">Marker that terminates the wanted text.</param>
/// <returns>The enclosed text (possibly empty), or <c>null</c> if a marker is absent.</returns>
private static string ExtractBetween(string text, string startMarker, string endMarker)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    // Ordinal comparison: markup is matched byte-for-byte, not linguistically.
    int start = text.IndexOf(startMarker, StringComparison.Ordinal);
    if (start < 0)
    {
        return null;
    }

    start += startMarker.Length;

    int end = text.IndexOf(endMarker, start, StringComparison.Ordinal);
    if (end < 0)
    {
        return null;
    }

    return text.Substring(start, end - start);
}
http://hereson.javaeye.com/blog/207468
| 欢迎光临 老秘网_材夜思范文 (http://www.laomiw.com/) | Powered by Discuz! X3.4 |