以下是代码片段:
using System; using System.Collections.Generic; using System.Text;
using System.Net; using System.Text.RegularExpressions; using System.Windows.Forms; using System.Data; using System.Data.OleDb; using System.Data.SqlClient;
using System.Threading;
namespace ZLWeb
{
    /// <summary>
    /// Crawl worker. <see cref="Run"/> walks the page numbers from <see cref="UrlID"/> up to
    /// <see cref="EndUrlID"/> in increments of <see cref="Step"/>, downloads the page built
    /// from the <see cref="Url"/> template, extracts the company records found in it and
    /// inserts them into the NL_GS table.
    /// </summary>
    class DD
    {
        /// <summary>
        /// Gate for all database writes. It is static because Run() points every instance's
        /// command at the same connection (f.Conn), so a per-instance lock could not
        /// serialize access to it.
        /// </summary>
        private static readonly object DbLock = new object();

        /// <summary>Matches one record block; group 1 is the text inside the div.</summary>
        private static readonly Regex RecordRegex =
            new Regex(@"<div align=left>([\w\W]*?)</div>", RegexOptions.Compiled);

        /// <summary>Opening markup in front of the company name in the first field of a record.</summary>
        private const string NamePrefix = "<b>·";

        /// <summary>Worker number. Run() does nothing when it is below 1; it prefixes log lines and is stored as tid.</summary>
        public int ID = 0;

        /// <summary>URL template; every "{#}" is replaced by the page number formatted as "0000".</summary>
        public string Url = "";

        /// <summary>Not read or written anywhere in this class.</summary>
        public string State = "";

        private readonly WebClient client = new WebClient();

        /// <summary>Start ID: first page number to fetch. Advanced by Step while Run() executes.</summary>
        public int UrlID = 0;

        /// <summary>End ID: last page number to fetch (inclusive).</summary>
        public int EndUrlID = 0;

        /// <summary>Increment applied to UrlID between pages. Run() requires it to be at least 1.</summary>
        public int Step = 1;

        /// <summary>URL of the page currently being processed.</summary>
        private string vUrl = "";

        /// <summary>Body of the page currently being processed; null until the first download attempt.</summary>
        private string html;

        /// <summary>Main form; supplies the database connection (Conn) and the log sink (SetText).</summary>
        public static FrmMain f;

        /// <summary>Command used for the inserts. Run() attaches it to f.Conn.</summary>
        public OleDbCommand Comm = new OleDbCommand();

        /// <summary>
        /// Processes every page from UrlID to EndUrlID. Returns immediately when ID is below 1,
        /// Url is null or empty, or Step is below 1.
        /// </summary>
        public void Run()
        {
            // A Step below 1 would never move UrlID past EndUrlID, so the loop could not end.
            if (ID < 1 || string.IsNullOrEmpty(Url) || Step < 1)
            {
                return;
            }

            // NOTE(review): f must already be assigned here; nothing in Run() locates it.
            Comm.Connection = f.Conn;

            // GetUrlID() adds Step before it builds the URL, so step back once to make the
            // first iteration fetch the configured start ID.
            UrlID -= Step;
            while (GetUrlID())
            {
                try
                {
                    WriteLog(ID.ToString("00") + " " + vUrl);
                    GetWeb();
                    ParseHTML();
                }
                catch (ThreadAbortException ex)
                {
                    // On .NET Framework the runtime re-raises the abort when this handler
                    // ends (unless Thread.ResetAbort is called), so the loop does not resume.
                    WriteLog("有人让我马上结束 " + ex.ToString());
                }
                catch (Exception e)
                {
                    // A failure on one page is logged and the next page is tried.
                    WriteLog(e.ToString());
                }
            }
            WriteLog("完成任务,退出线程");
        }

        /// <summary>
        /// Advances UrlID by Step and rebuilds vUrl from the template.
        /// </summary>
        /// <returns>true while the new UrlID is still within EndUrlID.</returns>
        private bool GetUrlID()
        {
            UrlID += Step;
            vUrl = Url.Replace("{#}", UrlID.ToString("0000"));
            return UrlID <= EndUrlID;
        }

        /// <summary>
        /// Downloads vUrl into html. A failed download is logged and leaves html empty, so
        /// the page simply yields no records.
        /// </summary>
        private void GetWeb()
        {
            html = "";
            try
            {
                html = client.DownloadString(vUrl);
            }
            catch (WebException ex)
            {
                WriteLog(ID.ToString("00") + " " + vUrl + " " + ex.Message);
            }
        }

        /// <summary>
        /// Sends a line to the main form's log. When f has not been assigned yet, the first
        /// open FrmMain is looked up and cached in f; with no such form the line is dropped.
        /// </summary>
        /// <param name="log">Text to pass to FrmMain.SetText.</param>
        public static void WriteLog(string log)
        {
            if (f == null)
            {
                foreach (Form frm in Application.OpenForms)
                {
                    FrmMain main = frm as FrmMain;
                    if (main != null)
                    {
                        f = main;
                        break;
                    }
                }
            }

            if (f != null)
            {
                f.SetText(log);
            }
        }

        /// <summary>
        /// Extracts every record block from html and hands each one to the database writer.
        /// Does nothing when no page has been downloaded.
        /// </summary>
        public void ParseHTML()
        {
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            foreach (Match m in RecordRegex.Matches(html))
            {
                try
                {
                    string block = m.Groups[1].Value.Replace("\t", "").Replace("\n", "");
                    string[] s = block.Split(new string[] { "<br>" }, StringSplitOptions.None);

                    // A fresh record per match, so nothing carries over from a block that
                    // failed half-way through.
                    GSData g = new GSData();
                    g.gs = s[0].Substring(
                        NamePrefix.Length,
                        s[0].IndexOf("</b>", StringComparison.Ordinal) - NamePrefix.Length).Trim();
                    g.fr = AfterLabel(s[1], "法人:");
                    g.zy = AfterLabel(s[2], "主营:");
                    g.tel = AfterLabel(s[3], "电话:");
                    g.address = AfterLabel(s[4], "地址:");
                    g.etype = AfterLabel(s[5], "经济类型:");
                    g.v = AfterLabel(s[6], "生产产值:");
                    g.num = AfterLabel(s[7], "人员数量:");
                    g.y = AfterLabel(s[8], "开业年份:");
                    g.urlid = UrlID;
                    g.tid = ID;

                    ReadyToWrite(g);
                }
                catch (Exception)
                {
                    // Deliberate best-effort: a block without the nine expected fields (too
                    // few "<br>" parts, or a field shorter than its label) is skipped and
                    // the remaining blocks of the page are still processed.
                }
            }
        }

        /// <summary>
        /// Returns the trimmed text that follows a label. Only the label's length is used;
        /// the field is not checked to actually start with that label.
        /// </summary>
        private static string AfterLabel(string field, string label)
        {
            return field.Substring(label.Length).Trim();
        }

        /// <summary>Forwards a parsed record to WriteToDB.</summary>
        private void ReadyToWrite(GSData g)
        {
            WriteToDB(g);
        }

        /// <summary>
        /// Inserts one record into NL_GS. Any failure is logged and swallowed so the crawl
        /// carries on with the next record.
        /// </summary>
        /// <param name="g">Record to store.</param>
        public void WriteToDB(GSData g)
        {
            lock (DbLock)
            {
                try
                {
                    // OLE DB parameters are positional: each "?" is bound to the parameter
                    // added at the same position below, so the values never become SQL text.
                    Comm.CommandText =
                        "Insert Into NL_GS(公司, 法人, 主营, 电话, 地址, 经济类型, 生产产值, 人员数量, 开业年份, UrlID, tid) values(" +
                        "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
                    Comm.Parameters.Clear();
                    AddText("gs", g.gs);
                    AddText("fr", g.fr);
                    AddText("zy", g.zy);
                    AddText("tel", g.tel);
                    AddText("address", g.address);
                    AddText("etype", g.etype);
                    AddText("v", g.v);
                    AddText("num", g.num);
                    AddText("y", g.y);
                    Comm.Parameters.AddWithValue("urlid", g.urlid);
                    Comm.Parameters.AddWithValue("tid", g.tid);
                    Comm.ExecuteNonQuery();
                }
                catch (Exception e)
                {
                    WriteLog(e.ToString());
                }
            }
        }

        /// <summary>
        /// Appends a text parameter to Comm. Quote characters are still removed, as the
        /// previous string-built statement did, so stored values keep the same form as
        /// rows written earlier.
        /// </summary>
        private void AddText(string name, string value)
        {
            Comm.Parameters.AddWithValue(name, value.Replace("\'", "").Replace("\"", ""));
        }
    }
/// <summary>
/// One company record scraped from a listing page. Text fields start out empty and the
/// numeric fields start at zero.
/// </summary>
class GSData
{
    /// <summary>Company name.</summary>
    public string gs = string.Empty;

    /// <summary>Legal representative.</summary>
    public string fr = string.Empty;

    /// <summary>Main business.</summary>
    public string zy = string.Empty;

    /// <summary>Telephone.</summary>
    public string tel = string.Empty;

    /// <summary>Address.</summary>
    public string address = string.Empty;

    /// <summary>Economic type.</summary>
    public string etype = string.Empty;

    /// <summary>Production output value.</summary>
    public string v = string.Empty;

    /// <summary>Number of staff.</summary>
    public string num = string.Empty;

    /// <summary>Year the business opened.</summary>
    public string y = string.Empty;

    /// <summary>Page number the record was read from.</summary>
    public int urlid;

    /// <summary>ID of the worker that parsed the record.</summary>
    public int tid;
}
} // namespace ZLWeb
|