using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text.RegularExpressions;
using System.Configuration; /// <summary>
////// </summary>
/// <summary>
/// Shared constants: argument names, column indexes, date/time format strings,
/// user-agent strings, file/config key names and, in one nested class per
/// search engine, the XPath expressions and regular expressions for that engine.
/// </summary>
/// <remarks>
/// NOTE(review): several strings in this listing are empty or look like
/// placeholders (FLG_ENABLED, every BAITAI_ID, MSG_E_PAGE_STYLE_CHANGE, the
/// "gg"/"ss"/"sss" root names). They are kept exactly as found; confirm the
/// intended values before relying on them.
/// </remarks>
public static class SearchConst
{
    // Argument names.
    public static readonly string ARG_CLIENT = "client";
    public static readonly string ARG_WORD = "word";

    // Column layout. The original listing had no values here ("= ;"), which
    // does not compile; these values match the complete listing of this class.
    public static readonly int DataColumnCount = 4;
    public static readonly int ColumnOfUrl = 0;
    public static readonly int ColumnOfTitle = 1;
    public static readonly int ColumnOfInfo = 2;
    public static readonly int ColumnOfAdUrl = 3;

    // Date and time format strings.
    public static readonly string FMT_Date = "yyyy/MM/dd";
    public static readonly string FMT_TIME = "HH:mm:ss";

    // User-agent strings referenced by the nested per-engine classes.
    public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
    public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

    // Presumably the placeholder replaced by the search keyword in a URL
    // template - TODO confirm against the caller.
    public static readonly string SearchKeyWordPlace = "#{q}";
    public static readonly string DefaultEncode = "UTF-8";
    public static readonly string AttributeHref = "href";

    // File names and configuration key names.
    public static readonly string FILEEXT_ZIP = ".zip";
    public static readonly string FILE_TXT = "source.txt";
    public static readonly string FILE_KEY = "SavePath";
    public static readonly string BATCH_PARALLES_KEY = "BatchParalles";

    // NOTE(review): empty in this listing - confirm the intended flag value.
    public static readonly string FLG_ENABLED = "";
    public static readonly string CLIENT_MONITOR = "BJMOR";
    // NOTE(review): "fff" looks like a placeholder message - confirm.
    public static readonly string MSG_E_PAGE_STYLE_CHANGE = "fff";

    /// <summary>
    /// Constants for Google (uses the PC user agent). Each array holds two
    /// alternative values; presumably they target different page layouts -
    /// TODO confirm.
    /// </summary>
    public static class Google
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
        // Get the URL.
        public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
        // Get the title.
        public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

        // Captures everything from "http" to the end of the input after "adurl=".
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for Google with the mobile user agent.
    /// </summary>
    public static class GoogleM
    {
        public static readonly string UserAgent = UserAgentMobile;

        // Info.
        public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
        public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

        // Captures everything from "http" to the end of the input after "adurl=".
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for MSN (uses the PC user agent). The two entries of each
    /// array use different selectors (sb_ads* classes vs. b_context/b_results ids).
    /// </summary>
    public static class MSN
    {
        public static readonly string UserAgent = UserAgentPC;

        // b_context/b_ad
        public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

        // b_results/b_ad
        public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for Yahoo (uses the PC user agent). Values are single strings
    /// here, not arrays.
    /// </summary>
    public static class Yahoo
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT = "sIn";
        public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

        public static readonly string XPATH_ROOT_TOP = "So1";
        public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for a second Yahoo variant (PC user agent); only the *_TOP
    /// values are declared.
    /// </summary>
    public static class Yahoo2
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT_TOP = "contents";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for Yahoo with the mobile user agent.
    /// </summary>
    public static class YahooM
    {
        public static readonly string UserAgent = UserAgentMobile;

        public static readonly string XPATH_ROOT = "contentsInner";
        public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

        public static readonly string XPATH_ROOT_TOP = "contentsInner";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for Baidu. Declares its own user-agent string instead of
    /// reusing UserAgentPC, and has no active RegexAdUrl.
    /// </summary>
    public static class BaiDu
    {
        public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // First case.
        public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

        // Top info. Original note on these arrays: "first entry lower, second
        // entry upper" - presumably the position of the ads on the page; TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
        public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
        public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
        public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for cn.bing (uses the PC user agent). The *_TOP XPath values
    /// are empty strings.
    /// </summary>
    public static class CnBing
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

        // Top info. Original note: "first entry lower, second entry upper" - TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "gg", "gg" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Captures everything from "http" to the end of the input after "rturl=".
        public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for HaoSou (uses the PC user agent). The *_TOP XPath values
    /// are empty strings and there is no active RegexAdUrl.
    /// </summary>
    public static class HaoSou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Ads on the right-hand side.
        // Search scope.
        public static readonly string[] XPATH_ROOT = { "side", "side" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

        // Top info. Original note: "first entry lower, second entry upper" - TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "ss", "sss" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>
    /// Constants for Sogou (uses the PC user agent). Each array holds a single
    /// entry, and there is no active RegexAdUrl.
    /// </summary>
    public static class Sogou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Right-hand section.
        public static readonly string[] XPATH_ROOT = { "right" };
        // The green URL.
        public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
        // Original selector note: #ad_leftresult_0 > h3:nth-child(1)
        public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
        // URL of the h3.
        public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

        // Top section.
        // Original XPath note: //*[@id="promotion_adv_container"]/div/div
        public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
        public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
        public static readonly string NullUrl = "&gt;";
    }
}

using System;using System.Collections.Generic;using System.Linq;using System.Web;using System.Text.RegularExpressions;using System.Configuration;
/// <summary>
/// Shared constants: argument names, column indexes, date/time format strings,
/// user-agent strings, file/config key names and, in one nested class per
/// search engine, the XPath expressions and regular expressions for that engine.
/// (The original header read "summary description for SearchHelper".)
/// </summary>
/// <remarks>
/// In the original text the line breaks were lost, so the class header and most
/// field declarations sat behind "//" comments on the same physical line. Every
/// declaration is restored to its own line here; the values are unchanged.
/// </remarks>
public static class SearchConst
{
    // Argument names.
    public static readonly string ARG_CLIENT = "client";
    public static readonly string ARG_WORD = "word";

    // Column layout.
    public static readonly int DataColumnCount = 4;
    public static readonly int ColumnOfUrl = 0;
    public static readonly int ColumnOfTitle = 1;
    public static readonly int ColumnOfInfo = 2;
    public static readonly int ColumnOfAdUrl = 3;

    // Date and time format strings.
    public static readonly string FMT_Date = "yyyy/MM/dd";
    public static readonly string FMT_TIME = "HH:mm:ss";

    // User-agent strings referenced by the nested per-engine classes.
    public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
    public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

    // Presumably the placeholder replaced by the search keyword in a URL
    // template - TODO confirm against the caller.
    public static readonly string SearchKeyWordPlace = "#{q}";
    public static readonly string DefaultEncode = "UTF-8";
    public static readonly string AttributeHref = "href";

    // File names and configuration key names.
    public static readonly string FILEEXT_ZIP = ".zip";
    public static readonly string FILE_TXT = "source.txt";
    public static readonly string FILE_KEY = "SavePath";
    public static readonly string BATCH_PARALLES_KEY = "BatchParalles";
    public static readonly string FLG_ENABLED = "1";
    public static readonly string CLIENT_MONITOR = "BJMOR";
    // Japanese message text; roughly "layout change of the search medium for
    // the sponsor check".
    public static readonly string MSG_E_PAGE_STYLE_CHANGE = "スポンサーチェックの検索媒体レイアウト変更";

    /// <summary>
    /// Constants for Google (uses the PC user agent). Each array holds two
    /// alternative values; presumably they target different page layouts -
    /// TODO confirm.
    /// </summary>
    public static class Google
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
        // Get the URL.
        public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
        // Get the title.
        public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

        // Captures everything from "http" to the end of the input after "adurl=".
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "001";
    }

    /// <summary>
    /// Constants for Google with the mobile user agent.
    /// </summary>
    public static class GoogleM
    {
        public static readonly string UserAgent = UserAgentMobile;

        // Info.
        public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
        public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

        // Captures everything from "http" to the end of the input after "adurl=".
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "005";
    }

    /// <summary>
    /// Constants for MSN (uses the PC user agent). The two entries of each
    /// array use different selectors (sb_ads* classes vs. b_context/b_results ids).
    /// </summary>
    public static class MSN
    {
        public static readonly string UserAgent = UserAgentPC;

        // b_context/b_ad
        public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

        // b_results/b_ad
        public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "003";
    }

    /// <summary>
    /// Constants for Yahoo (uses the PC user agent). Values are single strings
    /// here, not arrays.
    /// </summary>
    public static class Yahoo
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT = "sIn";
        public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

        public static readonly string XPATH_ROOT_TOP = "So1";
        public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "002";
    }

    /// <summary>
    /// Constants for a second Yahoo variant (PC user agent); only the *_TOP
    /// values are declared.
    /// </summary>
    public static class Yahoo2
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT_TOP = "contents";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "004";
    }

    /// <summary>
    /// Constants for Yahoo with the mobile user agent.
    /// </summary>
    public static class YahooM
    {
        public static readonly string UserAgent = UserAgentMobile;

        public static readonly string XPATH_ROOT = "contentsInner";
        public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

        public static readonly string XPATH_ROOT_TOP = "contentsInner";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

        // Captures everything from "http" to the end of the input after a literal "**".
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "006";
    }

    /// <summary>
    /// Constants for Baidu. Declares its own user-agent string instead of
    /// reusing UserAgentPC, and has no active RegexAdUrl.
    /// </summary>
    public static class BaiDu
    {
        public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // First case; there seems to be only one case.
        public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

        // Top info. Original note on these arrays: "first entry lower (partial
        // ads), second entry upper" - presumably the position of the ads on
        // the page; TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
        public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
        public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
        public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "007";
    }

    /// <summary>
    /// Constants for cn.bing (uses the PC user agent). The *_TOP XPath values
    /// are empty strings and the top root name is the Japanese word for "none".
    /// </summary>
    public static class CnBing
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

        // Top info. Original note: "first entry lower, second entry upper" - TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Captures everything from "http" to the end of the input after "rturl=".
        public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "008";
    }

    /// <summary>
    /// Constants for HaoSou (uses the PC user agent). The *_TOP XPath values
    /// are empty strings and there is no active RegexAdUrl.
    /// </summary>
    public static class HaoSou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Ads on the right-hand side.
        // Search scope.
        public static readonly string[] XPATH_ROOT = { "side", "side" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

        // Top info. Original note: "first entry lower, second entry upper" - TODO confirm.
        public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "009";
    }

    /// <summary>
    /// Constants for Sogou (uses the PC user agent). Each array holds a single
    /// entry, and there is no active RegexAdUrl.
    /// </summary>
    public static class Sogou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Right-hand section.
        public static readonly string[] XPATH_ROOT = { "right" };
        // The green URL.
        public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
        // Original selector note: #ad_leftresult_0 > h3:nth-child(1)
        public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
        // URL of the h3.
        public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

        // Top section.
        // Original XPath note: //*[@id="promotion_adv_container"]/div/div
        public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
        public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

        // Disabled in the original:
        //public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "010";
        public static readonly string NullUrl = "&gt;";
    }
}

xpath 参考的更多相关文章

  1. 【转】XPath 示例

    XPath 示例   其他版本   本主题回顾整个 XPath 参考中出现的语法示例. 所有示例均基于 XPath 语法的示例 XML 文件 (inventory.xml). 有关在测试文件中使用 X ...

  2. XPATH 带命名空间数据的读取

    在XML中,很多情况下有命名空间,如果直接使用XPATH 读取是会读到空节点. 解决办法如下: InputStream is=loader.getResourceAsStream("com/ ...

  3. HtmlCleaner结合xpath用法(转载)

    HtmlCleaner cleaner = new HtmlCleaner(); TagNode node = cleaner.clean(new URL("http://finance.s ...

  4. scrapy2_初窥Scrapy

    递归知识:oop,xpath,jsp,items,pipline等专业网络知识,初级水平并不是很scrapy,可以从简单模块自己写. 初窥Scrapy Scrapy是一个为了爬取网站数据,提取结构性数 ...

  5. 较全的IT方面帮助文档

    http://www.shouce.ren/post/d/id/108632 XSLT参考手册-新.CHMhttp://www.shouce.ren/post/d/id/108633 XSL-FO参考 ...

  6. selenium java 浏览器操作

    环境搭建 selenium 2.53 selenium-java-2.53.0.jar selenium-java-2.53.0-srcs.jar 原代码包 拷贝的工程lib下,做build path ...

  7. JDOM 操作XML

    http://www.cnblogs.com/hoojo/archive/2011/08/11/2134638.html 可扩展标记语言——eXtensible Markup Language 用户可 ...

  8. python爬虫 scrapy2_初窥Scrapy

    sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...

  9. [开发笔记]-Linq to xml学习笔记

    最近需要用到操作xml文档的方法,学习了一下linq to xml,特此记录. 测试代码: class Program { //参考: LINQ to XML 编程基础 - luckdv - 博客园 ...

随机推荐

  1. 【代码笔记】iOS-点击cell时候的动画翻转

    一,效果图. 二,工程图. 三,代码. RootViewController.h #import <UIKit/UIKit.h> @interface RootViewController ...

  2. Web应用程序系统的多用户权限控制设计及实现-页面模块【9】

    前五章均是从整体上讲述了Web应用程序的多用户权限控制实现流程,本章讲述Web权限管理系统的基本模块-页面模块.页面模块涉及到的数据表为页面表. 1.1页面域 为了更规范和方便后期系统的二次开发和维护 ...

  3. java jdbc 连接mysql数据库 实现增删改查

    好久没有写博文了,写个简单的东西热热身,分享给大家. jdbc相信大家都不陌生,只要是个搞java的,最初接触j2ee的时候都是要学习这么个东西的,谁叫程序得和数据库打交道呢!而jdbc就是和数据库打 ...

  4. 振奋人心啊!!!!下一代.NET——ASP.NET vNext

    这两天看到的.NET的新闻都好振奋人心啊!微软北美技术大会带来了好多好消息! 看到一篇博客园的文章,感觉太棒了.摘录下来.原文链接:http://news.cnblogs.com/n/208133/ ...

  5. ASP.NET features need application service database support

    搭建的web程序出现如上图所示的错误 原因程序使用以下ASP.NET 特性 Membership (the SqlMembershipProvider class). Role management ...

  6. Tomcat:使用JMX监管Tomcat的几种方式

    Tomcat使用JMX管理方式,在Tomcat的自带应用manager就是使用了JMX方式来管理Tomcat,以此完成Web应用的动态部署.启动.停止. 然而manager应用是一种本地使用JMX接口 ...

  7. layout 布局、手风琴accordion、选项卡tabs

    <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title> ...

  8. 按要求编写Java应用程序。 (1)建立一个名叫Cat的类: 属性:姓名、毛色、年龄 行为:显示姓名、喊叫 (2)编写主类: 创建一个对象猫,姓名为“妮妮”,毛色为“灰色”,年龄为2岁,在屏幕上输 出该对象的毛色和年龄,让该对象调用显示姓名和喊叫两个方法。

    package zuoye; public class Cat { String name="妮妮"; String color="灰色"; int age=1 ...

  9. 尝试一下sql server2016里面的json功能

    前2天下载了一个2016的rc版本来玩一下,首先感觉是~开发者版本免费啦!!撒花!!!另外一个东西,sql server 2016能支持json 的解析和应用啦,虽然我不知道它的性能如何,先来一发测试 ...

  10. W3School-CSS 背景实例

    CSS 背景实例 CSS 实例 CSS 背景实例 CSS 文本实例 CSS 字体(font)实例 CSS 边框(border)实例 CSS 外边距 (margin) 实例 CSS 内边距 (paddi ...