================== 获取网页中span标签里面的t_id的值

/**
 * Fetch the city-list page and print, for every <span t_id="..."> found in it,
 * the first two whitespace-separated tokens of the t_id attribute, each entry
 * followed by a <br/>.
 *
 * In the sample page each t_id holds three tokens ("<name> <code> <spelling>"),
 * so the output is "<name> <code>" per line.
 *
 * Output is written directly with echo; nothing is returned. If the page
 * cannot be fetched or contains no matching spans, only the page header and
 * the <pre> wrapper are printed.
 */
public function getpreg(){
    $www = 'http://monkey.test.tripb.cn/test/preg.html';
    $html = file_get_contents($www);

    // Parameters in a Content-Type value are separated by "; " (the original
    // used ":", which makes browsers ignore the charset declaration).
    echo '<meta http-equiv="Content-type" content="text/html; charset=utf-8">';
    echo '<pre>';

    if ($html === false) {
        // file_get_contents() has already raised a warning; nothing to parse.
        echo '</pre>';
        return;
    }

    // \b keeps "<spanfoo" from matching, and the mandatory \s before t_id
    // keeps attributes that merely end in "t_id" (e.g. data-t_id) out.
    // \1 requires the closing quote to be the same as the opening one.
    // Group 2 is the raw attribute value.
    $search = '/<span\b[^>]*\st_id=([\'"])([^\'"]+)\1/';
    if (!preg_match_all($search, $html, $matches)) {
        // No spans found (0) or a PCRE error (false): nothing to print.
        echo '</pre>';
        return;
    }

    foreach ($matches[2] as $value) {
        // Split in UTF-8 mode so a byte inside a multi-byte character is never
        // treated as whitespace; values may also wrap across lines in the HTML.
        $parts = preg_split('/\s+/u', $value, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts === false) {
            // The value is not valid UTF-8: fall back to a byte-wise split.
            $parts = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
        }
        if (empty($parts)) {
            continue;
        }

        $name = $parts[0];
        // A t_id with a single token has no second field; print it empty
        // instead of reading an undefined offset.
        $code = isset($parts[1]) ? $parts[1] : '';

        // The text comes from a remote page, so escape it before echoing.
        // double_encode=false leaves entities already present in the
        // attribute value (e.g. &amp;) as they are.
        echo htmlspecialchars($name . ' ' . $code, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        echo '<br/>';
    }

    echo '</pre>';
}

==================          test/preg.html

<ul id="popup_content">
<li><div id="index_A"><span t_id="阿克苏 AKU Aksu">阿克苏</span><span t_id="阿尔山 YIE Alshan">阿尔山</span><span t_id="阿里 NGQ Ali">阿里</span><span t_id="阿勒泰 AAT Altay">阿勒泰</span><span t_id="安康 AKA AnKang">安康</span><span t_id="安庆 AQG AnQing">安庆</span><span t_id="鞍山 AOG AnShan">鞍山</span><span t_id="安顺 AVA AnShun">安顺</span><span t_id="阿拉善左旗 AXF ALaShanZuoQi">阿拉善左旗</span><span t_id="阿拉善右旗 RHT ALaShanYouQi">阿拉善右旗</span><span t_id="阿坝 AHJ ABa">阿坝</span><em class="letters red">A</em></div><div id="index_B"><span t_id="百色 AEB BaiSe">百色</span><span t_id="蚌埠 BFU BangBu">蚌埠</span><span t_id="保山 BSD BaoShan">保山</span><span t_id="包头 BAV BaoTou">包头</span><span t_id="巴彦淖尔 RLK BaYanNaoEr">巴彦淖尔</span><span t_id="北海 BHY BeiHai">北海</span><span t_id="北京 PEK BeiJing">北京</span><span t_id="北京南苑 NAY BeiJingNanYuan">北京南苑</span><span t_id="毕节 BFJ BiJie">毕节</span><span t_id="博乐 BPL BoLe">博乐</span><span t_id="北戴河 BPE BeiDaiHe">北戴河</span><em class="letters red">B</em></div><div id="index_C"><span t_id="长春 CGQ ChangChun">长春</span><span t_id="常德 CGD ChangDe">常德</span><span t_id="昌都 BPX ChangDu">昌都</span><span t_id="长沙 CSX ChangSha">长沙</span><span t_id="长治 CIH ChangZhi">长治</span><span t_id="常州 CZX ChangZhou">常州</span><span t_id="长白山 NBS ChangBaiShan">长白山</span><span t_id="朝阳 CHG ChaoYang">朝阳</span><span t_id="成都 CTU ChengDu">成都</span><span t_id="赤峰 CIF ChiFeng">赤峰</span><span t_id="重庆 CKG ChongQing">重庆</span><span t_id="清莱 CEI CHIANG RIA">清莱</span><em class="letters red">C</em></div><div id="index_D"><span t_id="稻城亚丁 DCY DaoCheng">稻城亚丁</span><span t_id="大理 DLU DaLi">大理</span><span t_id="大连 DLC DaLian">大连</span><span t_id="大同 DAT DaTong">大同</span><span t_id="达县 DAX DaXian">达县</span><span t_id="丹东 DDG DanDong">丹东</span><span t_id="大庆 DQA DAQING">大庆</span><span t_id="德宏芒市 LUM DeHong">德宏芒市</span><span t_id="东营 DOY DongYing">东营</span><span t_id="敦煌 DNH DunHuang">敦煌</span><span t_id="德令哈 HXD DeLingHa">德令哈</span><span t_id="大叻 DLI DaLat">大叻</span><em class="letters red">D</em></div><div id="index_E"><span 
t_id="额济纳旗 EJN EJiNaQi">额济纳旗</span><span t_id="恩施 ENH EnShi">恩施</span><span t_id="二连浩特 ERL ErLianHaoTe">二连浩特</span><em class="letters red">E</em></div><div id="index_F"><span t_id="佛山 FUO FoShan">佛山</span><span t_id="阜阳 FUG FuYang">阜阳</span><span t_id="富蕴 FYN FuYun">富蕴</span><span t_id="福州 FOC FuZhou">福州</span><span t_id="抚远 FYJ FuYuan">抚远</span><em class="letters red">F</em></div></li><li><div id="index_G"><span t_id="赣州 KOW GanZhou">赣州</span><span t_id="格尔木 GOQ GeErMu">格尔木</span><span t_id="广汉 GHN GuangHan">广汉</span><span t_id="广元 GYS GuangYuan">广元</span><span t_id="广州 CAN GuangZhou">广州</span><span t_id="桂林 KWL GuiLin">桂林</span><span t_id="贵阳 KWE Guiyang">贵阳</span><span t_id="固原 GYU GuYuan">固原</span><em class="letters red">G</em></div><div id="index_H"><span t_id="哈密 HMI HaMi">哈密</span><span t_id="哈尔滨 HRB HaRBin">哈尔滨</span><span t_id="海口 HAK HaiKou">海口</span><span t_id="海拉尔 HLD Hailar">海拉尔</span><span t_id="汉中 HZG HanZhong">汉中</span><span t_id="邯郸 HDG HanDan">邯郸</span><span t_id="杭州 HGH HangZhou">杭州</span><span t_id="合肥 HFE HeFei">合肥</span><span t_id="和田 HTN HeTan">和田</span><span t_id="黑河 HEK HeiHe">黑河</span><span t_id="衡阳 HNY HengYang">衡阳</span><span t_id="呼和浩特 HET HuHeHaoTe">呼和浩特</span><span t_id="淮安 HIA HuaiAn">淮安</span><span t_id="黄山 TXN HuangShan">黄山</span><span t_id="黄岩 HYN HuangYan">黄岩</span><span t_id="乌兰浩特 HLH HUlanhot">乌兰浩特</span><span t_id="顺化 HUI HUE">顺化</span><span t_id="惠州 HUZ HuiZhou">惠州</span><span t_id="河池 HCJ HeChi">河池</span><span t_id="花土沟 HTT HuaTuGou">花土沟</span><em class="letters red">H</em></div><div id="index_I"></div><div id="index_J"><span t_id="九华山 JUH JiuHuaShan">九华山</span><span t_id="吉安 KNC JiAn">吉安</span><span t_id="济宁 JNG JiNing">济宁</span><span t_id="吉林 JIL JiLin">吉林</span><span t_id="济南 TNA JiNan">济南</span><span t_id="鸡西 JXA JiXi">鸡西</span><span t_id="佳木斯 JMU JiaMuSi">佳木斯</span><span t_id="嘉峪关 JGN JiaYuGuan">嘉峪关</span><span t_id="锦州 JNZ JinZhou">锦州</span><span t_id="景德镇 JDZ JingDeZhen">景德镇</span><span t_id="井冈山 JGS 
JingGangShan">井冈山</span><span t_id="西双版纳 JHG JingHong">西双版纳</span><span t_id="九江 JIU JiuJiang">九江</span><span t_id="酒泉 CHW JiuQuan">酒泉</span><span t_id="九寨沟 JZH JIUZHAIGOU">九寨沟</span><span t_id="揭阳 SWA JieYang">揭阳</span><span t_id="金昌 JIC JingChang">金昌</span><span t_id="加格达奇 JGD JiaGeDaQi">加格达奇</span><em class="letters red">J</em></div></li><li><div id="index_K"><span t_id="喀什 KHG KaShi">喀什</span><span t_id="喀纳斯 KJI KaNaSi">喀纳斯</span><span t_id="康定 KGT KangDing">康定</span><span t_id="克拉玛依 KRY Karamay">克拉玛依</span><span t_id="库尔勒 KRL Korla">库尔勒</span><span t_id="昆明 KMG KuMing">昆明</span><span t_id="库车 KCA KuQa">库车</span><span t_id="凯里黄平 KJH KaiLiHuangPing">凯里黄平</span><span t_id="苏梅岛 USM KohSamui">苏梅岛</span><em class="letters red">K</em></div><div id="index_L"><span t_id="兰州 LHW LanZhou">兰州</span><span t_id="拉萨 LXA Lasa">拉萨</span><span t_id="荔波 LLB LiBo">荔波</span><span t_id="丽江 LJG LiJiang">丽江</span><span t_id="吕梁 LLV LvLiang">吕梁</span><span t_id="黎平 HZH LiPing">黎平</span><span t_id="丽水 RSU LiShui">丽水</span><span t_id="连云港 LYG LianYunGang">连云港</span><span t_id="连城 LCX LianCheng">连城</span><span t_id="临沧 LNJ LinChang">临沧</span><span t_id="临沂 LYI LinYi">临沂</span><span t_id="伊春林都 LDS Lindu">伊春林都</span><span t_id="林芝 LZY LinZhi">林芝</span><span t_id="柳州 LZH LiuZhou">柳州</span><span t_id="泸州 LZO LuZhou">泸州</span><span t_id="洛阳 LYA LuoYang">洛阳</span><span t_id="六盘水 LPF LiuPanShui">六盘水</span><span t_id="临汾 LFQ LinFen">临汾</span><em class="letters red">L</em></div><div id="index_M"><span t_id="绵阳 MIG ManYang">绵阳</span><span t_id="满洲里 NZH ManZhouLi">满洲里</span><span t_id="梅县 MXZ MeiXian">梅县</span><span t_id="漠河 OHE MOHE">漠河</span><span t_id="牡丹江 MDG MuDanJiang">牡丹江</span><em class="letters red">M</em></div><div id="index_N"><span t_id="那拉提 NLT NaLaTi">那拉提</span><span t_id="南昌 KHN NanChang">南昌</span><span t_id="南充 NAO NanChong">南充</span><span t_id="南京 NKG NanJing">南京</span><span t_id="南宁 NNG NanNing">南宁</span><span t_id="南通 NTG NanTong">南通</span><span t_id="南阳 NNY 
NanYang">南阳</span><span t_id="宁波 NGB NingBo">宁波</span><span t_id="良乌 NYU NYAUNG-U">良乌</span><span t_id="宁蒗 NLH NingLang">宁蒗</span><em class="letters red">N</em></div></li><li><div id="index_P"><span t_id="攀枝花 PZI PanZhiHua">攀枝花</span><span t_id="普洱 SYM PuEr">普洱</span><span t_id="濮阳 PYY PuYang">濮阳</span><span t_id="帕罗 PBH PARO">帕罗</span><em class="letters red">P</em></div><div id="index_Q"><span t_id="泉州晋江 JJN QuanZhouJinJiang">泉州晋江</span><span t_id="衢州 JUZ QuZhou">衢州</span><span t_id="齐齐哈尔 NDG QiQiHar">齐齐哈尔</span><span t_id="且末 IQM QieMo">且末</span><span t_id="秦皇岛 SHP QinHuangDao">秦皇岛</span><span t_id="青岛 TAO QingDao">青岛</span><span t_id="庆阳 IQN QingYang">庆阳</span><span t_id="琼海 BAR QiongHai">琼海</span><em class="letters red">Q</em></div><div id="index_R"><span t_id="日喀则 RKZ RiKaZe">日喀则</span><span t_id="日照 RIZ RiZhao">日照</span><em class="letters red">R</em></div><div id="index_S"><span t_id="三亚 SYX SanYa">三亚</span><span t_id="沙市 SHS ShaShi">沙市</span><span t_id="汕头 SWA ShanTou">汕头</span><span t_id="上海虹桥 SHA ShangHai">上海虹桥</span><span t_id="上海浦东 PVG ShangHaiPoDong">上海浦东</span><span t_id="沈阳 SHE ShenYang">沈阳</span><span t_id="深圳 SZX ShenZhen">深圳</span><span t_id="石家庄 SJW ShiJiaZhuang">石家庄</span><span t_id="思茅 SYM SiMao">思茅</span><span t_id="神农架 HPG ShenNongJia">神农架</span><span t_id="静冈 FSZ Shizuoka">静冈</span><span t_id="苏州 SZV SuZhou">苏州</span><span t_id="十堰 WDS ShiYan">十堰</span><span t_id="三明 SQJ SanMing">三明</span><span t_id="石河子 SHF ShiHeZi">石河子</span><em class="letters red">S</em></div><div id="index_T"><span t_id="台州 HYN TaiZhou">台州</span><span t_id="塔城 TCG TaCheng">塔城</span><span t_id="太原 TYN TaiYuan">太原</span><span t_id="腾冲 TCZ TengChong">腾冲</span><span t_id="天津 TSN TianJin">天津</span><span t_id="天水 THQ TianShui">天水</span><span t_id="通化 TNH TongHua">通化</span><span t_id="通辽 TGO TongLiao">通辽</span><span t_id="铜仁 TEN TongRen">铜仁</span><span t_id="唐山 TVS TangShan">唐山</span><span t_id="吐鲁番 TLQ TuLuFan">吐鲁番</span><em class="letters red">T</em></div><div 
id="index_W"><span t_id="乌鲁木齐 URC WuLuMuQi">乌鲁木齐</span><span t_id="万州 WXN WanZhou">万州</span><span t_id="潍坊 WEF WeiFang">潍坊</span><span t_id="威海 WEH WeiHai">威海</span><span t_id="文山 WNH WenSan">文山</span><span t_id="温州 WNZ WenZhou">温州</span><span t_id="乌海 WUA WuHai">乌海</span><span t_id="武汉 WUH WuHan">武汉</span><span t_id="无锡 WUX WuXi">无锡</span><span t_id="武夷山 WUS WuYiShan">武夷山</span><span t_id="梧州 WUZ WuZhou">梧州</span><span t_id="乌兰察布 UCB WuLanChaBu">乌兰察布</span><span t_id="五台山 WTS WuTaiShan">五台山</span><em class="letters red">W</em></div></li><li><div id="index_X"><span t_id="香格里拉 DIG XIANGGELILA">香格里拉</span><span t_id="夏河 gxh XiaHe">夏河</span><span t_id="西安 XIY XiAn">西安</span><span t_id="西昌 XIC XiChang">西昌</span><span t_id="西宁 XNN XiNing">西宁</span><span t_id="厦门 XMN XiaMen">厦门</span><span t_id="襄阳 XFN XiangYang">襄阳</span><span t_id="锡林浩特 XIL XiLinHot">锡林浩特</span><span t_id="兴义 ACX XingYi">兴义</span><span t_id="徐州 XUZ XuZhou">徐州</span><span t_id="忻州 WUT XinZhou">忻州</span><em class="letters red">X</em></div><div id="index_Y"><span t_id="扬州 YTY YangZhou">扬州</span><span t_id="延安 ENY YanAn">延安</span><span t_id="盐城 YNZ YanCheng">盐城</span><span t_id="延吉 YNJ YanJi">延吉</span><span t_id="烟台 YNT YanTai">烟台</span><span t_id="宜宾 YBP YiBin">宜宾</span><span t_id="宜昌 YIH YiChang">宜昌</span><span t_id="伊宁 YIN YiNing">伊宁</span><span t_id="义乌 YIW YiWu">义乌</span><span t_id="宜春 YIC YiChun">宜春</span><span t_id="鄂尔多斯 DSN YiJinHuoLuo">鄂尔多斯</span><span t_id="银川 INC YinChuan">银川</span><span t_id="英德 ENE YingDe">英德</span><span t_id="永州 LLF YongZhou">永州</span><span t_id="榆林 UYN YuLin">榆林</span><span t_id="玉树 YUS YuShu">玉树</span><span t_id="运城 YCU YunCheng">运城</span><span t_id="芽庄金兰 CXR YaZhuangJinLan">芽庄金兰</span><span t_id="营口 YKH YingKou">营口</span><em class="letters red">Y</em></div><div id="index_Z"><span t_id="湛江 ZHA ZhanJiang">湛江</span><span t_id="张家界 DYG ZhangJiaJie">张家界</span><span t_id="张家口 ZQZ ZhangJiaKou">张家口</span><span t_id="昭通 ZAT ZhaoTong">昭通</span><span t_id="郑州 CGO 
ZhengZhou">郑州</span><span t_id="芷江 HJJ ZhiJiang">芷江</span><span t_id="中卫 ZHY ZhongWei">中卫</span><span t_id="重庆舟白 JIQ ZhouBai">重庆舟白</span><span t_id="舟山 HSN ZhouShan">舟山</span><span t_id="珠海 ZUH ZhuHai">珠海</span><span t_id="张掖 YZY ZhangYe">张掖</span><span t_id="遵义 ZYI ZunYi">遵义</span><em class="letters red">Z</em></div></li>
</ul>

抓取网页中数据 -----51book中城市码的更多相关文章

  1. 转 PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)

    PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)   通过curl_setopt()函数可以方便快捷的抓取网页(采集很方便),curl_setopt 是php的一个 ...

  2. 写论文,没数据?R语言抓取网页大数据

    写论文,没数据?R语言抓取网页大数据 纵观国内外,大数据的市场发展迅猛,政府的扶持也达到了空前的力度,甚至将大数据纳入发展战略.如此形势为社会各界提供了很多机遇和挑战,而我们作为卫生(医学)统计领域的 ...

  3. PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)

    通过curl_setopt()函数可以方便快捷的抓取网页(采集很方便),curl_setopt 是php的一个扩展库 使用条件:需要在php.ini 中配置开启.(PHP 4 >= 4.0.2) ...

  4. java抓取东方财富股票数据(附源码)

    背景 前段时间给朋友写了一个自动抓取同花顺股票数据的程序,不少人觉得不错. 这几天后台有粉丝给我留言让我也抓一下东方财富的数据,说东方财富的数据特别难抓,我还真不一定能搞得定. 本来我是一个德艺双馨且 ...

  5. 一个我经常用到的采集网页数据抓取网页获取数据的PHP函数类

    class get_c_str { var $str; var $start_str; var $end_str; var $start_pos; var $end_pos; var $c_str_l ...

  6. delphi 7中使用idhttp抓取网页 解决假死现象

    在delphi 7中使用idhttp抓取网页,造成窗口无反应的假死状态.通过搜索获得两种方法. 1.写在线程中,但是调用比较麻烦 2.使用delphi 提供的idantifreeze(必须安装indy ...

  7. delphi 7中使用idhttp抓取网页 解决假死现象(使用TIdAntiFreezeControl控件)

    在delphi 7中使用idhttp抓取网页,造成窗口无反应的假死状态.通过搜索获得两种方法. 1.写在线程中,但是调用比较麻烦 2.使用delphi 提供的idantifreeze(必须安装indy ...

  8. python 解决抓取网页中的中文显示乱码问题

    关于爬虫乱码有很多各式各样的问题,这里不仅是中文乱码,编码转换.还包括一些如日文.韩文 .俄文.藏文之类的乱码处理,因为解决方式是一致的,故在此统一说明. 网络爬虫出现乱码的原因 源网页编码和爬取下来 ...

  9. Java 抓取网页中的内容【持续更新】

    背景:前几天复习Java的时候看到URL类,当时就想写个小程序试试,迫于考试没有动手,今天写了下,感觉还不错 内容1. 抓取网页中的URL 知识点:Java URL+ 正则表达式 import jav ...

随机推荐

  1. oracle数据库一些用户管理语句

    查询所有数据库用户 select * from dba_users 查看数据库名称 select name from v$database 查看权限 select * from user_sys_pr ...

  2. Spark编程模型及RDD操作

    转载自:http://blog.csdn.net/liuwenbo0920/article/details/45243775 1. Spark中的基本概念 在Spark中,有下面的基本概念.Appli ...

  3. java操作mongodb——查询数据

    通过find方法查询集合中的文档信息 -------------------------------------------------------- find() 查询所有文档信息,返回FindIt ...

  4. ECS活动真实IP (前端存在SLB)

    log_format main 'realip:$http_x_forwarded_for slbip:$remote_addr-$remote_user [$time_local] "$r ...

  5. Beyond Compare V3.2.3 Beta 中文版

    软件名称: Beyond Compare V3.2.3 Beta 中文版 软件语言: 简体中文 授权方式: 免费软件 运行环境: Win7 / Vista / Win2003 / WinXP 软件大小 ...

  6. OpenGL杂七杂八

    Projection Matrix 投影矩阵 3D -> 2D PFD_DOUBLEBUFFER 双缓冲 在图形图象处理编程过程中,双缓冲是一种基本的技术.我们知道,如果窗体在响应WM_PAIN ...

  7. 【转】【Egit】如何将eclipse中的项目上传至Git

    1.下载egit插件 打开Eclipse,git需要eclipse授权,通过网页是无法下载egit的安装包的.在菜单栏依次打开eclipse→help→install new software→add ...

  8. linux upstart启动配置

    程序名.conf放在/etc/init/目录下# 注释 description "your-server" author "xxx" start on run ...

  9. Java 并发 线程的生命周期

    Java 并发 线程的生命周期 @author ixenos 线程的生命周期 线程状态: a)     New 新建 b)     Runnable 可运行 c)     Running 运行 (调用 ...

  10. 编译搭建Lamp服务器

    Lamp 是目前倍受欢迎的一种网站服务器.其主要有linux+apache+mysql+php 组成.由于其组成成员都是开源免费的产品,所以被作为中小型网站服务器的选择.LZ之前在学校学linux的时 ...