================== 获取网页中span标签里面的t_id的值

/**
 * Fetch the test page and print the first two whitespace-separated tokens of
 * every <span ... t_id="..."> attribute value.
 *
 * Each entry is echoed as "<first> <second>" followed by <br/>, wrapped in a
 * <pre> block that is preceded by a UTF-8 content-type meta tag. Output is
 * written directly with echo; nothing is returned.
 *
 * If the page cannot be fetched the method returns without printing anything
 * (file_get_contents() has already raised its own warning at that point).
 *
 * @return void
 */
public function getpreg(){
    $www = 'http://monkey.test.tripb.cn/test/preg.html';
    $html = file_get_contents($www);
    if ($html === false) {
        // Nothing to parse; avoid emitting an empty bogus row.
        return;
    }

    // Group 1 captures the opening quote and \1 requires the same quote to
    // close the value. The mandatory \s before t_id keeps attributes whose
    // name merely ends in "t_id" (e.g. data-t_id) from matching.
    $search = '/<span\b[^>]*\st_id=([\'"])([^\'"]+)\1/';
    $found = preg_match_all($search, $html, $t_id);

    echo '<meta http-equiv="Content-type" content="text/html; charset=utf-8">';
    echo '<pre>';

    // $found is false on a regex error and 0 when no span carries a t_id.
    if ($found) {
        foreach ($t_id[2] as $value) {
            // Values may contain line breaks or repeated spaces, so split on
            // any whitespace run and drop empty pieces.
            $parts = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
            if (empty($parts)) {
                continue;
            }

            // Only the first two tokens are shown; a value with a single
            // token is printed on its own instead of reading a missing index.
            $line = implode(' ', array_slice($parts, 0, 2));

            // The text comes from a remote page, so escape it for HTML output.
            echo htmlspecialchars($line, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo '<br/>';
        }
    }

    echo '</pre>';
}

==================          test/preg.html

<ul id="popup_content">
<li><div id="index_A"><span t_id="阿克苏 AKU Aksu">阿克苏</span><span t_id="阿尔山 YIE Alshan">阿尔山</span><span t_id="阿里 NGQ Ali">阿里</span><span t_id="阿勒泰 AAT Altay">阿勒泰</span><span t_id="安康 AKA AnKang">安康</span><span t_id="安庆 AQG AnQing">安庆</span><span t_id="鞍山 AOG AnShan">鞍山</span><span t_id="安顺 AVA AnShun">安顺</span><span t_id="阿拉善左旗 AXF ALaShanZuoQi">阿拉善左旗</span><span t_id="阿拉善右旗 RHT ALaShanYouQi">阿拉善右旗</span><span t_id="阿坝 AHJ ABa">阿坝</span><em class="letters red">A</em></div><div id="index_B"><span t_id="百色 AEB BaiSe">百色</span><span t_id="蚌埠 BFU BangBu">蚌埠</span><span t_id="保山 BSD BaoShan">保山</span><span t_id="包头 BAV BaoTou">包头</span><span t_id="巴彦淖尔 RLK BaYanNaoEr">巴彦淖尔</span><span t_id="北海 BHY BeiHai">北海</span><span t_id="北京 PEK BeiJing">北京</span><span t_id="北京南苑 NAY BeiJingNanYuan">北京南苑</span><span t_id="毕节 BFJ BiJie">毕节</span><span t_id="博乐 BPL BoLe">博乐</span><span t_id="北戴河 BPE BeiDaiHe">北戴河</span><em class="letters red">B</em></div><div id="index_C"><span t_id="长春 CGQ ChangChun">长春</span><span t_id="常德 CGD ChangDe">常德</span><span t_id="昌都 BPX ChangDu">昌都</span><span t_id="长沙 CSX ChangSha">长沙</span><span t_id="长治 CIH ChangZhi">长治</span><span t_id="常州 CZX ChangZhou">常州</span><span t_id="长白山 NBS ChangBaiShan">长白山</span><span t_id="朝阳 CHG ChaoYang">朝阳</span><span t_id="成都 CTU ChengDu">成都</span><span t_id="赤峰 CIF ChiFeng">赤峰</span><span t_id="重庆 CKG ChongQing">重庆</span><span t_id="清莱 CEI CHIANG RIA">清莱</span><em class="letters red">C</em></div><div id="index_D"><span t_id="稻城亚丁 DCY DaoCheng">稻城亚丁</span><span t_id="大理 DLU DaLi">大理</span><span t_id="大连 DLC DaLian">大连</span><span t_id="大同 DAT DaTong">大同</span><span t_id="达县 DAX DaXian">达县</span><span t_id="丹东 DDG DanDong">丹东</span><span t_id="大庆 DQA DAQING">大庆</span><span t_id="德宏芒市 LUM DeHong">德宏芒市</span><span t_id="东营 DOY DongYing">东营</span><span t_id="敦煌 DNH DunHuang">敦煌</span><span t_id="德令哈 HXD DeLingHa">德令哈</span><span t_id="大叻 DLI DaLat">大叻</span><em class="letters red">D</em></div><div id="index_E"><span 
t_id="额济纳旗 EJN EJiNaQi">额济纳旗</span><span t_id="恩施 ENH EnShi">恩施</span><span t_id="二连浩特 ERL ErLianHaoTe">二连浩特</span><em class="letters red">E</em></div><div id="index_F"><span t_id="佛山 FUO FoShan">佛山</span><span t_id="阜阳 FUG FuYang">阜阳</span><span t_id="富蕴 FYN FuYun">富蕴</span><span t_id="福州 FOC FuZhou">福州</span><span t_id="抚远 FYJ FuYuan">抚远</span><em class="letters red">F</em></div></li><li><div id="index_G"><span t_id="赣州 KOW GanZhou">赣州</span><span t_id="格尔木 GOQ GeErMu">格尔木</span><span t_id="广汉 GHN GuangHan">广汉</span><span t_id="广元 GYS GuangYuan">广元</span><span t_id="广州 CAN GuangZhou">广州</span><span t_id="桂林 KWL GuiLin">桂林</span><span t_id="贵阳 KWE Guiyang">贵阳</span><span t_id="固原 GYU GuYuan">固原</span><em class="letters red">G</em></div><div id="index_H"><span t_id="哈密 HMI HaMi">哈密</span><span t_id="哈尔滨 HRB HaRBin">哈尔滨</span><span t_id="海口 HAK HaiKou">海口</span><span t_id="海拉尔 HLD Hailar">海拉尔</span><span t_id="汉中 HZG HanZhong">汉中</span><span t_id="邯郸 HDG HanDan">邯郸</span><span t_id="杭州 HGH HangZhou">杭州</span><span t_id="合肥 HFE HeFei">合肥</span><span t_id="和田 HTN HeTan">和田</span><span t_id="黑河 HEK HeiHe">黑河</span><span t_id="衡阳 HNY HengYang">衡阳</span><span t_id="呼和浩特 HET HuHeHaoTe">呼和浩特</span><span t_id="淮安 HIA HuaiAn">淮安</span><span t_id="黄山 TXN HuangShan">黄山</span><span t_id="黄岩 HYN HuangYan">黄岩</span><span t_id="乌兰浩特 HLH HUlanhot">乌兰浩特</span><span t_id="顺化 HUI HUE">顺化</span><span t_id="惠州 HUZ HuiZhou">惠州</span><span t_id="河池 HCJ HeChi">河池</span><span t_id="花土沟 HTT HuaTuGou">花土沟</span><em class="letters red">H</em></div><div id="index_I"></div><div id="index_J"><span t_id="九华山 JUH JiuHuaShan">九华山</span><span t_id="吉安 KNC JiAn">吉安</span><span t_id="济宁 JNG JiNing">济宁</span><span t_id="吉林 JIL JiLin">吉林</span><span t_id="济南 TNA JiNan">济南</span><span t_id="鸡西 JXA JiXi">鸡西</span><span t_id="佳木斯 JMU JiaMuSi">佳木斯</span><span t_id="嘉峪关 JGN JiaYuGuan">嘉峪关</span><span t_id="锦州 JNZ JinZhou">锦州</span><span t_id="景德镇 JDZ JingDeZhen">景德镇</span><span t_id="井冈山 JGS 
JingGangShan">井冈山</span><span t_id="西双版纳 JHG JingHong">西双版纳</span><span t_id="九江 JIU JiuJiang">九江</span><span t_id="酒泉 CHW JiuQuan">酒泉</span><span t_id="九寨沟 JZH JIUZHAIGOU">九寨沟</span><span t_id="揭阳 SWA JieYang">揭阳</span><span t_id="金昌 JIC JingChang">金昌</span><span t_id="加格达奇 JGD JiaGeDaQi">加格达奇</span><em class="letters red">J</em></div></li><li><div id="index_K"><span t_id="喀什 KHG KaShi">喀什</span><span t_id="喀纳斯 KJI KaNaSi">喀纳斯</span><span t_id="康定 KGT KangDing">康定</span><span t_id="克拉玛依 KRY Karamay">克拉玛依</span><span t_id="库尔勒 KRL Korla">库尔勒</span><span t_id="昆明 KMG KuMing">昆明</span><span t_id="库车 KCA KuQa">库车</span><span t_id="凯里黄平 KJH KaiLiHuangPing">凯里黄平</span><span t_id="苏梅岛 USM KohSamui">苏梅岛</span><em class="letters red">K</em></div><div id="index_L"><span t_id="兰州 LHW LanZhou">兰州</span><span t_id="拉萨 LXA Lasa">拉萨</span><span t_id="荔波 LLB LiBo">荔波</span><span t_id="丽江 LJG LiJiang">丽江</span><span t_id="吕梁 LLV LvLiang">吕梁</span><span t_id="黎平 HZH LiPing">黎平</span><span t_id="丽水 RSU LiShui">丽水</span><span t_id="连云港 LYG LianYunGang">连云港</span><span t_id="连城 LCX LianCheng">连城</span><span t_id="临沧 LNJ LinChang">临沧</span><span t_id="临沂 LYI LinYi">临沂</span><span t_id="伊春林都 LDS Lindu">伊春林都</span><span t_id="林芝 LZY LinZhi">林芝</span><span t_id="柳州 LZH LiuZhou">柳州</span><span t_id="泸州 LZO LuZhou">泸州</span><span t_id="洛阳 LYA LuoYang">洛阳</span><span t_id="六盘水 LPF LiuPanShui">六盘水</span><span t_id="临汾 LFQ LinFen">临汾</span><em class="letters red">L</em></div><div id="index_M"><span t_id="绵阳 MIG ManYang">绵阳</span><span t_id="满洲里 NZH ManZhouLi">满洲里</span><span t_id="梅县 MXZ MeiXian">梅县</span><span t_id="漠河 OHE MOHE">漠河</span><span t_id="牡丹江 MDG MuDanJiang">牡丹江</span><em class="letters red">M</em></div><div id="index_N"><span t_id="那拉提 NLT NaLaTi">那拉提</span><span t_id="南昌 KHN NanChang">南昌</span><span t_id="南充 NAO NanChong">南充</span><span t_id="南京 NKG NanJing">南京</span><span t_id="南宁 NNG NanNing">南宁</span><span t_id="南通 NTG NanTong">南通</span><span t_id="南阳 NNY 
NanYang">南阳</span><span t_id="宁波 NGB NingBo">宁波</span><span t_id="良乌 NYU NYAUNG-U">良乌</span><span t_id="宁蒗 NLH NingLang">宁蒗</span><em class="letters red">N</em></div></li><li><div id="index_P"><span t_id="攀枝花 PZI PanZhiHua">攀枝花</span><span t_id="普洱 SYM PuEr">普洱</span><span t_id="濮阳 PYY PuYang">濮阳</span><span t_id="帕罗 PBH PARO">帕罗</span><em class="letters red">P</em></div><div id="index_Q"><span t_id="泉州晋江 JJN QuanZhouJinJiang">泉州晋江</span><span t_id="衢州 JUZ QuZhou">衢州</span><span t_id="齐齐哈尔 NDG QiQiHar">齐齐哈尔</span><span t_id="且末 IQM QieMo">且末</span><span t_id="秦皇岛 SHP QinHuangDao">秦皇岛</span><span t_id="青岛 TAO QingDao">青岛</span><span t_id="庆阳 IQN QingYang">庆阳</span><span t_id="琼海 BAR QiongHai">琼海</span><em class="letters red">Q</em></div><div id="index_R"><span t_id="日喀则 RKZ RiKaZe">日喀则</span><span t_id="日照 RIZ RiZhao">日照</span><em class="letters red">R</em></div><div id="index_S"><span t_id="三亚 SYX SanYa">三亚</span><span t_id="沙市 SHS ShaShi">沙市</span><span t_id="汕头 SWA ShanTou">汕头</span><span t_id="上海虹桥 SHA ShangHai">上海虹桥</span><span t_id="上海浦东 PVG ShangHaiPoDong">上海浦东</span><span t_id="沈阳 SHE ShenYang">沈阳</span><span t_id="深圳 SZX ShenZhen">深圳</span><span t_id="石家庄 SJW ShiJiaZhuang">石家庄</span><span t_id="思茅 SYM SiMao">思茅</span><span t_id="神农架 HPG ShenNongJia">神农架</span><span t_id="静冈 FSZ Shizuoka">静冈</span><span t_id="苏州 SZV SuZhou">苏州</span><span t_id="十堰 WDS ShiYan">十堰</span><span t_id="三明 SQJ SanMing">三明</span><span t_id="石河子 SHF ShiHeZi">石河子</span><em class="letters red">S</em></div><div id="index_T"><span t_id="台州 HYN TaiZhou">台州</span><span t_id="塔城 TCG TaCheng">塔城</span><span t_id="太原 TYN TaiYuan">太原</span><span t_id="腾冲 TCZ TengChong">腾冲</span><span t_id="天津 TSN TianJin">天津</span><span t_id="天水 THQ TianShui">天水</span><span t_id="通化 TNH TongHua">通化</span><span t_id="通辽 TGO TongLiao">通辽</span><span t_id="铜仁 TEN TongRen">铜仁</span><span t_id="唐山 TVS TangShan">唐山</span><span t_id="吐鲁番 TLQ TuLuFan">吐鲁番</span><em class="letters red">T</em></div><div 
id="index_W"><span t_id="乌鲁木齐 URC WuLuMuQi">乌鲁木齐</span><span t_id="万州 WXN WanZhou">万州</span><span t_id="潍坊 WEF WeiFang">潍坊</span><span t_id="威海 WEH WeiHai">威海</span><span t_id="文山 WNH WenSan">文山</span><span t_id="温州 WNZ WenZhou">温州</span><span t_id="乌海 WUA WuHai">乌海</span><span t_id="武汉 WUH WuHan">武汉</span><span t_id="无锡 WUX WuXi">无锡</span><span t_id="武夷山 WUS WuYiShan">武夷山</span><span t_id="梧州 WUZ WuZhou">梧州</span><span t_id="乌兰察布 UCB WuLanChaBu">乌兰察布</span><span t_id="五台山 WTS WuTaiShan">五台山</span><em class="letters red">W</em></div></li><li><div id="index_X"><span t_id="香格里拉 DIG XIANGGELILA">香格里拉</span><span t_id="夏河 gxh XiaHe">夏河</span><span t_id="西安 XIY XiAn">西安</span><span t_id="西昌 XIC XiChang">西昌</span><span t_id="西宁 XNN XiNing">西宁</span><span t_id="厦门 XMN XiaMen">厦门</span><span t_id="襄阳 XFN XiangYang">襄阳</span><span t_id="锡林浩特 XIL XiLinHot">锡林浩特</span><span t_id="兴义 ACX XingYi">兴义</span><span t_id="徐州 XUZ XuZhou">徐州</span><span t_id="忻州 WUT XinZhou">忻州</span><em class="letters red">X</em></div><div id="index_Y"><span t_id="扬州 YTY YangZhou">扬州</span><span t_id="延安 ENY YanAn">延安</span><span t_id="盐城 YNZ YanCheng">盐城</span><span t_id="延吉 YNJ YanJi">延吉</span><span t_id="烟台 YNT YanTai">烟台</span><span t_id="宜宾 YBP YiBin">宜宾</span><span t_id="宜昌 YIH YiChang">宜昌</span><span t_id="伊宁 YIN YiNing">伊宁</span><span t_id="义乌 YIW YiWu">义乌</span><span t_id="宜春 YIC YiChun">宜春</span><span t_id="鄂尔多斯 DSN YiJinHuoLuo">鄂尔多斯</span><span t_id="银川 INC YinChuan">银川</span><span t_id="英德 ENE YingDe">英德</span><span t_id="永州 LLF YongZhou">永州</span><span t_id="榆林 UYN YuLin">榆林</span><span t_id="玉树 YUS YuShu">玉树</span><span t_id="运城 YCU YunCheng">运城</span><span t_id="芽庄金兰 CXR YaZhuangJinLan">芽庄金兰</span><span t_id="营口 YKH YingKou">营口</span><em class="letters red">Y</em></div><div id="index_Z"><span t_id="湛江 ZHA ZhanJiang">湛江</span><span t_id="张家界 DYG ZhangJiaJie">张家界</span><span t_id="张家口 ZQZ ZhangJiaKou">张家口</span><span t_id="昭通 ZAT ZhaoTong">昭通</span><span t_id="郑州 CGO 
ZhengZhou">郑州</span><span t_id="芷江 HJJ ZhiJiang">芷江</span><span t_id="中卫 ZHY ZhongWei">中卫</span><span t_id="重庆舟白 JIQ ZhouBai">重庆舟白</span><span t_id="舟山 HSN ZhouShan">舟山</span><span t_id="珠海 ZUH ZhuHai">珠海</span><span t_id="张掖 YZY ZhangYe">张掖</span><span t_id="遵义 ZYI ZunYi">遵义</span><em class="letters red">Z</em></div></li>
</ul>

抓取网页中数据 -----51book中城市码的更多相关文章

  1. 转 PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)

    PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)   通过curl_setopt()函数可以方便快捷的抓取网页(采集很方便),curl_setopt 是php的一个 ...

  2. 写论文,没数据?R语言抓取网页大数据

    写论文,没数据?R语言抓取网页大数据 纵观国内外,大数据的市场发展迅猛,政府的扶持也达到了空前的力度,甚至将大数据纳入发展战略.如此形势为社会各界提供了很多机遇和挑战,而我们作为卫生(医学)统计领域的 ...

  3. PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)

    通过curl_setopt()函数可以方便快捷的抓取网页(采集很方便),curl_setopt 是php的一个扩展库 使用条件:需要在php.ini 中配置开启.(PHP 4 >= 4.0.2) ...

  4. java抓取东方财富股票数据(附源码)

    背景 前段时间给朋友写了一个自动抓取同花顺股票数据的程序,不少人觉得不错. 这几天后台有粉丝给我留言让我也抓一下东方财富的数据,说东方财富的数据特别难抓,我还真不一定能搞得定. 本来我是一个德艺双馨且 ...

  5. 一个我经常用到的采集网页数据抓取网页获取数据的PHP函数类

    class get_c_str { var $str; var $start_str; var $end_str; var $start_pos; var $end_pos; var $c_str_l ...

  6. delphi 7中使用idhttp抓取网页 解决假死现象

    在delphi 7中使用idhttp抓取网页,造成窗口无反应的假死状态.通过搜索获得两种方法. 1.写在线程中,但是调用比较麻烦 2.使用delphi 提供的idantifreeze(必须安装indy ...

  7. delphi 7中使用idhttp抓取网页 解决假死现象(使用TIdAntiFreezeControl控件)

    在delphi 7中使用idhttp抓取网页,造成窗口无反应的假死状态.通过搜索获得两种方法. 1.写在线程中,但是调用比较麻烦 2.使用delphi 提供的idantifreeze(必须安装indy ...

  8. python 解决抓取网页中的中文显示乱码问题

    关于爬虫乱码有很多各式各样的问题,这里不仅是中文乱码,编码转换.还包括一些如日文.韩文 .俄文.藏文之类的乱码处理,因为解决方式是一致的,故在此统一说明. 网络爬虫出现乱码的原因 源网页编码和爬取下来 ...

  9. Java 抓取网页中的内容【持续更新】

    背景:前几天复习Java的时候看到URL类,当时就想写个小程序试试,迫于考试没有动手,今天写了下,感觉还不错 内容1. 抓取网页中的URL 知识点:Java URL+ 正则表达式 import jav ...

随机推荐

  1. mysql为成绩表添加名次

    对于一种这样的表,为score添加名次

  2. Openjudge-NOI题库-蛇形填充数组

    题目描述 Description 用数字1,2,3,4,...,n*n这n²个数蛇形填充规模为n*n的方阵. 蛇形填充方法为: 对于每一条左下-右上的斜线,从左上到右下依次编号1,2,...,2n-1 ...

  3. usaco月赛,2017.1总结

    T1:跳舞的奶牛 大致题意:一个体积为k的舞台能够同时容纳k只奶牛一起跳舞,他们每头奶牛的跳舞时间不同,如果有一只奶牛跳完了第k+1头奶牛就会立刻上场跳舞,当所有奶牛跳完舞以后我们认为这次表演结束.现 ...

  4. brew install nvm

    brew install nvm mkdir ~/.nvm nano ~/.bash_profile ctrl+x 退出 source ~/.bash_profile echo $NVM_DIR nvm ...

  5. python3 流程控制

    表达式if ... else >>> if 3 > 4: ... print('False') ... else: ... print('True') ... True 表达式 ...

  6. edittext设置为密文显示

    et_msg.setInputType(InputType.TYPE_CLASS_TEXT |InputType.TYPE_TEXT_VARIATION_PASSWORD);

  7. find中的-print0和xargs中-0的区别

    默认情况下, find 每输出一个文件名, 后面都会接着输出一个换行符 ('\n'), 因此我们看到的 find 的输出都是一行一行的: [bash-4.1.5] ; ls -l total 0 -r ...

  8. Qt 打开指定的文件

    最近项目用到使用本地的office打开指定的文件,记录一下代码: QString fileName = QFileDialog::getOpenFileName(this, tr("Open ...

  9. CodeForces 675E Trains and Statistic

    贪心,递推,线段树,$RMQ$. 假设我们记$ans[i]$是以$i$点为起点对答案的贡献,那么答案就是$\sum\limits_{i = 1}^n {ans[i]}$. $ans[i]$怎么计算呢? ...

  10. java中使用数组和链表简单实现SJBMap

    import java.util.LinkedList; public class SJBMap { private Object[] elementData; private int size; p ...