(为编写完善能拿下来数据)

企查查代码数据如下:

  1. #encoding:utf-8
  2. import requests
  3. from lxml import etree
  4. import random
  5. import re
  6. #目标采集地址
  7. base_url1='http://m.qichacha.com'
  8. base_url='https://m.qichacha.com/search?key='
  9.  
  10. user_agent=[
  11. "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
  12. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
  13. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
  14. "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ",
  15. "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
  16. "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
  17. "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ",
  18. "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
  19. "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ",
  20. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
  21. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
  22. ]
  23. cookie=[
  24. 'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695379242%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695379',
  25. 'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695791508%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695792',
  26. 'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695924595%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695925',
  27. 'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535696003819%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535696005'
  28. ]
  29. # 请求头设置
  30. headers={
  31. 'User-agent': random.choice(user_agent),
  32. 'cookie': random.choice(cookie)
  33. }
  34.  
  35. name_list=['成都创信广告有限公司']
  36.  
  37. for name in name_list:
  38. start_url=base_url+str(name)
  39. print(start_url)
  40. response = requests.get(start_url, headers=headers)
  41. _response=response.text
  42. # print(_response)
  43. # content = etree.HTML(_response)
  44. # print(content)
  45. #获取筛选信息链接
  46. search_url=re.findall('</div> <a href="(.*?)" class="a-decoration"> <div class="list-item"> <div class="list-item-top">',_response)
  47. url=base_url1+search_url[0]
  48. # print(url)
  49. # print('*'*100)
  50. response1 = requests.get(url,headers=headers)
  51. _response1=response1.text
  52. #公司名称
  53. company_name=re.findall('<div class="company-name">(.*?)<',_response1)[0]
  54. print('公司名称:'+company_name)
  55. #法人
  56. legal_person=re.findall('<a class="oper" href=".*?">(.*?)</a>',_response1)[0]
  57. print('法人:'+legal_person)
  58. #电话
  59. telephone=re.findall('<a href="tel:.*?" class="phone a-decoration">(.*?)</a>',_response1)[0]
  60. print('电话:'+telephone)
  61. # #地址
  62. # address=re.findall('</div> <div class="address">(.*?)</div> </div>',_response1)[0]
  63. # print(address)
  64. # # print('地址:'+address)
  65.  
  66. # #注册号
  67. # registration_number=re.findall('</div><div class="basic-item-right">(.*?)</div>',_response1)
  68. # print(registration_number)
  1. 执行结果如下图:

6.requests编写企查查爬虫的更多相关文章

  1. Python爬虫爬企查查数据

    因为制作B2b网站需要,需要入库企业信息数据.所以目光锁定企查查数据,废话不多说,开干! #-*- coding-8 -*- import requests import lxml import sy ...

  2. 如何用python无账号无限制获取企查查信息

    前言 文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理. PS:如有需要Python学习资料的小伙伴可以加点击下方链接自行获取http ...

  3. 12.通过微信小程序端访问企查查(采集工商信息)

    需要注意的问题: 一.1.微信端访问企查查小程序需要登录.2.访问抓包获取的url是有时效性的过一段时间就不能用了. http://xcx.qichacha.com/wxa/v1/base/getEn ...

  4. 11.采集手机端app企查查上司公司数据(未成功)

    ---恢复内容开始--- 采集企查查手机端app数据: 1.首先手机端安装app并usb连接电脑端,fiddler监控手机请求数据对数据进行分析抓取. 手机端界面与fiddler界面参照: 2.对获取 ...

  5. 企查查app新增企业数据抓取

    企查查每日新增企业数据抓取尚未完成的工作: 需要自行抓包获取设备id,appid,sign等等 sign和时间戳保持一致即可 把所有的数据库.redis配置 无法自动登录,账号需要独立 redis数据 ...

  6. XPath2Doc,一个半自动采集网页生成Word Docx文件的工具,带企查查和天眼查模板

    原始出处:https://www.cnblogs.com/Charltsing/p/XPath2Doc.html 很多人需要从网站采集一些数据填写Word模板,手工操作费时费力还容易出错,所以我给朋友 ...

  7. Nebula Graph 在企查查的应用

    本文首发于 Nebula Graph Community 公众号 背景 企查查是企查查科技有限公司旗下的一款企业信用查询工具,旨在为用户提供快速查询企业工商信息.法院判决信息.关联企业信息.法律诉讼. ...

  8. Python 利用Python编写简单网络爬虫实例3

    利用Python编写简单网络爬虫实例3 by:授客 QQ:1033553122 实验环境 python版本:3.3.5(2.7下报错 实验目的 获取目标网站“http://bbs.51testing. ...

  9. Python 利用Python编写简单网络爬虫实例2

    利用Python编写简单网络爬虫实例2 by:授客 QQ:1033553122 实验环境 python版本:3.3.5(2.7下报错 实验目的 获取目标网站“http://www.51testing. ...

随机推荐

  1. php MySQL()函数

    1.mysql_query() 函数执行一条 MySQL 查询:mysql_query(query,connection) 2.mysql_fetch_array() 函数 3.mysql_fetch ...

  2. POJ 2585:Window Pains(拓扑排序)

    Window Pains Time Limit: 1000MS   Memory Limit: 65536K Total Submissions: 2524   Accepted: 1284 Desc ...

  3. UVA 10305:Ordering Tasks(拓扑排序)

    #include <stdio.h> #include <string.h> #include <iostream> #include <algorithm& ...

  4. 《DSP using MATLAB》Problem 4.16

    代码: %% ------------------------------------------------------------------------ %% Output Info about ...

  5. Tomcat配置JNDI数据源的三种方式-转-http://blog.51cto.com/xficc/1564691

    第一种,单个应用独享数据源 就一步,找到Tomcat的server.xml找到工程的Context节点,添加一个私有数据源 Xml代码   <Context docBase="WebA ...

  6. MySQL账号安全设置

    ======================================================================== 推荐账号安全设置 在数据库服务器上严格控制操作系统的账 ...

  7. dup and dup2的剖析

    转:http://www.cnblogs.com/sdphome/archive/2011/04/30/2033381.html dup和dup2都可用来复制一个现存的文件描写叙述符,使两个文件描写叙 ...

  8. x86函数调用约定

    以下摘自<IDA Pro>,貌似有一些细节之处没有交代清楚呢,需要进一步思考.实践. 了解栈帧的基本概念后,接下来详细介绍它们的结构.下面的例子涉及x86体系结构和与常见的x86编译器(如 ...

  9. 用vmware安装gho文件心得

    在卡饭学到了不少知识,下面是我的一个心得分享,希望大家能用的上. 用vmware安装gho文件心得 方法1:diskgenius+ghostexp用vm新建一个空白硬盘虚拟机, 记住虚拟机文件的存储位 ...

  10. RAC3——RAC原理开始

    1.RAC并发 RAC的本质是一个数据库,只不过现在这个数据库运行在了多台计算机上,在原先的单实例中,一个进程是否可以修改一条数据,取决于是否有其他进程(同一台计算机上)并发修改.在RAC环境下,这种 ...