



  • 使用urllib模块时会有诸多不便之处,总结如下:

    • 手动处理url编码
    • 手动处理post请求参数
    • 处理cookie和代理操作繁琐
    • ......
  • 使用requests模块:
    • 自动处理url编码
    • 自动处理post请求参数
    • 简化cookie和代理操作
    • ......



  • pip install requests


  • 指定url
  • 发起请求
  • 获得响应数据
  • 持久化存储



import requests

# Fetch a Sogou web-search result page for a user-supplied term and save
# the raw HTML to "<term>.html" in the current directory.
url = 'https://www.sogou.com/web'

# Build the query-string parameters. The search term has to be included
# here, otherwise the user's input never reaches the request; requests
# URL-encodes the values automatically.
wd = input("enter a word: ")
param = {
    'query': wd,
}

# UA spoofing: the User-Agent request header identifies the client that
# sends the request, so a browser-like value is supplied.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36',
}

# Send the GET request.
response = requests.get(url, params=param, headers=headers)

# Take the response body as raw bytes so it can be written unchanged.
page_text = response.content

# Persist the page to disk; binary mode matches the bytes payload.
fileName = wd + ".html"
with open(fileName, 'wb') as f:
    f.write(page_text)
print(f"{wd}下载成功")


# Download an image with the urllib module.
from urllib import request

url = "https://gss2.bdstatic.com/9fo3dSag_xI4khGkpoWK1HF6hhy/baike/w%3D268%3Bg%3D0/sign=081aba3563224f4a5799741531ccf76f/c83d70cf3bc79f3d423d2823b4a1cd11738b29c1.jpg"

# urlretrieve fetches the resource at `url` and writes it to `filename`.
request.urlretrieve(url=url, filename='ycy.jpg')



import requests
# Query the Baidu translate suggestion endpoint with a POST request and
# decode the JSON response.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36',
}

# Endpoint that the page calls via AJAX.
url = 'https://fanyi.baidu.com/sug'

# Build the form parameters from user input.
wd = input("enter a english word: ")
data = {
    "kw": wd,
}

# Send the POST request; `data` is submitted as form fields.
response = requests.post(url=url, data=data, headers=headers)

# When the response is known to be JSON, .json() returns the decoded
# object directly.
json_data = response.json()



import requests
# Look up KFC stores by a user-supplied keyword via a POST request and
# decode the JSON response.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36',
}

url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"

# The user's input is sent as the 'keyword' form field; the remaining
# fields are fixed values.
wd = input('请输入查询地点:')
data = {
    'cname': '',
    'pid': '',
    'keyword': wd,
    'pageIndex': '1',
    'pageSize': '100',
}

json_data = requests.post(url=url, data=data, headers=headers).json()



需求分析: 爬取指定页码范围内的公司列表,并获取每家公司的详情页数据

# 域名:
import requests
# Two-stage crawl: collect company IDs from a paged listing endpoint, then
# request the detail record for each collected ID.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36',
}

# Listing (home page) URL.
# NOTE(review): the URL is blank in the source; it must be filled in before
# this script can issue a request.
url = ''

id_list = []
start_page = int(input('起始页:'))
end_page = int(input('结束页:'))

# Stage 1: walk the inclusive page range and gather every item's ID.
for i in range(start_page, end_page + 1):
    data = {
        'on': 'true',
        'page': str(i),
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }
    json_data = requests.post(url=url, data=data, headers=headers).json()
    # print(json_data)
    for item in json_data['list']:
        id_list.append(item["ID"])

# Detail page URL.
# NOTE(review): also blank in the source; fill in before running.
url2 = ''

# Stage 2: fetch the detail JSON for each collected ID.
for id_item in id_list:
    data_id = {
        'id': id_item,
    }
    json_data2 = requests.post(url=url2, data=data_id, headers=headers).json()



