python2.x urllib2和urllib的使用

1.最简单用法

　　urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)

 import urllib2

 import urllib

 # Fetch the page with the module-level urlopen(); the returned response
 # object is inspected by the print statements below.
 response = urllib2.urlopen("http://www.baidu.com")

 # HTTP status code of the response.
 print 'getcode():',response.getcode()

 # URL reported by the response via geturl().
 print 'geturl():',response.geturl()

 # The same URL exposed as an attribute.
 print 'url:',response.url

 # Response headers, printed on their own lines after the label.
 print 'headers:\n',response.headers

 # Status message that accompanies the status code.
 print 'msg:',response.msg

 #-------------------------------------out--------------------------------------

 getcode(): 200

 geturl(): http://www.baidu.com

 url: http://www.baidu.com

 headers:

 Date: Thu, 29 Dec 2016 06:28:36 GMT

 Content-Type: text/html; charset=utf-8

 Transfer-Encoding: chunked

 Connection: Close

 Vary: Accept-Encoding

 Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: BDSVRTM=0; path=/

 Set-Cookie: BD_HOME=0; path=/

 Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com

 P3P: CP=" OTI DSP COR IVA OUR IND COM "

 Cache-Control: private

 Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f

 Expires: Thu, 29 Dec 2016 06:27:54 GMT

 X-Powered-By: HPHP

 Server: BWS/1.1

 X-UA-Compatible: IE=Edge,chrome=1

 BDPAGETYPE: 1

 BDQID: 0x889c1bcd00004be7

 BDUSERID: 0

 msg: OK

获取html内容

 # NOTE(review): these three calls look like alternatives rather than a
 # sequence -- presumably the body can only be consumed once, so after
 # read() the later calls would return nothing; confirm before chaining.
 print response.read()     # returns the whole page as one str

 print response.readline() # returns one line per call

 print response.readlines() # returns the lines as a list

2. 构造Request 设置headers

 def set_headers():

     #构造Request,设置headers

     #__init__(self, url, data=None, headers={},origin_req_host=None, unverifiable=False)

     import urllib2

     headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

     request = urllib2.Request("http://localhost:5000/urllib2testget",headers=headers)

     response = urllib2.urlopen(request)

     print request.headers

     #追加一个header

     request.add_header("addheader","nice")

     response = urllib2.urlopen(request)

     print request.headers

 set_headers()

 #--------------------------------输出:

 {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

 {"a": "", "": ""}

 ------------------------------------------------

 {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

 {"a": "", "": ""}

3.发送get请求,发送post请求

 def get_post():

     #get方式

     import urllib2

     import urllib

     headers = {'User-Agent':'liu bi'}

     values = {"username":"diaosir_get","password":"diao123_get"}

     data = urllib.urlencode(values)

     print '---------------------get:'

     url = "http://localhost:5000/urllib2testget"

     get_url=url+"?"+data

     request = urllib2.Request(get_url,headers=headers)

     response = urllib2.urlopen(request)

     print json.loads(response.read())

     print '---------------------post:'

     url = "http://localhost:5000/urllib2testpost"

     request = urllib2.Request(url,data,headers=headers)

     response = urllib2.urlopen(request)

     print json.loads(response.read())

 get_post()

 #---------------------------------------------------------输出:

 ---------------------get:

 {u'username': u'diaosir_get', u'password': u'diao123_get'}

 ---------------------post:

 {u'username': u'diaosir_get', u'password': u'diao123_get'}

post&get

4.代理模式设置

def set_proxies():

    #1.proxy_handler

    #2.创建operner

    #3.安装opener[非必须]

    #4.拿operner去请求url

    enable_proxy = True

    proxy_handler = urllib2.ProxyHandler({"http":'http://120.24.73.165:3128'})

    null_proxy_handler = urllib2.ProxyHandler({})

    if enable_proxy:

        opener = urllib2.build_opener(proxy_handler)#挂载opener

    else:

        opener = urllib2.build_opener(null_proxy_handler)

    request = urllib2.Request('http://www.baidu.com')

    print '---------------------不使用代理'

    response = urllib2.urlopen(request)

    print response.getcode(),request.host

    print '---------------------使用代理'

    response = opener.open(request)

    print response.getcode(),request.host

#----------------------------------------------------------输出

---------------------不使用代理

200 www.baidu.com

---------------------使用代理

200 120.24.73.165:3128

5.debug模式(原始代码在 urllib2.build_opener 中传入了未定义的 httpsHandler，运行前需要去掉)

 def debug_set():

     #代理，调试

     import  urllib2,urllib

     proxy_handler = urllib2.ProxyHandler({"http":'http://192.168.1.108:89'})

     #debuglog的使用

     httpHandler = urllib2.HTTPHandler(debuglevel=1)

     opener = urllib2.build_opener(httpHandler, httpsHandler,)

     urllib2.install_opener(opener)

     request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3',headers={'User-Agent':'liubi00'})

     response = opener.open(request)

     print response.getcode(),response.read()

 #-------------------------------------------输出:

 send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: 127.0.0.1:5000\r\nConnection: close\r\nUser-Agent: liubi00\r\n\r\n'

 reply: 'HTTP/1.0 200 OK\r\n'

 header: Content-Type: text/html; charset=utf-8

 header: Content-Length: 20

 header: Server: Werkzeug/0.11.11 Python/2.7.12

 header: Date: Fri, 30 Dec 2016 15:12:40 GMT

 200 {"a": "", "b": ""}

6.获取cookie存到cookie.txt

import cookielib

import  urllib2

def get_cookie():

    filename = 'cookie.txt'

    #声明一个MozillaCookieJar对象实例来保存cookie，之后写入文件

    cookie = cookielib.MozillaCookieJar(filename)

    #利用urllib2库的HTTPCookieProcessor对象来创建cookie处理器

    handler = urllib2.HTTPCookieProcessor(cookie)

    #通过handler来构建opener

    opener = urllib2.build_opener(handler,)

    request = urllib2.Request('http://www.baidu.com')

    request.add_header('User-Agent','fuckyou')

    response = opener.open(request)

    #保存cookie到文件

    cookie.save(ignore_discard=True, ignore_expires=True)

    print response.getcode()

get_cookie()

#----------------------------------------------输出:

200

7.通过cookie请求，更多查看http://www.cnblogs.com/sysu-blackbear/p/3629770.html

 import cookielib

 import urllib2

 def use_cookie():

     #cookie--从cookies.txt读取cookies,携带cookies请求

     cookie_file = 'cookie.txt'

     #创建MozillaCookieJar实例对象

     cookie = cookielib.MozillaCookieJar(cookie_file)

     #从文件中读取cookie内容到变量

     cookie.load( ignore_discard=True, ignore_expires=True)

     #创建请求的request

     req = urllib2.Request("http://www.baidu.com")

     #利用urllib2的build_opener方法创建一个opener

     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

     response = opener.open(req)

     print response.read()

8.异常处理

 def deal_errors():

     #异常处理

     import urllib2

     #HTTPError

     req = urllib2.Request('http://blog.csdn.net/cqcre')

     try:

         urllib2.urlopen(req)

     except urllib2.HTTPError, e:

         print e.code

         print e.reason

     #URLError

     requset = urllib2.Request('http://www.xxxxx.com')

     try:

         urllib2.urlopen(requset)

     except urllib2.URLError, e:

         print e.reason

     #HTTPERROR&URLERROR

     req = urllib2.Request('http://blog.csdn.net/cqcre')

     try:

         urllib2.urlopen(req)

     except urllib2.URLError, e:

         if hasattr(e,"code"):

             print e.code

         if hasattr(e,"reason"):

             print e.reason

     else:

         print "OK"

python2.x urllib2和urllib的使用的更多相关文章

Python2/3中的urllib库
urllib库对照速查表 Python2.X Python3.X urllib urllib.request, urllib.error, urllib.parse urllib2 urllib.re ...
Python2和Python3中urllib库中urlencode的使用注意事项
前言在Python中,我们通常使用urllib中的urlencode方法将字典编码,用于提交数据给url等操作,但是在Python2和Python3中urllib模块中所提供的urlencode的包 ...
python2核心类库：urllib、urllib2的区别和使用
urllib/urllib2都是接受URL请求的相关模块区别:1.urllib2可以接受一个Request类的实例来设置URL请求的headers,urllib仅可以接受URL.这意味着,你不可以伪装 ...
urllib与urllib2的学习总结(python2.7.X): python urllib与urllib2
https://www.cnblogs.com/wly923/archive/2013/05/07/3057122.html
python urllib2与urllib
1.urllib2可以接受一个Request对象,并以此可以来设置一个URL的headers,但是urllib只接收一个URL. 2.urllib模块可以提供进行urlencode的方法,该方法用于G ...
Python把json格式的string对象转变成dict对象操作、Python3不能使用urllib2、urllib.parse.urlencode(params).encode(encoding='UTF8')
json格式的string对象转变成dict对象操作 content=eval(content)#json字典转化 Python3不能使用urllib2 直接使用urllib.request替换urll ...
Python2 基于urllib2 的HTTP请求类
一个利用urllib2模块编写的下载器,虽然有了requests模块,但是毕竟标准库 import urllib2,random class strong_down(): def __init__(s ...
python2中urllib2模块带cookies使用方法
#!/usr/bin/python # coding=utf-8 #############方式1######################### import urllib2 cookie = & ...
python2.7 urllib2 爬虫
# _*_ coding:utf-8 _*_ import urllib2import cookielibimport randomimport refrom bs4 import Beautiful ...

随机推荐

LINQ TO SQL ——Group by
原文:LINQ TO SQL --Group by 分组在SQL中应用的十分普遍,在查询,统计时都有可能会用到它.LINQ TO SQL中同样具备group的功能,这篇我来讲下LINQ TO SQL中 ...
Android项目----AsyncTask异步操作
public abstract class AsyncTask extends Object java.lang.Object ↳ android.os.AsyncTask<Params, ...
HP quality center 9.0 邮件设置
[转载]HP quality center 9.0 邮件设置 (2010-09-20 10:28:03) 转载▼ 标签: 转载原文地址:HP quality center 9.0 邮件设置作者: ...
Linux内核策略介绍
Linux内核策略介绍学习笔记主要内容硬件策略 CPU 进程调度.系统调用.中断内存内存管理外存文件IO 网络协议栈其他时间管理进程调度内核的运行时间系统启动.中断发 ...
ibatis提示Unable to load embedded resource from assembly "Entity.Ce_SQL.xml,Entity".
原本以为是xml文件配置错误,尝试无果,最终原因未将xml文件的生成操作选择为嵌入的资源.很无语!
加密算法 MD5/SHA1
近来想学习函数式编程. 但是一直不知道怎么展开这个学习过程,目前的研究进度也不深入,想讲解一些原理也无从下手. 先简单的上一些算法,逐步分析语法和思想.虽然程度不深,但至少能记录这个过程. 本例子用F ...
【分享】史上最全的Python电子书教程资源下载
网上搜集的,点击即可下载,希望提供给有需要的人^_^ O'Reilly.Python.And.XML.pdf 2.02 MB OReilly - Programming Python 2nd. ...
Linux 宿主机安装 MiniGUI
去MiniGUI官方网站看的时候,很兴奋,安装竟然这么容易. 上帝总是在给你一个苹果之后,赏你一巴掌.我的确是高兴太早了. 首先看一下官网文档的说明步骤: (截取于官方文档) Installing r ...
c# in deep 之LINQ简介（1）
前两天公司进了一批书,在借阅jon skeet的c# in deep收获颇大,本书特点是介绍了不同版本的c#所增加的新特性.今天先写一下书中对linq的描述. 很多初学者在使用VS2010或2013写 ...
实现栈最小元素的min函数
#include<iostream> #include<stack> using namespace std; class min_stack { public: void p ...

python2.x urllib2和urllib的使用

python2.x urllib2和urllib的使用的更多相关文章

随机推荐

热门专题