Python Learning Day5

Response响应

 import requests

 # Fetch the Baidu home page and print the main attributes of the Response object.
 response = requests.get('https://baidu.com')

 # Attributes of the Response object

 print(response.status_code)  # HTTP status code of the response

 print(response.url)  # URL of the response

 print(response.encoding)  # character encoding used to decode response.text

 response.encoding = 'utf-8'

 print(response.text)  # body as text (decoded with the encoding set above)

 print(response.content)  # raw body as bytes

 print(response.headers)  # response headers (sent by the server, not the request headers)

 print(response.history)  # earlier responses in the redirect chain, if any

 # Cookies can be read either as a cookie-jar object or as a plain dict.

 print(response.cookies)  # cookie-jar object

 print(response.cookies.get_dict())  # cookies converted to a dict

 print(response.cookies.items())  # cookies as (name, value) pairs, not a dict

 print(response.encoding)

 print(response.elapsed)  # time taken by the request

import requests

# Send a GET request to the video URL and save the body to a local .mp4 file.

url = 'https://vd3.bdstatic.com/mda-ic4pfhh3ex32svqi/hd/mda-ic4pfhh3ex32svqi.mp4?auth_key=1557973824-0-0-bfb2e69bb5198ff65e18065d91b2b8c8&bcevod_channel=searchbox_feed&pd=wisenatural&abtest=all.mp4'

# stream=True defers downloading the body until it is iterated, so the whole
# video never has to be held in memory at once.
response = requests.get(url, stream=True)

# Do NOT touch response.content here: it would read the entire body into
# memory and defeat the purpose of stream=True.

with open('love_for_GD.mp4', 'wb') as f:

    # iter_content() defaults to chunk_size=1 (one byte per write), which is
    # extremely slow for a video; read in 8 KiB chunks instead.
    for content in response.iter_content(chunk_size=8192):

        f.write(content)

证书验证(大部分网站都是https)

import urllib3

import requests

 # For an HTTPS request the server certificate is validated first; if it is
 # not valid an error is raised and the program aborts.

response = requests.get('https://www.xiaohuar.com')

print(response.status_code)

# Variant 1: suppress the error, but a warning is still emitted.
# NOTE: verify=False turns certificate validation off entirely; it is shown
# here for demonstration and is unsafe for real traffic.

response = requests.get('https://www.xiaohuar.com', verify=False)

# Certificate not verified: a warning is emitted (per the original notes the request returns 200).

print(response.status_code)

# Variant 2: suppress the error and also silence the warning.

urllib3.disable_warnings()  # turn the warnings off

response = requests.get('https://www.xiaohuar.com', verify=False)

print(response.status_code)

# Variant 3: supply a client certificate.

# Many sites use HTTPS but can be accessed without a client certificate; in
# most cases sending one is optional.

# Zhihu, Baidu, etc. work with or without one.

# Where it is a hard requirement it must be sent, e.g. a site restricted to
# specific users who may only access it once they hold the certificate.

urllib3.disable_warnings()  # turn the warnings off

# Pseudo-code: the certificate paths below are placeholders.

response = requests.get(

    'https://www.xiaohuar.com',

    # verify=False,

    # /path/server.crt is the certificate file, /path/key is the key file

    cert=('/path/server.crt', '/path/key'))

print(response.status_code)

超时设置

# Two forms of timeout: a float or a (connect, read) tuple.

timeout=0.1  # a single float is applied to both the connect and the read timeout

timeout=(0.1,0.2)  # 0.1 is the connect timeout, 0.2 is the read (receive data) timeout

import requests

# NOTE(review): 0.0001 s is far too short for a real connection, so this call
# is expected to raise a timeout exception; the prints below then never run.
response = requests.get('https://www.baidu.com',

                         timeout=0.0001)

print(response.elapsed)

print(response.status_code)

代理设置:先发送请求给代理,然后由代理帮忙发送(封ip是常见的事情)

import requests

# Proxy settings: the request is sent to the proxy, which forwards it to the
# target site (useful because sites commonly ban crawler IPs).
proxies = {
    # A proxy that needs credentials puts "user:password" before the '@':
    #     'http': 'http://tank:123@localhost:9527',
    # It is kept as a comment because a dict literal cannot hold two 'http'
    # keys: the later entry silently replaces the earlier one, so the
    # authenticated proxy was never actually used.
    'http': 'http://localhost:9527',
    'https': 'https://localhost:9527',
}

response = requests.get('https://www.12306.cn', proxies=proxies)

print(response.status_code)

认证设置

import requests

# Use the GitHub API to try out authentication.

url = 'https://api.github.com/user'

# Browser-like User-Agent header sent with every request in this section.
HEADERS = {

    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',

}

# Test 1: no credentials supplied; the request fails with 401.

response = requests.get(url, headers=HEADERS)

print(response.status_code)  # status code of the unauthenticated request

print(response.text)

打印结果:
{
"message": "Requires authentication",
"documentation_url": "https://developer.github.com/v3/users/#get-the-authenticated-user"
}

# Tests 2 and 3: HTTP Basic authentication against the GitHub API.
#
# Credentials are read from the environment instead of being hardcoded, so no
# real account secret is ever committed with the source. Set GITHUB_USER and
# GITHUB_TOKEN before running.
import os

from requests.auth import HTTPBasicAuth

github_user = os.environ.get('GITHUB_USER', '')
github_token = os.environ.get('GITHUB_TOKEN', '')

# Test 2: authenticate explicitly with requests.auth.HTTPBasicAuth; on success
# the user's information is returned.
response = requests.get(url, headers=HEADERS, auth=HTTPBasicAuth(github_user, github_token))

print(response.text)

# Test 3: a plain (user, password) tuple passed as `auth` is treated as
# HTTPBasicAuth by requests, so this is equivalent to test 2.
response = requests.get(url, headers=HEADERS, auth=(github_user, github_token))

print(response.text)

上传文件

# Upload files with a multipart POST. `files` is the requests keyword argument
# for file uploads. Each file is opened in a `with` block so the handle is
# closed as soon as the upload finishes instead of leaking.

# Upload a text file.
with open('user.txt', 'rb') as text_file:
    files1 = {'file': text_file}
    response = requests.post('http://httpbin.org/post', files=files1)

print(response.status_code)

print(response.text)

# Upload an image file.
with open('一拳.jpg', 'rb') as image_file:
    files2 = {'jpg': image_file}
    response = requests.post('http://httpbin.org/post', files=files2)

print(response.status_code)

print(response.text)

# Upload a video file.
with open('love_for_GD.mp4', 'rb') as video_file:
    files3 = {'movie': video_file}
    response = requests.post('http://httpbin.org/post', files=files3)

print(response.status_code)

print(response.text)

selenium模块讲解
一、什么是selenium？
　　最初是一个自动化测试工具。可以使用它帮我们驱动浏览器，自动去执行某些自定义好的操作。例如在页面中执行JS代码、跳过登录验证。可以使用selenium帮我们实现爬虫。
二、为什么要使用selenium？
　　1、优点:
　　　　使用requests模块登录需要分析大量的复杂通信流程，使用selenium可以轻松跳过登录验证。
　　2、缺点:
　　　　浏览器会加载css、js、图片、视频...数据，爬虫效率相比requests模块要低。

# selenium之第一次

from selenium import webdriver  # 用来驱动浏览器的

# 调用得到一个动作链对象，破解滑动验证码的时候用的，可以拖动图片

from selenium.webdriver import ActionChains

# 按照什么方式查找属性，By.ID,  By.CSS_SELECTOR， By.Class

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys  # 键盘按键操作

# 和下面WebDriverWait一起用的，EC是expected_conditions的别名

from selenium.webdriver.support import expected_conditions as EC

# 等待页面加载某些元素

from selenium.webdriver.support.wait import WebDriverWait

import time

# Open Google Chrome through the Chrome driver.

# webdriver.Chrome(r'absolute path to chromedriver.exe')

# chrome = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')  # pass the absolute path of chromedriver.exe

# Here chromedriver.exe is placed in the Python interpreter's Scripts folder,
# so no path argument is passed.

# `chrome` is the driver object used by the examples below.

chrome = webdriver.Chrome()

示例一

# Example 1: search Baidu for a keyword.
# try/finally guarantees the browser is shut down even if a step raises.
try:
    # Alternative target: the author's blog home page.
    # chrome.get('https://www.cnblogs.com/kermitjam/')

    # Explicit wait. Argument 1: the driver object; argument 2: the maximum
    # number of seconds to wait for a condition.
    wait = WebDriverWait(chrome, 10)

    # 1. Open Baidu.
    chrome.get('https://www.baidu.com/')

    # 2. Wait until the search input is present in the DOM.
    #    presence_of_element_located takes one locator tuple:
    #    (lookup strategy, value), here the element whose id is "kw".
    #    (The original ran this identical wait twice; once is enough.)
    input_tag = wait.until(EC.presence_of_element_located((By.ID, "kw")))

    # 3. Type the search keyword.
    input_tag.send_keys('一拳超人')

    # 4. Press the Enter key to submit the search.
    input_tag.send_keys(Keys.ENTER)

    time.sleep(3)

finally:
    # Always shut the browser down. quit() ends the whole session and the
    # chromedriver process; close() only closes the current window.
    chrome.quit()

示例二

# Example 2: search JD.com for a book title.
# The previous example already shut its browser down, so a fresh driver is
# created here instead of reusing the dead session.
chrome = webdriver.Chrome()

try:
    # Explicit wait of up to 10 seconds on this driver.
    wait = WebDriverWait(chrome, 10)

    # 1. Open JD.com.
    chrome.get('https://www.jd.com/')

    # 2. Wait for the search input (id="key") to be present.
    input_tag = wait.until(EC.presence_of_element_located((By.ID, "key")))

    # 3. Type the search keyword.
    input_tag.send_keys('唐诗三百首')

    # 4. Locate the search button by its class name. Wait until it is
    #    clickable, not merely present, because it is clicked next.
    search_button = wait.until(
        EC.element_to_be_clickable((By.CLASS_NAME, 'button')))

    # 5. Click the search button.
    search_button.click()

    time.sleep(3)

finally:
    # quit() ends the whole session and the chromedriver process;
    # close() only closes the current window.
    chrome.quit()

隐式等待

# Implicit-wait demo: open the NBA China site and look up the news nav item.
driver = webdriver.Chrome()

try:
    # Explicit wait (for comparison): waits for one specific element.
    # Argument 1: the driver object; argument 2: the maximum wait in seconds.
    # wait = WebDriverWait(chrome, 10)

    driver.get('https://china.nba.com/')

    # Implicit wait: every later find_element call keeps polling for up to
    # 10 seconds before giving up, instead of failing immediately.
    driver.implicitly_wait(10)

    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name,
    # which was removed in Selenium 4; this form works on both 3 and 4.
    news_tag = driver.find_element(By.CLASS_NAME, 'nav-news')

    # Print the element object.
    print(news_tag)

    # Print the element's tag name.
    print(news_tag.tag_name)

    time.sleep(10)

finally:
    # quit() ends the whole session and the chromedriver process;
    # close() only closes the current window.
    driver.quit()

基本选择器

from selenium import webdriver  # 用来驱动浏览器的

import time

'''

===============所有方法===================

    element是查找一个标签

    elements是查找所有标签

    1、find_element_by_link_text  通过链接文本去找

    2、find_element_by_id 通过id去找

    3、find_element_by_class_name

    4、find_element_by_partial_link_text

    5、find_element_by_name

    6、find_element_by_css_selector

    7、find_element_by_tag_name

'''

# Basic selectors demo: walk through Baidu's login dialog, using a different
# locator strategy at each step.
#
# The find_element_by_* helpers were removed in Selenium 4, so every lookup
# uses find_element(By.<STRATEGY>, value), which works on Selenium 3 and 4.
# By is imported here because this snippet's own imports do not include it.
from selenium.webdriver.common.by import By

# Create the driver object.
driver = webdriver.Chrome()

try:
    # Open Baidu.
    driver.get('https://www.baidu.com/')

    # Later find_element calls poll for up to 10 seconds.
    driver.implicitly_wait(10)

    # 1. By.LINK_TEXT: find an <a> tag by its full link text.
    # send_tag = driver.find_element(By.LINK_TEXT, '登录')
    # send_tag.click()

    # 2. By.PARTIAL_LINK_TEXT: find an <a> tag by part of its link text.
    login_button = driver.find_element(By.PARTIAL_LINK_TEXT, '登')
    login_button.click()
    time.sleep(1)

    # 3. By.CLASS_NAME: find by the class attribute.
    login_tag = driver.find_element(By.CLASS_NAME, 'tang-pass-footerBarULogin')
    login_tag.click()
    time.sleep(1)

    # 4. By.NAME: find by the name attribute.
    #    The user name is left blank as a placeholder.
    username = driver.find_element(By.NAME, 'userName')
    username.send_keys('')
    time.sleep(1)

    # 5. By.ID: find by the id attribute.
    #    The password is a masked placeholder.
    password = driver.find_element(By.ID, 'TANGRAM__PSP_10__password')
    password.send_keys('*******')
    time.sleep(1)

    # 6. By.CSS_SELECTOR: find with a CSS selector; here the login button by
    #    id. A class selector such as '.pass-button-submit' would also work.
    login_submit = driver.find_element(By.CSS_SELECTOR, '#TANGRAM__PSP_10__submit')
    login_submit.click()

    # 7. By.TAG_NAME: find by tag name (returns the first match).
    div = driver.find_element(By.TAG_NAME, 'div')
    print(div.tag_name)

    time.sleep(10)

finally:
    # quit() ends the whole session and the chromedriver process;
    # close() only closes the current window.
    driver.quit()

练习自动登录抽屉新热榜

from selenium import webdriver

import time

# Exercise: log in to dig.chouti.com automatically.
#
# - Credentials are read from the environment (CHOUTI_PHONE, CHOUTI_PASSWORD)
#   so no real password is stored in the source.
# - find_element(By.<STRATEGY>, value) replaces the find_element_by_* helpers
#   removed in Selenium 4.
# - The driver is created without a positional path: recent Selenium releases
#   no longer accept the chromedriver path as the first argument, and the rest
#   of these notes already rely on chromedriver being discoverable.
import os

from selenium.webdriver.common.by import By

driver = webdriver.Chrome()

try:
    # Maximize the browser window. Done inside the try so the browser is
    # still shut down if this step fails.
    driver.maximize_window()

    driver.get('https://dig.chouti.com/')

    # Later find_element calls poll for up to 10 seconds.
    driver.implicitly_wait(10)

    time.sleep(5)

    # 1. Click the login button.
    login_btn = driver.find_element(By.ID, 'login_btn')
    login_btn.click()

    time.sleep(2)

    # 2. Enter the user name (phone number).
    phone = driver.find_element(By.CLASS_NAME, 'login-phone')
    phone.send_keys(os.environ.get('CHOUTI_PHONE', ''))

    # 3. Enter the password.
    pwd = driver.find_element(By.CLASS_NAME, 'pwd-password-input')
    pwd.send_keys(os.environ.get('CHOUTI_PASSWORD', ''))

    # 4. Submit the login form.
    login_submit = driver.find_element(By.CLASS_NAME, 'btn-large')
    login_submit.click()

    time.sleep(20)

# Catch any failure in the steps above and print it; this is the top level of
# the script, so the error is reported rather than re-raised.
except Exception as e:
    print(e)

finally:
    # quit() ends the whole session and the chromedriver process;
    # close() only closes the current window.
    driver.quit()

Python Learning Day5的更多相关文章

python笔记 - day5
python笔记 - day5 参考: http://www.cnblogs.com/wupeiqi/articles/5484747.html http://www.cnblogs.com/alex ...
Python学习day5作业
目录 Python学习day5作业 ATM和购物商城 1. 程序说明 2. 基本流程图 3. 程序测试帐号 4. 程序结构: 5. 程序测试 title: Python学习day5作业 tags: p ...
python learning Exception & Debug.py
''' 在程序运行的过程中,如果发生了错误,可以事先约定返回一个错误代码,这样,就可以知道是否有错,以及出错的原因.在操作系统提供的调用中,返回错误码非常常见.比如打开文件的函数open(),成功时返 ...
Python Learning Paths
Python Learning Paths Python Expert Python in Action Syntax Python objects Scalar types Operators St ...
Python Learning
这是自己之前整理的学习Python的资料,分享出来,希望能给别人一点帮助. Learning Plan Python是什么?- 对Python有基本的认识版本区别下载安装 IDE 文件构造 Py ...
python基础 Day5
python Day5 字典其他数据类型的缺点列表可以存储大量的数据,但是关联性不强. 列表的查询速度比较慢其容器的数据类型为dict 其数据类型的分类可变(不可哈希)的数据类型:list d ...
How to begin Python learning?
如何开始Python语言学习? 1. 先了解它,Wiki百科:http://zh.wikipedia.org/zh-cn/Python 2. Python, Ruby等语言来自开源社区,社区的学法是V ...
python 学习day5（模块）
一.模块介绍模块,用一砣代码实现了某个功能的代码集合. 类似于函数式编程和面向过程编程,函数式编程则完成一个功能,其他代码用来调用即可,提供了代码的重用性和代码间的耦合.而对于一个复杂的功能来,可能 ...
Experience of Python Learning Week 1
1.The founder of python is Guido van Rossum ,he created it on Christmas in 1989, smriti of ABC langu ...

随机推荐

主机ping虚拟机失败。虚拟机ping主机，可以ping通。
原文:https://blog.csdn.net/ww1473345713/article/details/51490525 今天打开虚拟机,然后用Xshell远程连接,发现连接不上.按照以下顺序检查 ...
uboot配置和编译过程详解
根据朱有鹏老师讲解整理一.uboot主Makefile分析 1.uboot version确定(Makefile的24-29行) include/version_autogenerated.h文件是 ...
cmf公共函数解析
cmf公共函数解析-common.php 路径:thinkcmf\simplewind\cmf\common.php方法: 方法作用返回值 cmf_get_current_admin_id ...
C++用sqlite3_open连接打开指定数据库的小问题
一开始我也纳闷,我以为是我数据库没弄好,但是当我仔细检查,才发现原来我少了分号写少了分号,可能会导致 database 和 table 找不到... 所以用的时候需要注意... 代 ...
Java中默认方法
默认方法是JDK8新特性,指的是接口也可以提供具体方法了,而不像以前,只能提供抽象方法,Mortal 这个接口,增加了一个默认方法 r,这个方法有实现体,并且被声明为了default,如以下代码: 这 ...
项目中常用的JS操作技巧
1.<a>标签-超链接中confirm方法使用介绍 <a href="a.html" onclick="if(confirm('确定删除?')==fal ...
Python中的numpy函数的使用ones，zeros，eye
在看别人写的代码时,看到的不知道的函数,就在这里记下来. 原文是这样用的: weights = ones((numfeatures,1)) 在python中help(): import numpy a ...
设置gvim的字体大小
1.临时设置: 进入命令行模式输入: set guifont=Courier\ New:h10 2.永久设置: 打开安装目录找到defaults.vim在最后一行输入: set guifont=Cou ...
09.swoole学习笔记--进程事件
<?php //进程数组 $workers=[]; //创建进程的数据量 $worker_num=; //创建启动进程 ;$i<$worker_num;$i++){ //创建单独新进程 $ ...
Enum应用
public enum ZDJGJD { YSZ("01",0.3,"取得预售许可"),JGFD("02",0.6,"单位结构封顶 ...

Python Learning Day5

Python Learning Day5的更多相关文章

随机推荐

热门专题