Python3 爬虫 - 知乎登录
py文件:
from fake_useragent import UserAgent
import requests
from http import cookiejar
import base64
from PIL import Image
import time, json
import hashlib, hmac
import execjs
from urllib import parse

# Shared generator of random User-Agent strings.
ua = UserAgent()


class MyException(Exception):
    """Error carrying a status code and a message."""

    def __init__(self, status, msg):
        super().__init__(status, msg)
        self.status = status
        self.msg = msg


class ZhiHu:
    """Log in to zhihu.com with a requests session and cache the cookies.

    The login form is signed with HMAC-SHA1 and then encrypted by the
    JavaScript function ``Q`` loaded from ``./01.js``.
    """

    def __init__(self, username=None, password=None):
        self.username = username
        self.password = password
        self.session = requests.Session()
        self.session.headers = {
            "user-agent": ua.random,
            "referer": "https://www.zhihu.com/",
            'host': 'www.zhihu.com',
        }
        # Cookies are persisted in LWP format next to the script.
        self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")

        self.login_param = {
            "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
            "grant_type": "password",
            "source": "com.zhihu.web",
            "username": "",
            "password": "",
            "ref_source": "homepage",
            "utm_source": "baidu",
        }

    def load_cookies(self):
        """Load saved cookies into the session.

        :return: True when the cookie file was read, False when it is
            missing or cannot be parsed.
        """
        try:
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)
        except (FileNotFoundError, cookiejar.LoadError):
            return False
        return True

    def login(self, captcha_lang: str = "en", is_load_cookies: bool = True):
        """Log in, preferring saved cookies when allowed.

        :param captcha_lang: captcha style; "en" is a text captcha typed in
            the console, anything else asks for clicks on inverted characters
        :param is_load_cookies: whether saved cookies may be used
        :return: True when the session is logged in, False otherwise
        """
        # Test the flag first so the cookie file is not loaded into the
        # session when the caller asked for a fresh login.
        if is_load_cookies and self.load_cookies():
            print("读取cookies文件")
            if self.check__login():
                print("登陆成功")
                return True
            print("cookies已经失效")

        # Not logged in yet: make sure credentials are available.
        self.check_user_input()

        # Fetch the xsrf token and build the request headers.
        headers = self.session.headers.copy()
        xsrf = self.get_xsrf()
        headers.update({
            "content-type": "application/x-www-form-urlencoded",
            "x-xsrftoken": xsrf,
            "x-zse-83": "3_1.1",
        })

        # "lang" must be set before get_captcha() runs because it reads it.
        self.login_param.update({
            "username": self.username,
            "password": self.password,
            "lang": captcha_lang,
        })

        # Build the form data.
        timestamp = int(time.time() * 1000)
        self.login_param.update({
            "timestamp": timestamp,
            "captcha": self.get_captcha() or "",
            "signature": self.get_signature(timestamp),
        })

        formdata = self.__encrypt(self.login_param)
        url = "https://www.zhihu.com/api/v3/oauth/sign_in"

        self.session.post(url=url, headers=headers, data=formdata)
        if self.check__login():
            # Keep session cookies too, mirroring the flags used by load().
            self.session.cookies.save(ignore_discard=True)
            print("cookies以写入文件")
            print("登录成功")
            return True
        print("登录失败")
        return False

    def check__login(self):
        """Return True when the home page answers 200 without redirecting."""
        url = "https://www.zhihu.com/"
        response = self.session.get(url=url, allow_redirects=False)
        # A 302 (or any other status) is treated as "not logged in".
        return response.status_code == 200

    def check_user_input(self):
        """Prompt for missing credentials and prefix bare phone numbers with +86."""
        if not self.username:
            self.username = input("请输入手机号>>:").strip()
        if self.username.isdigit() and not self.username.startswith("+86"):
            self.username = "+86" + self.username
        if not self.password:
            self.password = input("请输入密码>>:").strip()

    def _captcha_api(self):
        """Return the captcha endpoint matching the configured "lang"."""
        if self.login_param.get("lang") == "en":
            return "https://www.zhihu.com/api/v3/oauth/captcha?lang=en"
        return "https://www.zhihu.com/api/v3/oauth/captcha?lang=cn"

    def get_captcha(self):
        """Solve the captcha interactively when the server asks for one.

        The endpoint is always queried once with GET; when a captcha is
        required it is fetched with PUT and the answer submitted with POST.

        :return: the submitted captcha answer, or None when none is required
        """
        lang = self.login_param.get("lang")
        captcha_api = self._captcha_api()
        response = self.session.get(captcha_api)
        is_use_verify = response.json().get("show_captcha", False)
        if not is_use_verify:
            return None

        # Fetch the captcha image as base64.
        response = self.session.put(captcha_api)
        # Strip both escaped and real newlines before decoding.
        base64_img = response.json()['img_base64'].replace(r'\n', '').replace('\n', '')
        with open("./captcha.png", "wb") as f:
            f.write(base64.b64decode(base64_img))
        img = Image.open("./captcha.png")
        if lang == "en":
            img.show()
            code = input("请输入图片中的验证码>>:").strip()
        else:
            import matplotlib.pyplot as plt
            plt.imshow(img)
            print('点击所有倒立的汉字,在命令行中按回车提交')
            points = plt.ginput(7)
            # Clicked coordinates are halved before being submitted.
            code = json.dumps({'img_size': [200, 44],
                               'input_points': [[i[0] / 2, i[1] / 2] for i in points]})

        self.session.post(captcha_api, data={"input_text": code}, headers={"user-agent": ua.random, })
        return code

    def get_no_captch(self):
        """Poll the captcha endpoint until the server stops requiring one.

        :return: an empty string once "show_captcha" is false
        """
        captcha_api = self._captcha_api()
        while True:
            print("正在请求验证码....")
            time.sleep(0.5)
            response = self.session.get(captcha_api)
            is_use_verify = response.json().get("show_captcha")
            # The JSON boolean decodes to Python False; also accept the
            # string form in case the server sends "false".
            if not is_use_verify or str(is_use_verify).lower() == "false":
                return ""
            print("继续...")

    def get_signature(self, timestamp):
        """Return the HMAC-SHA1 hex signature of the login form.

        :param timestamp: millisecond timestamp also sent in the form
        """
        ha = hmac.new(key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1)
        client_id = self.login_param.get("client_id")
        grant_type = self.login_param.get("grant_type")
        source = self.login_param.get("source")
        message = grant_type + client_id + source + str(timestamp)
        ha.update(message.encode("utf-8"))
        return ha.hexdigest()

    def get_xsrf(self):
        """Return the ``_xsrf`` cookie set by the sign-in page, or None."""
        url = "https://www.zhihu.com/signin"
        response = self.session.get(url=url, headers=self.session.headers, allow_redirects=False)
        return response.cookies.get("_xsrf")

    def __encrypt(self, data: dict):
        """URL-encode *data* and encrypt it with ``Q`` from ``./01.js``."""
        data = parse.urlencode(data)
        with open("./01.js", "r", encoding="utf-8") as f:
            js_code = f.read()
        ctx = execjs.compile(js_code)
        return ctx.call("Q", data)


if __name__ == '__main__':
    zhihu = ZhiHu()
    zhihu.login()
js文件:
// Minimal stand-ins for browser globals, assigned without `var` so they
// become implicit globals. Presumably read by the bytecode program through
// eval — TODO confirm.
window = {
    "encodeURIComponent": encodeURIComponent
};
navigator = {
    "userAgent": "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
};

// `typeof` helper used by ALU operation 10. On first call it replaces
// itself with one of two implementations and delegates to it.
function s(e) {
    return (s = "function" == typeof Symbol && "symbol" == typeof Symbol.t ? function (e) {
        return typeof e
    } : function (e) {
        return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e
    })(e)
}

var t = "1.1",
    __g = {}; // Q() calls __g._encrypt

// ---------------------------------------------------------------------------
// Instruction classes. G.prototype.M picks one by the top 4 bits of a 16-bit
// instruction word; each constructor unpacks its operands from the low 12.
// ---------------------------------------------------------------------------

// Opcode 0: no operands.
function i() {
}

// Opcode 1: s = bit 11, i = bits 9-10, h and A = low 9 bits.
function h(e) {
    this.s = (2048 & e) >> 11;
    this.i = (1536 & e) >> 9;
    this.h = 511 & e;
    this.A = 511 & e
}

// Opcode 2: i = bits 10-11, A = low 10 bits.
function A(e) {
    this.i = (3072 & e) >> 10;
    this.A = 1023 & e
}

// Opcode 3: n = bits 10-11, e = bits 8-9, a = bits 6-7, s = low 6 bits.
function n(e) {
    this.n = (3072 & e) >> 10;
    this.e = (768 & e) >> 8;
    this.a = (192 & e) >> 6;
    this.s = 63 & e
}

// Opcode 4: i = bits 10-11, h = low 10 bits.
function e(e) {
    this.i = e >> 10 & 3;
    this.h = 1023 & e
}

// Opcode 5: no operands.
function a() {
}

// Opcode 6: same layout as opcode 3.
function c(e) {
    this.n = (3072 & e) >> 10;
    this.e = (768 & e) >> 8;
    this.a = (192 & e) >> 6;
    this.s = 63 & e
}

// Opcode 7: A = bits 2-11, s = low 2 bits.
function o(e) {
    this.A = (4095 & e) >> 2;
    this.s = 3 & e
}

// Opcode 8: i = bits 10-11, h = bits 2-9, s = low 2 bits.
function r(e) {
    this.i = e >> 10 & 3;
    this.h = e >> 2 & 255;
    this.s = 3 & e
}

// Opcode 9: s = bits 10-11, i = bits 8-9, h = low 10 bits, A = low 6 bits.
function k(e) {
    this.s = (4095 & e) >> 10;
    this.i = (1023 & e) >> 8;
    this.h = 1023 & e;
    this.A = 63 & e
}

// Opcode 10: s = bits 10-11, n = bits 8-9, e = bits 6-7.
function B(e) {
    this.s = (4095 & e) >> 10;
    this.n = (1023 & e) >> 8;
    this.e = (255 & e) >> 6
}

// Opcode 11: i = bits 10-11, A = low 10 bits.
function f(e) {
    this.i = (3072 & e) >> 10;
    this.A = 1023 & e
}

// Opcode 12: A = low 12 bits.
function u(e) {
    this.A = 4095 & e
}

// Opcode 13: i = bits 10-11.
function C(e) {
    this.i = (3072 & e) >> 10
}

// Opcode 14: A = low 12 bits.
function b(e) {
    this.A = 4095 & e
}

// Opcode 15: s = bits 8-11, i = bits 6-7, h = low 6 bits.
function g(e) {
    this.s = (3840 & e) >> 8;
    this.i = (192 & e) >> 6;
    this.h = 63 & e
}

// Virtual machine state.
function G() {
    this.c = [0, 0, 0, 0];  // four registers
    this.o = 0;             // index of the next instruction in b
    this.r = [];            // saved instruction indexes (pushed by opcode 4)
    this.k = [];            // variables of the current frame
    this.B = [];            // saved variable arrays
    this.f = [];            // operand stack
    this.u = [];            // saved operand stacks
    this.C = false;         // condition flag set by opcode 6, read by opcode 7
    this.b = [];            // instruction words
    this.g = [];            // constant pool
    this.G = false;         // run flag; the J loop continues while it is true
    this.Q = null;          // handler address used when an instruction throws
    this.R = null;          // last value caught in J
    this.w = [];            // only ever passed as an extra argument to J
    this.x = 0;             // Date.now() at the previous instruction
    this.D = {              // opcode -> instruction class
        0: i,
        1: h,
        2: A,
        3: n,
        4: e,
        5: a,
        6: c,
        7: o,
        8: r,
        9: k,
        10: B,
        11: f,
        12: u,
        13: C,
        14: b,
        15: g
    }
}

// Opcode 0: clear the run flag so the J loop stops.
i.prototype.M = function (e) {
    e.G = false
}
,
// Opcode 1: load register i with the immediate h (mode 0) or with frame
// variable A (mode 1).
h.prototype.M = function (vm) {
    if (this.s === 0) {
        vm.c[this.i] = this.h
    } else if (this.s === 1) {
        vm.c[this.i] = vm.k[this.A]
    }
}
,
// Opcode 2: store register i into frame variable A.
A.prototype.M = function (vm) {
    var value = vm.c[this.i];
    vm.k[this.A] = value
}
,
// Opcode 3: arithmetic/logic. Applies the operator selected by this.s to
// registers e and a (operations 10 and 13 use register e only) and writes
// the result to register n. Unknown operation codes leave registers unchanged.
n.prototype.M = function (e) {
switch (this.s) {
case 0:
e.c[this.n] = e.c[this.e] + e.c[this.a];
break;
case 1:
e.c[this.n] = e.c[this.e] - e.c[this.a];
break;
case 2:
e.c[this.n] = e.c[this.e] * e.c[this.a];
break;
case 3:
e.c[this.n] = e.c[this.e] / e.c[this.a];
break;
case 4:
e.c[this.n] = e.c[this.e] % e.c[this.a];
break;
case 5:
e.c[this.n] = e.c[this.e] == e.c[this.a];
break;
case 6:
e.c[this.n] = e.c[this.e] >= e.c[this.a];
break;
case 7:
e.c[this.n] = e.c[this.e] || e.c[this.a];
break;
case 8:
e.c[this.n] = e.c[this.e] && e.c[this.a];
break;
case 9:
e.c[this.n] = e.c[this.e] !== e.c[this.a];
break;
case 10:
// typeof, via the global helper s()
e.c[this.n] = s(e.c[this.e]);
break;
case 11:
e.c[this.n] = e.c[this.e] in e.c[this.a];
break;
case 12:
e.c[this.n] = e.c[this.e] > e.c[this.a];
break;
case 13:
e.c[this.n] = -e.c[this.e];
break;
case 14:
e.c[this.n] = e.c[this.e] < e.c[this.a];
break;
case 15:
e.c[this.n] = e.c[this.e] & e.c[this.a];
break;
case 16:
e.c[this.n] = e.c[this.e] ^ e.c[this.a];
break;
case 17:
e.c[this.n] = e.c[this.e] << e.c[this.a];
break;
case 18:
e.c[this.n] = e.c[this.e] >>> e.c[this.a];
break;
case 19:
e.c[this.n] = e.c[this.e] | e.c[this.a]
}
}
,
// Opcode 4: enter a new frame. Saves the current instruction index and
// variable array, jumps to the address held in register i, moves the top
// this.h operand-stack values into the new variable array (keeping their
// push order), then saves the old operand stack and starts an empty one.
e.prototype.M = function (e) {
e.r.push(e.o),
e.B.push(e.k),
e.o = e.c[this.i],
e.k = [];
// unshift() restores push order because pop() yields the values reversed
for (var t = 0; t < this.h; t++)
e.k.unshift(e.f.pop());
e.u.push(e.f),
e.f = []
}
,
// Opcode 5: leave the current frame by restoring the saved instruction
// index, variable array and operand stack.
a.prototype.M = function (vm) {
    vm.o = vm.r.pop();
    vm.k = vm.B.pop();
    vm.f = vm.u.pop()
}
,
// Opcode 6: compare. Sets the condition flag C from registers n and e using
// the comparison selected by this.s (modes 6 and 7 test register n alone).
// The flag is consumed, and then cleared, by opcode 7.
c.prototype.M = function (e) {
switch (this.s) {
case 0:
e.C = e.c[this.n] >= e.c[this.e];
break;
case 1:
e.C = e.c[this.n] <= e.c[this.e];
break;
case 2:
e.C = e.c[this.n] > e.c[this.e];
break;
case 3:
e.C = e.c[this.n] < e.c[this.e];
break;
case 4:
e.C = e.c[this.n] == e.c[this.e];
break;
case 5:
e.C = e.c[this.n] != e.c[this.e];
break;
case 6:
// flag takes the raw register value, not a coerced boolean
e.C = e.c[this.n];
break;
case 7:
e.C = !e.c[this.n]
}
}
,
// Opcode 7: jump to address A. Mode 0 always jumps, mode 1 jumps when the
// condition flag is set, mode 2 when it is clear, mode 3 always jumps and
// also clears the exception handler address. The flag is reset afterwards.
o.prototype.M = function (vm) {
    var mode = this.s,
        target = this.A;
    if (mode === 0) {
        vm.o = target
    } else if (mode === 1) {
        if (vm.C) {
            vm.o = target
        }
    } else if (mode === 2) {
        if (!vm.C) {
            vm.o = target
        }
    } else if (mode === 3) {
        vm.o = target;
        vm.Q = null
    }
    vm.C = false
}
,
// Opcode 8: invoke a native callable held in register i and store the result
// in register 3. this.h values are popped from the operand stack as
// arguments, but only the first two are ever passed on.
r.prototype.M = function (e) {
switch (this.s) {
case 0:
// plain function call
for (var t = [], n = 0; n < this.h; n++)
t.unshift(e.f.pop());
e.c[3] = e.c[this.i](t[0], t[1]);
break;
case 1:
// method call: the property name is popped before the arguments
for (var r = e.f.pop(), o = [], i = 0; i < this.h; i++)
o.unshift(e.f.pop());
e.c[3] = e.c[this.i][r](o[0], o[1]);
break;
case 2:
// constructor call
for (var a = [], c = 0; c < this.h; c++)
a.unshift(e.f.pop());
e.c[3] = new e.c[this.i](a[0], a[1])
}
}
,
// Opcode 9: push onto the operand stack. Mode 0 pushes register i, mode 1
// the immediate h, mode 2 frame variable A, mode 3 constant-pool entry A.
k.prototype.M = function (vm) {
    var stack = vm.f;
    if (this.s === 0) {
        stack.push(vm.c[this.i])
    } else if (this.s === 1) {
        stack.push(this.h)
    } else if (this.s === 2) {
        stack.push(vm.k[this.A])
    } else if (this.s === 3) {
        stack.push(vm.g[this.A])
    }
}
,
// Opcode 10: property access and eval.
//   mode 0: pop a key and read register e's property into register n
//   mode 1: pop a key, then a value, and assign it on register e's object
//   mode 2: pop a string and evaluate it, storing the result in register n
B.prototype.M = function (t) {
switch (this.s) {
case 0:
var s = t.f.pop();
t.c[this.n] = t.c[this.e][s];
break;
case 1:
var i = t.f.pop()
, h = t.f.pop();
t.c[this.e][i] = h;
break;
case 2:
var A = t.f.pop();
// NOTE(review): this is a direct eval, so the evaluated text can see this
// function's locals (t, s, i, h, A), which shadow the globals of the same
// names. Do not rename them without checking what the bytecode evaluates.
t.c[this.n] = eval(A)
}
}
,
// Opcode 11: load constant-pool entry A into register i.
f.prototype.M = function (vm) {
    var constant = vm.g[this.A];
    vm.c[this.i] = constant
}
,
// Opcode 12: record A as the address J jumps to when an instruction throws.
u.prototype.M = function (vm) {
    var handlerAddress = this.A;
    vm.Q = handlerAddress
}
,
// Opcode 13: throw the value held in register i.
C.prototype.M = function (vm) {
    var thrown = vm.c[this.i];
    throw thrown
}
,
// Opcode 14: build a native function backed by bytecode and put it in
// register 3. Calling it runs a fresh G at address A over the same
// instruction words and constant pool, and returns that VM's register 3.
b.prototype.M = function (e) {
var t = this
, n = [0];
// Slot 0 is reserved for the call argument; the rest is a copy of the
// current frame's variables taken when this instruction executes.
e.k.forEach(function (e) {
n.push(e)
});
var r = function (r) {
var o = new G;
// n is one array shared by every call of this function, so variables
// written by one call are visible to the next.
return o.k = n,
o.k[0] = r,
// J declares three parameters, so the fourth argument (e.w) is ignored
o.J(e.b, t.A, e.g, e.w),
o.c[3]
};
// Make the function print like a built-in when converted to a string
r.toString = function () {
return "() { [native code] }"
}
,
e.c[3] = r
}
,
// Opcode 15: build a literal from the operand stack and store it in
// register i.
//   mode 0: object from this.h pairs; each value is popped before its key
//   mode 1: array of this.h values, in the order they were pushed
g.prototype.M = function (e) {
switch (this.s) {
case 0:
for (var t = {}, n = 0; n < this.h; n++) {
var r = e.f.pop();
t[e.f.pop()] = r
}
e.c[this.i] = t;
break;
case 1:
for (var o = [], i = 0; i < this.h; i++)
o.unshift(e.f.pop());
e.c[this.i] = o
}
}
,
// Decode a base64 program into 16-bit instruction words (big-endian byte
// pairs) and store them in this.b. A trailing odd byte is ignored.
// Uses Buffer.from because the `new Buffer(string, encoding)` constructor
// is deprecated in Node.js.
G.prototype.v = function (e) {
    for (var t = Buffer.from(e, "base64").toString("binary"), n = [], r = 0; r < t.length - 1; r += 2)
        n.push(t.charCodeAt(r) << 8 | t.charCodeAt(r + 1));
    this.b = n
}
,
// Decode one obfuscated constant-pool string. Each base64-decoded byte is
// XORed with 24 and with the previous output code (66 for the first byte).
// Uses Buffer.from because the `new Buffer(string, encoding)` constructor
// is deprecated in Node.js.
G.prototype.y = function (e) {
    for (var t = Buffer.from(e, "base64").toString("binary"), n = 66, r = [], o = 0; o < t.length; o++) {
        var i = 24 ^ t.charCodeAt(o) ^ n;
        r.push(String.fromCharCode(i)),
        n = i
    }
    return r.join("")
}
,
// Install the constant pool: string entries are decoded with y(), every
// other entry is kept unchanged.
G.prototype.F = function (constants) {
    var self = this;

    function decodeEntry(entry) {
        if (typeof entry === "string") {
            return self.y(entry)
        }
        return entry
    }

    this.g = constants.map(decodeEntry)
}
,
// Run the program.
//   e: base64 program string, or an array of already-decoded words
//   t: start address (defaults to 0)
//   n: constant pool (defaults to []); decoded with F() only when e is a
//      string, otherwise used as-is
// Executes until the run flag is cleared, the fetched word is not a number,
// or more than 500 ms pass between two consecutive instructions.
G.prototype.J = function (e, t, n) {
for (t = t || 0,
n = n || [],
this.o = t,
"string" == typeof e ? (this.F(n),
this.v(e)) : (this.b = e,
this.g = n),
this.G = !0,
this.x = Date.now(); this.G;) {
var r = this.b[this.o++];
// Reading past the end of the program yields undefined: stop
if ("number" != typeof r)
break;
var o = Date.now();
// Abort silently when the gap since the previous instruction exceeds 500 ms
if (500 < o - this.x)
return;
this.x = o;
try {
this.M(r)
} catch (e) {
// Remember the thrown value; without a registered handler address it is
// rethrown as a string, otherwise execution resumes at the handler.
if (this.R = e,
!this.Q)
throw "execption at " + this.o + ": " + e;
this.o = this.Q
}
}
}
,
// Execute one instruction word: the top 4 bits select the instruction class
// from this.D, which is constructed from the word and run against this VM.
G.prototype.M = function (word) {
    var Handler = this.D[(word & 61440) >> 12];
    var instruction = new Handler(word);
    instruction.M(this)
}
,
1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
// Entry point called from Python through execjs: forwards the URL-encoded
// form string to __g._encrypt and returns its result.
// NOTE(review): __g._encrypt is not defined in this file; presumably the
// bytecode program installs it — confirm.
var Q = function (payload) {
    var encrypted = __g._encrypt(payload);
    return encrypted
};
参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang
python3爬虫-知乎登陆的更多相关文章
- python3爬虫-通过selenium登陆拉钩,爬取职位信息
from selenium import webdriver from selenium.common.exceptions import NoSuchElementException from se ...
- Python3 使用selenium库登陆知乎并保存cookie为本地文件
Python3 使用selenium库登陆知乎并保存cookie为本地文件 学习使用selenium库模拟登陆知乎,并将cookie保存为本地文件,然后供以后(requests模块)使用,用selen ...
- python3爬虫--反爬虫应对机制
python3爬虫--反爬虫应对机制 内容来源于: Python3网络爬虫开发实战: 网络爬虫教程(python2): 前言: 反爬虫更多是一种攻防战,针对网站的反爬虫处理来采取对应的应对机制,一般需 ...
- python3爬虫(4)各种网站视频下载方法
python3爬虫(4)各种网站视频下载方法原创H-KING 最后发布于2019-01-09 11:06:23 阅读数 13608 收藏展开理论上来讲只要是网上(浏览器)能看到图片,音频,视频,都能够 ...
- [Javascript] 爬虫 模拟新浪微博登陆
概述: 由于业务需要,要编写爬虫代码去爬去新浪微博用户的信息. 虽然在网上能找到不少信息,但由于新浪微博改版,其登陆机制进行了修改,故很多老的文章就不适合用了. 经过一番摸索,成功模拟新浪微博的登陆 ...
- Python3爬虫系列:理论+实验+爬取妹子图实战
Github: https://github.com/wangy8961/python3-concurrency-pics-02 ,欢迎star 爬虫系列: (1) 理论 Python3爬虫系列01 ...
- python爬虫知乎问答
python爬虫知乎问答 import cookielibimport base64import reimport hashlibimport jsonimport rsaimport binasci ...
- python3爬虫中文乱码之请求头‘Accept-Encoding’:br 的问题
当用python3做爬虫的时候,一些网站为了防爬虫会设置一些检查机制,这时我们就需要添加请求头,伪装成浏览器正常访问. header的内容在浏览器的开发者工具中便可看到,将这些信息添加到我们的爬虫代码 ...
- Python3 爬虫之 Scrapy 核心功能实现(二)
博客地址:http://www.moonxy.com 基于 Python 3.6.2 的 Scrapy 爬虫框架使用,Scrapy 的搭建过程请参照本人的另一篇博客:Python3 爬虫之 Scrap ...
随机推荐
- angularjs -- 路由监听
前几天,项目在做一个功能时需要在页面切换之前关闭正在执行的函数.尝试了几种方式都不行,最后想到既然angularjs是通过理由切换页面,那就在路由上面做文章吧.AngularJS在路由发生改变时,可以 ...
- Django From组件 fields widgets
一.Form组件之字段 Field required=True, 是否允许为空 widget=None, HTML插件 label=None, 用于生成Label标签或显示内容 initial=Non ...
- vuejs code splitting with webpack 3种模式
我们知道一个web app如果太大会严重影响用户的体验,如何能够最快速度地让用户看到完整页面是优化web应用需要做的重要工作. 这其中使用code split实现lazy加载,只让用户初次访问时只加载 ...
- EBS请求定义成菜单
1. 将请求定义为“功能”路径:系统管理员 –应用产品-函数输入自定义的功能名称,用户功能名以及说明 “特性”TAB页: 类型选择“表单”,其余两个字段默认:在表单TAB页: 表单字段:选择“运行 ...
- Hive的介绍及安装
简介 Hive 是基于 Hadoop 的一个数据仓库工具,可以将结构化的数据文件 映射为一张数据库表,并提供类 SQL 查询功能. 本质是将 SQL 转换为 MapReduce 程序. Hive组件 ...
- 安装Linux Centos系统硬盘分区方法
一.硬盘回顾 无论是安装Windows还是Linux操作系统,硬盘分区都是整个系统安装过程中最为棘手的环节.硬盘一般分为IDE硬盘.SCSI硬盘和SATA硬盘三种,在Linux系统中,IDE接口的硬盘 ...
- I/O复用及epoll基础知识
IO multiplexing IO multiplexing这个词可能有点陌生,但是如果我说select,epoll,大概就都能明白了.有些地方也称这种IO方式为event driven IO.我们 ...
- .net通用类型转换方法
由于数据类型多,要按照逐个类型写一个类型转换的方法的话一是代码量多,显得累赘. using System; using System.ComponentModel; using System.Glob ...
- C#的Lambda 表达式都使用 Lambda 运算符 =>,该运算符读为“goes to”。语法如下:
形参列表=>函数体 函数体多于一条语句的可用大括号括起. 类型 可以将此表达式分配给委托类型,如下所示: delegate int del(int i); del myDelegate = ...
- 选中复选框,才能在文本框中输东西。button按钮已启用,