py文件:

from fake_useragent import UserAgent
import requests
from http import cookiejar
import base64
from PIL import Image
import time, json
import hashlib, hmac
import execjs
from urllib import parse

# Shared generator of random User-Agent strings for all requests.
ua = UserAgent()


class MyException(Exception):
    """Error that carries a status code and a message."""

    def __init__(self, status, msg):
        super().__init__(status, msg)
        self.status = status
        self.msg = msg


class ZhiHu:
    """Log in to zhihu.com through a ``requests`` session.

    Cookies are persisted to ``./cookies.txt`` so that later runs can skip
    the password login. The login form is encrypted by calling the ``Q``
    function of the companion script ``./01.js`` through ``execjs``.
    """

    def __init__(self, username=None, password=None):
        """Store credentials and prepare the HTTP session.

        :param username: account name; asked for interactively when empty.
        :param password: account password; asked for interactively when empty.
        """
        self.username = username
        self.password = password
        self.session = requests.Session()
        self.session.headers = {
            "user-agent": ua.random,
            "referer": "https://www.zhihu.com/",
            'host': 'www.zhihu.com',
        }
        self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")
        # Base form fields of the sign-in request; login() fills in the rest.
        self.login_param = {
            "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
            "grant_type": "password",
            "source": "com.zhihu.web",
            "username": "",
            "password": "",
            "ref_source": "homepage",
            "utm_source": "baidu",
        }

    def load_cookies(self):
        """Load cookies from the cookie file into the session.

        :return: True when the file was read, False when it is missing or
            cannot be parsed.
        """
        try:
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)
        except (FileNotFoundError, cookiejar.LoadError):
            # A corrupt cookie file is treated like a missing one so that the
            # caller falls back to the password login.
            return False
        return True

    def login(self, captcha_lang: str = "en", is_load_cookies: bool = True):
        """Log in, preferring saved cookies over a password login.

        :param captcha_lang: captcha flavour; "en" is a text captcha, any
            other value selects the "click the upside-down characters" one.
        :param is_load_cookies: whether to try the saved cookies first.
        :return: True when the session is logged in, False otherwise.
        """
        # Test the flag first: the cookie file must not be read at all when
        # the caller opted out of cookie login.
        if is_load_cookies and self.load_cookies():
            print("读取cookies文件")
            if self.check__login():
                print("登陆成功")
                return True
            print("cookies已经失效")

        # Not logged in at this point: do the password login.
        self.check_user_input()

        # Fetch the xsrf token and put it in the request headers.
        headers = self.session.headers.copy()
        xsrf = self.get_xsrf()
        headers.update({
            "content-type": "application/x-www-form-urlencoded",
            "x-xsrftoken": xsrf,
            "x-zse-83": "3_1.1",
        })

        # "lang" must be set before get_captcha() runs, which reads it back.
        self.login_param.update({
            "username": self.username,
            "password": self.password,
            "lang": captcha_lang
        })

        # Build the form data.
        timestamp = int(time.time() * 1000)
        self.login_param.update({
            "timestamp": timestamp,
            "captcha": self.get_captcha() or "",
            "signature": self.get_signature(timestamp)
        })
        formdata = self.__encrypt(self.login_param)
        url = "https://www.zhihu.com/api/v3/oauth/sign_in"

        self.session.post(url=url, headers=headers, data=formdata)
        if self.check__login():
            # Use the same flags as load_cookies() so that everything that
            # will be read back is actually written.
            self.session.cookies.save(ignore_discard=True, ignore_expires=True)
            print("cookies以写入文件")
            print("登录成功")
            return True
        print("登录失败")
        return False

    def check__login(self):
        """Report whether the session is logged in.

        The home page is requested without following redirects; only a plain
        200 response counts as logged in (a 302 means not logged in).
        """
        url = "https://www.zhihu.com/"
        response = self.session.get(url=url, allow_redirects=False)
        return response.status_code == 200

    def check_user_input(self):
        """Make sure a username and a password are available, asking if not."""
        if not self.username:
            self.username = input("请输入手机号>>:").strip()
        # An all-digit username is a bare phone number: add the +86 prefix.
        # NOTE(review): applied to constructor-supplied usernames as well; the
        # original nesting was not recoverable from the source - confirm.
        if self.username.isdigit():
            self.username = "+86" + self.username
        if not self.password:
            self.password = input("请输入密码>>:").strip()

    def _captcha_api(self):
        """Return the captcha endpoint matching ``login_param['lang']``."""
        lang = "en" if self.login_param.get("lang") == "en" else "cn"
        return "https://www.zhihu.com/api/v3/oauth/captcha?lang=" + lang

    def get_captcha(self):
        """Fetch and solve the captcha when the server asks for one.

        The endpoint is always queried once with GET; when a captcha is
        required it is followed by PUT (fetch the image) and POST (submit
        the answer).

        :return: the captcha answer, or "" when no captcha is required.
        """
        lang = self.login_param.get("lang")
        captcha_api = self._captcha_api()
        response = self.session.get(captcha_api)
        if not response.json().get("show_captcha", False):
            return ""

        # Captcha required: fetch the image as base64 and store it on disk.
        response = self.session.put(captcha_api)
        base64_img = response.json()['img_base64'].replace(r'\n', '')
        with open("./captcha.png", "wb") as f:
            f.write(base64.b64decode(base64_img))
        img = Image.open("./captcha.png")
        if lang == "en":
            img.show()
            code = input("请输入图片中的验证码>>:").strip()
        else:
            # Imported lazily: only needed for the click captcha.
            import matplotlib.pyplot as plt
            plt.imshow(img)
            print('点击所有倒立的汉字,在命令行中按回车提交')
            points = plt.ginput(7)
            # Click coordinates are halved before submission.
            # NOTE(review): presumably the image is twice the 200x44 size the
            # API expects - confirm.
            code = json.dumps({'img_size': [200, 44],
                               'input_points': [[i[0] / 2, i[1] / 2] for i in points]})
        self.session.post(captcha_api, data={"input_text": code}, headers={"user-agent": ua.random, })
        return code

    def get_no_captch(self):
        """Poll the captcha endpoint until no captcha is required.

        :return: "" once the server reports ``show_captcha`` as false.
        """
        captcha_api = self._captcha_api()
        while True:
            print("正在请求验证码....")
            time.sleep(0.5)
            response = self.session.get(captcha_api)
            # Test the JSON boolean directly: str(False) is 'False', so the
            # former comparison against 'false' could never succeed.
            if not response.json().get("show_captcha"):
                return ""
            print("继续...")

    def get_signature(self, timestamp):
        """Return the HMAC-SHA1 hex signature of the login request.

        The signed message is grant_type + client_id + source + timestamp.
        """
        ha = hmac.new(key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1)
        message = (
            self.login_param.get("grant_type")
            + self.login_param.get("client_id")
            + self.login_param.get("source")
            + str(timestamp)
        )
        ha.update(message.encode("utf-8"))
        return ha.hexdigest()

    def get_xsrf(self):
        """Request the sign-in page and return its ``_xsrf`` cookie (or None)."""
        url = "https://www.zhihu.com/signin"
        response = self.session.get(url=url, headers=self.session.headers, allow_redirects=False)
        return response.cookies.get("_xsrf")

    def __encrypt(self, data: dict):
        """URL-encode *data* and encrypt it with ``Q`` from ``./01.js``."""
        data = parse.urlencode(data)
        with open("./01.js", "r", encoding="utf-8") as f:
            js_code = f.read()
        ctx = execjs.compile(js_code)
        return ctx.call("Q", data)


if __name__ == '__main__':
    zhihu = ZhiHu()
    zhihu.login()

js文件:

// Environment stubs plus the building blocks of a small bytecode interpreter.
// `window` and `navigator` are plain-object stand-ins for the browser globals
// (presumably what the bytecode looks up at runtime; the script itself relies
// on Node's Buffer further down).
// The functions i, h, A, n, e, a, c, o, r, k, B, f, u, C, b, g are operand
// decoders: each is constructed with one instruction word and extracts its
// bit-fields. G.prototype.M picks the decoder through the table G#D, indexed
// by the top four bits of the word.
window = {
"encodeURIComponent": encodeURIComponent
}
navigator = {
"userAgent": "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
} function s(e) {
// typeof helper that reports "symbol" for Symbol instances. On first call it
// replaces itself with one of the two implementations below.
// NOTE(review): the usual form of this helper tests `Symbol.iterator`; with
// `Symbol.t` the second implementation is presumably always the one chosen
// (unless something defines Symbol.t) - verify.
return (s = "function" == typeof Symbol && "symbol" == typeof Symbol.t ? function (e) {
return typeof e
}
: function (e) {
return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e
}
)(e)
} var t = "1.1"
, __g = {}; function i() {
// Table index 0: no operands. (`__g` above is the object Q reads `_encrypt`
// from; nothing visible here assigns it, presumably the bytecode does.)
} function h(e) {
// Table index 1: s = bit 11, i = bits 9-10, h and A = low 9 bits.
this.s = (2048 & e) >> 11,
this.i = (1536 & e) >> 9,
this.h = 511 & e,
this.A = 511 & e
} function A(e) {
// Table index 2: i = bits 10-11, A = low 10 bits.
this.i = (3072 & e) >> 10,
this.A = 1023 & e
} function n(e) {
// Table index 3: n = bits 10-11, e = bits 8-9, a = bits 6-7, s = low 6 bits.
this.n = (3072 & e) >> 10,
this.e = (768 & e) >> 8,
this.a = (192 & e) >> 6,
this.s = 63 & e
} function e(e) {
// Table index 4: i = bits 10-11, h = low 10 bits.
this.i = e >> 10 & 3,
this.h = 1023 & e
} function a() {
// Table index 5: no operands.
} function c(e) {
// Table index 6: same field layout as n.
this.n = (3072 & e) >> 10,
this.e = (768 & e) >> 8,
this.a = (192 & e) >> 6,
this.s = 63 & e
} function o(e) {
// Table index 7: A = bits 2-11, s = low 2 bits.
this.A = (4095 & e) >> 2,
this.s = 3 & e
} function r(e) {
// Table index 8: i = bits 10-11, h = bits 2-9, s = low 2 bits.
this.i = e >> 10 & 3,
this.h = e >> 2 & 255,
this.s = 3 & e
} function k(e) {
// Table index 9: s = bits 10-11, i = bits 8-9, h = low 10 bits, A = low 6 bits.
this.s = (4095 & e) >> 10,
this.i = (1023 & e) >> 8,
this.h = 1023 & e,
this.A = 63 & e
} function B(e) {
// Table index 10: s = bits 10-11, n = bits 8-9, e = bits 6-7.
this.s = (4095 & e) >> 10,
this.n = (1023 & e) >> 8,
this.e = (255 & e) >> 6
} function f(e) {
// Table index 11: i = bits 10-11, A = low 10 bits.
this.i = (3072 & e) >> 10,
this.A = 1023 & e
} function u(e) {
// Table index 12: A = low 12 bits.
this.A = 4095 & e
} function C(e) {
// Table index 13: i = bits 10-11.
this.i = (3072 & e) >> 10
} function b(e) {
// Table index 14: A = low 12 bits.
this.A = 4095 & e
} function g(e) {
// Table index 15: s = bits 8-11, i = bits 6-7, h = low 6 bits.
this.s = (3840 & e) >> 8,
this.i = (192 & e) >> 6,
this.h = 63 & e
} function G() {
// Interpreter state.
// c: four registers (c[3] receives call results).
this.c = [0, 0, 0, 0],
// o: index of the next instruction word in b.
this.o = 0,
// r / B / u: stacks of saved o, k and f, pushed by e.prototype.M and popped
// by a.prototype.M.
this.r = [],
// k: current frame slots.
this.k = [],
this.B = [],
// f: operand stack.
this.f = [],
this.u = [],
// C: condition flag set by c.prototype.M and consumed by o.prototype.M.
this.C = !1,
// b: instruction words; g: constant pool.
this.b = [],
this.g = [],
// G: running flag tested by the loop in G.prototype.J.
this.G = !1,
// Q: address to jump to when an instruction throws; R: last caught error.
this.Q = null,
this.R = null,
this.w = [],
// x: Date.now() of the previous step, used for the 500 ms check in J.
this.x = 0,
// D: dispatch table from the top four bits of a word to its decoder.
this.D = {
0: i,
1: h,
2: A,
3: n,
4: e,
5: a,
6: c,
7: o,
8: r,
9: k,
10: B,
11: f,
12: u,
13: C,
14: b,
15: g
}
} i.prototype.M = function (e) {
// Halt: clear the running flag so the loop in G.prototype.J exits.
e.G = !1
}
,
// Load into register c[i]: the immediate h (s = 0) or frame slot k[A] (s = 1).
h.prototype.M = function (e) {
switch (this.s) {
case 0:
e.c[this.i] = this.h;
break;
case 1:
e.c[this.i] = e.k[this.A]
}
}
,
// Store register c[i] into frame slot k[A].
A.prototype.M = function (e) {
e.k[this.A] = e.c[this.i]
}
,
// Operator instruction: c[n] = c[e] <op> c[a], with <op> selected by s.
// Cases 10 (typeof via the helper s) and 13 (negation) use only c[e].
n.prototype.M = function (e) {
switch (this.s) {
case 0:
e.c[this.n] = e.c[this.e] + e.c[this.a];
break;
case 1:
e.c[this.n] = e.c[this.e] - e.c[this.a];
break;
case 2:
e.c[this.n] = e.c[this.e] * e.c[this.a];
break;
case 3:
e.c[this.n] = e.c[this.e] / e.c[this.a];
break;
case 4:
e.c[this.n] = e.c[this.e] % e.c[this.a];
break;
case 5:
e.c[this.n] = e.c[this.e] == e.c[this.a];
break;
case 6:
e.c[this.n] = e.c[this.e] >= e.c[this.a];
break;
case 7:
e.c[this.n] = e.c[this.e] || e.c[this.a];
break;
case 8:
e.c[this.n] = e.c[this.e] && e.c[this.a];
break;
case 9:
e.c[this.n] = e.c[this.e] !== e.c[this.a];
break;
case 10:
e.c[this.n] = s(e.c[this.e]);
break;
case 11:
e.c[this.n] = e.c[this.e] in e.c[this.a];
break;
case 12:
e.c[this.n] = e.c[this.e] > e.c[this.a];
break;
case 13:
e.c[this.n] = -e.c[this.e];
break;
case 14:
e.c[this.n] = e.c[this.e] < e.c[this.a];
break;
case 15:
e.c[this.n] = e.c[this.e] & e.c[this.a];
break;
case 16:
e.c[this.n] = e.c[this.e] ^ e.c[this.a];
break;
case 17:
e.c[this.n] = e.c[this.e] << e.c[this.a];
break;
case 18:
e.c[this.n] = e.c[this.e] >>> e.c[this.a];
break;
case 19:
e.c[this.n] = e.c[this.e] | e.c[this.a]
}
}
,
// Enter a bytecode routine: save o and k, jump to the address held in c[i],
// build a new frame from the top h operand-stack values (unshift keeps their
// push order), then save the operand stack and start an empty one.
e.prototype.M = function (e) {
e.r.push(e.o),
e.B.push(e.k),
e.o = e.c[this.i],
e.k = [];
for (var t = 0; t < this.h; t++)
e.k.unshift(e.f.pop());
e.u.push(e.f),
e.f = []
}
,
// Leave a bytecode routine: restore o, k and f saved by e.prototype.M.
a.prototype.M = function (e) {
e.o = e.r.pop(),
e.k = e.B.pop(),
e.f = e.u.pop()
}
,
// Set the condition flag C from a test on registers c[n] and c[e]; s selects
// the comparison. Cases 6 and 7 use c[n] alone (as-is / negated).
c.prototype.M = function (e) {
switch (this.s) {
case 0:
e.C = e.c[this.n] >= e.c[this.e];
break;
case 1:
e.C = e.c[this.n] <= e.c[this.e];
break;
case 2:
e.C = e.c[this.n] > e.c[this.e];
break;
case 3:
e.C = e.c[this.n] < e.c[this.e];
break;
case 4:
e.C = e.c[this.n] == e.c[this.e];
break;
case 5:
e.C = e.c[this.n] != e.c[this.e];
break;
case 6:
e.C = e.c[this.n];
break;
case 7:
e.C = !e.c[this.n]
}
}
,
// Jump to address A: always (s = 0), if C is truthy (s = 1), if C is falsy
// (s = 2), or always while also clearing the error-handler address Q (s = 3).
// The condition flag is reset afterwards in every case.
o.prototype.M = function (e) {
switch (this.s) {
case 0:
e.o = this.A;
break;
case 1:
e.C && (e.o = this.A);
break;
case 2:
e.C || (e.o = this.A);
break;
case 3:
e.o = this.A,
e.Q = null
}
e.C = !1
}
,
// Invoke a host value held in register c[i]; the result goes to c[3].
// h values are popped from the operand stack as arguments, but only the first
// two are ever passed on.
//   s = 0: call c[i] as a function.
//   s = 1: pop a property name first, then call c[i][name] as a method.
//   s = 2: construct with `new c[i](...)`.
r.prototype.M = function (e) {
switch (this.s) {
case 0:
for (var t = [], n = 0; n < this.h; n++)
t.unshift(e.f.pop());
e.c[3] = e.c[this.i](t[0], t[1]);
break;
case 1:
for (var r = e.f.pop(), o = [], i = 0; i < this.h; i++)
o.unshift(e.f.pop());
e.c[3] = e.c[this.i][r](o[0], o[1]);
break;
case 2:
for (var a = [], c = 0; c < this.h; c++)
a.unshift(e.f.pop());
e.c[3] = new e.c[this.i](a[0], a[1])
}
}
,
// Push onto the operand stack: register c[i] (s = 0), the immediate h (s = 1),
// frame slot k[A] (s = 2) or constant g[A] (s = 3).
k.prototype.M = function (e) {
switch (this.s) {
case 0:
e.f.push(e.c[this.i]);
break;
case 1:
e.f.push(this.h);
break;
case 2:
e.f.push(e.k[this.A]);
break;
case 3:
e.f.push(e.g[this.A])
}
}
,
// Property access and eval. The interpreter is named `t` here, and the locals
// s, i, h, A shadow the outer functions of the same names.
//   s = 0: pop a key, c[n] = c[e][key].
//   s = 1: pop a key, then a value, c[e][key] = value.
//   s = 2: pop a string and store eval(string) in c[n].
// NOTE(review): eval runs text taken from the operand stack, i.e. whatever
// the bytecode and constant pool supply - only run with trusted bytecode.
B.prototype.M = function (t) {
switch (this.s) {
case 0:
var s = t.f.pop();
t.c[this.n] = t.c[this.e][s];
break;
case 1:
var i = t.f.pop()
, h = t.f.pop();
t.c[this.e][i] = h;
break;
case 2:
var A = t.f.pop();
t.c[this.n] = eval(A)
}
}
,
// Load constant g[A] into register c[i].
f.prototype.M = function (e) {
e.c[this.i] = e.g[this.A]
}
,
// Set the error-handler address: G.prototype.J jumps to Q when a step throws.
u.prototype.M = function (e) {
e.Q = this.A
}
,
// Throw the value held in register c[i].
C.prototype.M = function (e) {
throw e.c[this.i]
}
,
// Create a host-callable function backed by bytecode and store it in c[3].
// The function takes one argument, runs a fresh interpreter on the same code
// and constants starting at address A, and returns that interpreter's c[3].
// Its frame is [argument, ...copy of the current frame k]; the same array `n`
// is reused by every call of the created function.
b.prototype.M = function (e) {
var t = this
, n = [0];
e.k.forEach(function (e) {
n.push(e)
});
var r = function (r) {
var o = new G;
return o.k = n,
o.k[0] = r,
// J declares three parameters, so the fourth argument (e.w) is ignored.
o.J(e.b, t.A, e.g, e.w),
o.c[3]
};
// Make the function's toString() output resemble a native function.
r.toString = function () {
return "() { [native code] }"
}
,
e.c[3] = r
}
,
// Build a literal from the operand stack into register c[i].
//   s = 0: object from h pairs; each pair pops the value first, then the key.
//   s = 1: array of h popped values, kept in their push order.
g.prototype.M = function (e) {
switch (this.s) {
case 0:
for (var t = {}, n = 0; n < this.h; n++) {
var r = e.f.pop();
t[e.f.pop()] = r
}
e.c[this.i] = t;
break;
case 1:
for (var o = [], i = 0; i < this.h; i++)
o.unshift(e.f.pop());
e.c[this.i] = o
}
}
,
// Decode base64 bytecode into instruction words and store them in this.b.
// Each word is built from two consecutive bytes, high byte first; a trailing
// odd byte is ignored.
G.prototype.v = function (e) {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
    for (var t = Buffer.from(e, "base64").toString("binary"), n = [], r = 0; r < t.length - 1; r += 2)
        n.push(t.charCodeAt(r) << 8 | t.charCodeAt(r + 1));
    this.b = n
}
,
// Decode one obfuscated constant: base64-decode it, then undo a chained XOR.
// Each output char code is 24 ^ byte ^ previous output code, seeded with 66.
// Returns the decoded string.
G.prototype.y = function (e) {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
    for (var t = Buffer.from(e, "base64").toString("binary"), n = 66, r = [], o = 0; o < t.length; o++) {
        var i = 24 ^ t.charCodeAt(o) ^ n;
        r.push(String.fromCharCode(i)),
        n = i
    }
    return r.join("")
}
,
// Fill the constant pool this.g from `e`: string entries are decoded with
// G.prototype.y, all other entries are kept unchanged.
G.prototype.F = function (e) {
var t = this;
this.g = e.map(function (e) {
return "string" == typeof e ? t.y(e) : e
})
}
,
// Run bytecode.
//   e: base64-encoded code (decoded with v, and the constants with F) or an
//      already decoded array of instruction words (used as-is, with n).
//   t: start address, defaults to 0.
//   n: constant pool, defaults to [].
// The loop runs while the running flag G is set and stops early when the
// fetched word is not a number. It returns silently when more than 500 ms
// passed since the previous step (NOTE(review): presumably a guard against
// being paused in a debugger - confirm).
// When a step throws, the error is kept in R; without a handler address Q a
// string describing the position is thrown, otherwise execution resumes at Q.
G.prototype.J = function (e, t, n) {
for (t = t || 0,
n = n || [],
this.o = t,
"string" == typeof e ? (this.F(n),
this.v(e)) : (this.b = e,
this.g = n),
this.G = !0,
this.x = Date.now(); this.G;) {
var r = this.b[this.o++];
if ("number" != typeof r)
break;
var o = Date.now();
if (500 < o - this.x)
return;
this.x = o;
try {
this.M(r)
} catch (e) {
if (this.R = e,
!this.Q)
throw "execption at " + this.o + ": " + e;
this.o = this.Q
}
}
}
,
// Execute one instruction word: the top four bits select the decoder in D,
// which is constructed from the word and then run against this interpreter.
G.prototype.M = function (e) {
var t = (61440 & e) >> 12;
new this.D[t](e).M(this)
}
,
// Bootstrap: run the embedded program (base64 bytecode, start address 0,
// obfuscated constant pool) on a fresh interpreter at load time.
// NOTE(review): this run is presumably what installs `__g._encrypt`, since
// nothing else visible in the script assigns it - confirm.
1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
// Public entry point: returns __g._encrypt(e) for the given input.
var Q = function (e) {
return __g._encrypt(e)
};

参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang

python3爬虫-知乎登陆的更多相关文章

  1. python3爬虫-通过selenium登陆拉钩,爬取职位信息

    from selenium import webdriver from selenium.common.exceptions import NoSuchElementException from se ...

  2. Python3 使用selenium库登陆知乎并保存cookie为本地文件

    Python3 使用selenium库登陆知乎并保存cookie为本地文件 学习使用selenium库模拟登陆知乎,并将cookie保存为本地文件,然后供以后(requests模块)使用,用selen ...

  3. python3爬虫--反爬虫应对机制

    python3爬虫--反爬虫应对机制 内容来源于: Python3网络爬虫开发实战: 网络爬虫教程(python2): 前言: 反爬虫更多是一种攻防战,针对网站的反爬虫处理来采取对应的应对机制,一般需 ...

  4. python3爬虫(4)各种网站视频下载方法

    python3爬虫(4)各种网站视频下载方法原创H-KING 最后发布于2019-01-09 11:06:23 阅读数 13608 收藏展开理论上来讲只要是网上(浏览器)能看到图片,音频,视频,都能够 ...

  5. [Javascript] 爬虫 模拟新浪微博登陆

     概述: 由于业务需要,要编写爬虫代码去爬取新浪微博用户的信息. 虽然在网上能找到不少信息,但由于新浪微博改版,其登陆机制进行了修改,故很多老的文章就不适合用了. 经过一番摸索,成功模拟新浪微博的登陆 ...

  6. Python3爬虫系列:理论+实验+爬取妹子图实战

    Github: https://github.com/wangy8961/python3-concurrency-pics-02 ,欢迎star 爬虫系列: (1) 理论 Python3爬虫系列01 ...

  7. python爬虫知乎问答

    python爬虫知乎问答 import cookielibimport base64import reimport hashlibimport jsonimport rsaimport binasci ...

  8. python3爬虫中文乱码之请求头‘Accept-Encoding’:br 的问题

    当用python3做爬虫的时候,一些网站为了防爬虫会设置一些检查机制,这时我们就需要添加请求头,伪装成浏览器正常访问. header的内容在浏览器的开发者工具中便可看到,将这些信息添加到我们的爬虫代码 ...

  9. Python3 爬虫之 Scrapy 核心功能实现(二)

    博客地址:http://www.moonxy.com 基于 Python 3.6.2 的 Scrapy 爬虫框架使用,Scrapy 的搭建过程请参照本人的另一篇博客:Python3 爬虫之 Scrap ...

随机推荐

  1. 百度网盘下载器 PanDownload v2.0

    PanDownload是一款坚持以用户体验为中心,畅快淋漓的下载为理念而打造的下载工具. 从2017年2月9日首个版本推出,时至今日已经一年七个月了,首先感谢大家一直以来的支持与建议,促使着我不断地对 ...

  2. linux 用户管理 groupadd、groupmod、groupdel、gpasswd

    添加用户组groupadd [选项] 组名 /usr/sbin/groupadd执行权限:root一个用户可以属于多个所属组,但有一个缺省组,和用户名同名-g GID:指定组ID 修改用户组 grou ...

  3. 4.使用bat调用可执行jar文件

    一.项目需求 1.maven工程 2.有properties配置文件. 3.有内部jar包 二.简单的实现实例 1.项目文件放置 注意:如果是web工程,我们会将jar文件放到我们项目WEB-INF ...

  4. Oracle闪回(FlashBack)数据库

    Flashback Database功能非常类似与RMAN的不完全恢复,它可以把整个数据库回退到过去的某个时点的状态,这个功能依赖于Flashback log日志.比RMAN更快速和高效,因此Flas ...

  5. Windows server 安装

    运行管理员CMD --先切换到安装环境目录cd C:\Windows\Microsoft.NET\Framework\v4.0.30319 --安装服务 InstallUtil.exe D:\绝对路径 ...

  6. 将DataRow赋值给model中同名属性

    /// <summary> /// 将DataRow赋值给model中同名属性 /// </summary> /// <typeparam name="T&qu ...

  7. [翻译] AFDropdownNotification

    AFDropdownNotification Dropdown notification view for iOS. 下拉通知的view,用于iOS. Installation - 安装 If you ...

  8. MVC中使用EF的技巧集(一)

    一.建好数据库后,向项目中添加数据模型. 1.右键点击“Models” 文件夹,选择“添加”,再选择“添加新项”. 2.在“添加新项”窗口,选择左边的“数据”,然后再在右边选择“ADO.NET 实体数 ...

  9. 使用 FRP 反向代理实现 Windows 远程连接

    互联网普及率的日渐攀升与 IPv4 资源的持续减少,现在大部分家庭宽带都不会分配公网 IP ,这使一些网络应用的实现多了些困难,像个人的 NAS 和一些智能家居设备.对于分配公网 IP ,各地运营商的 ...

  10. kafka for Windows

    1,保证,安装好java环境,zookeeper,并且运行zookeeper. 2,下载kafka,并解压到磁盘 下载链接http://mirrors.hust.edu.cn/apache/kafka ...