【开源】2024最新python豆瓣电影数据爬虫+可视化分析项目
项目介绍
【开源】本项目基于 Python + pandas + Flask + MySQL 等技术,实现豆瓣电影数据的获取及可视化分析展示。觉得有用的朋友可以来个一键三连,感谢!
项目演示
[![【开源】2024最新python豆瓣电影数据爬虫+可视化分析项目](https://img-blog.csdnimg.cn/img_convert/5779cbd1ffa2001f7508e14ca2141f4a.jpeg)](https://player.bilibili.com/player.html?aid=1204518067)
项目截图
- 首页
- 列表页
- 爬虫演示
项目地址
https://github.com/mudfish/python-douban-view
项目结构
核心模块
电影爬虫
"""
异步并发爬虫
"""
# 本次运行获取的最大页数
MAX_PAGES = 5
# 进度控制文件
PAGE_PROGRESS_FILE = "page_progress.json"
# 电影类型
MOVIE_TYPES = ["剧情", "喜剧", "动作", "爱情", "科幻", "动画"]
# CSV文件名
CSV_NAME = "movie_data.csv"
# CSV头
CSV_HEADS = [
"id",
"movie_id",
"title",
"year",
"directors",
"casts",
"rating",
"cover",
"country",
"summary",
"types",
"lang",
"release_date",
"time",
"url",
]
# 上映日期匹配正则,剔除非数字和-
RELEASE_DATE_REMOVE_RE = r"[^0-9-]"
engine = create_engine("mysql+pymysql://root:123456@127.0.0.1:3306/db_douban")
def get_id():
    """Return a pseudo-random, digits-only string used as a row id.

    The value is a random integer in [1, 100000000] followed by the
    fractional digits of the current timestamp. Uniqueness is likely but
    not guaranteed.
    """
    random_part = random.randint(1, 100000000)
    fraction_digits = str(time.time()).split(".")[1].strip()
    return f"{random_part}{fraction_digits}"
class Spider:
    """Crawl Douban movie listings and detail pages concurrently.

    One coroutine per movie type walks the recommendation API page by page,
    scrapes the detail page of every movie found, appends one row per movie
    to ``CSV_NAME`` and, once all types are done, loads the de-duplicated CSV
    into the ``tb_movie`` table.
    """

    # Items requested per listing page. The same value is used as the offset
    # step between pages so that consecutive pages neither overlap nor leave
    # gaps (the offset step was 20 while only 10 items were requested, which
    # silently skipped half of the listing).
    PAGE_SIZE = 20

    def __init__(self):
        self.movie_page_url = "https://m.douban.com/rexxar/api/v2/movie/recommend?"
        self.movie_detail_url = "https://movie.douban.com/subject/{}/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
            "Referer": "https://movie.douban.com/explore",
        }
        self.movie_types = MOVIE_TYPES
        # Maps movie type -> next page number to fetch
        self.page_progress = {}
        # Number of pages that still have to be fetched in this run
        self.total_pages = 0
        self.completed_pages = 0
        # tqdm bar; stays None when there is nothing to fetch
        self.global_progress_bar = None

    @staticmethod
    def _first(nodes, default=""):
        """Return the first XPath result, or *default* when nothing matched."""
        return nodes[0] if nodes else default

    def init(self):
        """Start every run with a fresh CSV file that contains only the header."""
        # Mode "w" truncates an existing file, so no explicit removal is needed.
        with open(CSV_NAME, "w", newline="", encoding="utf-8") as writer_f:
            csv.writer(writer_f).writerow(CSV_HEADS)

    def load_page_progress(self):
        """Load the per-type resume positions from ``PAGE_PROGRESS_FILE``.

        A missing file leaves the progress untouched. An empty or unreadable
        file resets the progress and rewrites the file.
        """
        if not os.path.exists(PAGE_PROGRESS_FILE):
            return
        if os.stat(PAGE_PROGRESS_FILE).st_size > 0:
            try:
                with open(PAGE_PROGRESS_FILE, "r", encoding="utf-8") as f:
                    self.page_progress = json.load(f)
                return
            except json.JSONDecodeError:
                # Corrupt progress file: fall through and start over.
                pass
        # The read handle is closed before the file is rewritten.
        print("初始化页面进度")
        self.page_progress = {}
        self.save_page_progress()

    def save_page_progress(self):
        """Persist the per-type resume positions as JSON."""
        with open(PAGE_PROGRESS_FILE, "w", encoding="utf-8") as f:
            json.dump(self.page_progress, f, ensure_ascii=False)

    async def get_movie_pages(self, session, type_name):
        """Fetch all remaining listing pages of one movie type.

        Args:
            session: The shared ``aiohttp`` client session.
            type_name: The movie type (tag) to crawl.

        A failing page is reported and the crawl continues with the next one.
        The saved resume position stops advancing after the first failure so
        that the failed page is retried on the next run instead of being
        skipped; rows fetched twice are removed by ``clean_csv``.
        """
        start_page = self.page_progress.get(type_name, 1)
        can_advance = True
        # The range is empty when start_page is already past MAX_PAGES.
        for page in range(start_page, MAX_PAGES + 1):
            params = {
                "start": (page - 1) * self.PAGE_SIZE,
                "count": self.PAGE_SIZE,
                "tags": type_name,
            }
            try:
                async with session.get(
                    self.movie_page_url, headers=self.headers, params=params
                ) as resp:
                    resp.raise_for_status()
                    resp_json = await resp.json()
                for m in resp_json["items"]:
                    if m["type"] == "movie":
                        await self.process_movie(session, m)
                if can_advance:
                    # Record the progress
                    self.page_progress[type_name] = page + 1
                    self.save_page_progress()
                # Refresh the global progress bar
                self.update_global_progress()
            except Exception as e:
                print(f"处理:{type_name}第{page}页失败: {e}")
                traceback.print_exc()
                can_advance = False
                continue

    async def process_movie(self, session, movie):
        """Scrape one movie's detail page and append it to the CSV file.

        Args:
            session: The shared ``aiohttp`` client session.
            movie: One listing item; ``id``, ``title`` and ``year`` are read.

        Fields missing from the detail page are stored as empty strings.
        HTTP errors propagate to the caller.
        """
        detail_url = self.movie_detail_url.format(movie["id"])
        async with session.get(detail_url, headers=self.headers) as resp:
            resp.raise_for_status()
            html_text = await resp.text()
        path = etree.HTML(html_text)
        first = self._first
        movie_data = [
            get_id(),
            movie["id"],
            movie["title"],
            movie["year"],
            # Directors
            ",".join(path.xpath('//a[@rel="v:directedBy"]/text()')),
            # Cast
            ",".join(path.xpath('//a[@rel="v:starring"]/text()')),
            # Rating
            first(path.xpath('//strong[@property="v:average"]/text()')),
            # Cover image
            first(path.xpath('//img[@rel="v:image"]/@src')),
            # Countries: the text node between the label span and the next <br>
            first(
                path.xpath(
                    '//span[contains(text(),"制片国家")]/following-sibling::br[1]/preceding-sibling::text()[1]'
                )
            ).replace(" / ", ","),
            # Summary
            first(path.xpath('//span[@property="v:summary"]/text()')).strip(),
            # Genres
            ",".join(path.xpath('//div[@id="info"]/span[@property="v:genre"]/text()')),
            # Language
            first(
                path.xpath(
                    '//span[contains(text(),"语言")]/following-sibling::br[1]/preceding-sibling::text()[1]'
                )
            ),
            # Release date: first 10 characters, reduced to digits and "-"
            re.sub(
                RELEASE_DATE_REMOVE_RE,
                "",
                first(path.xpath('//span[@property="v:initialReleaseDate"]/text()'))[:10],
            ),
            # Runtime
            first(path.xpath('//span[@property="v:runtime"]/text()')),
            # Detail page URL
            detail_url,
        ]
        self.save_to_csv(movie_data)

    def save_to_csv(self, row):
        """Append one row to the CSV file."""
        with open(CSV_NAME, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow(row)

    def clean_csv(self):
        """De-duplicate the CSV by ``movie_id`` and append it to ``tb_movie``.

        Afterwards duplicate rows in the table are deleted, keeping the row
        with the smallest ``id`` for every ``movie_id``.
        """
        print("===========清理数据============")
        df = pd.read_csv(CSV_NAME, encoding="utf-8")
        df.drop_duplicates(subset=["movie_id"], keep="first", inplace=True)
        print("存储到数据库...")
        df.to_sql("tb_movie", con=engine, index=False, if_exists="append")
        print("清理重复数据...")
        # engine.begin() commits on success, rolls back on error and always
        # returns the connection to the pool.
        with engine.begin() as conn:
            conn.execute(
                text(
                    "delete t1 from tb_movie t1 inner join (select min(id) as id,movie_id from tb_movie group by movie_id having count(*) > 1) t2 on t1.movie_id=t2.movie_id where t1.id>t2.id"
                )
            )

    def update_global_progress(self):
        """Count one finished page and advance the progress bar, if any."""
        self.completed_pages += 1
        if self.global_progress_bar is not None:
            self.global_progress_bar.update(1)
            self.global_progress_bar.refresh()

    async def run(self):
        """Run a complete crawl: reset the CSV, fetch all pages, load the DB."""
        self.init()
        self.load_page_progress()
        # Count exactly the pages get_movie_pages will visit:
        # start_page..MAX_PAGES inclusive, never negative.
        self.total_pages = sum(
            max(0, MAX_PAGES + 1 - self.page_progress.get(type_name, 1))
            for type_name in self.movie_types
        )
        print(self.total_pages)
        if self.total_pages > 0:
            self.global_progress_bar = tqdm(
                total=self.total_pages, desc="progress", unit="page", colour="GREEN"
            )
            try:
                async with aiohttp.ClientSession() as session:
                    tasks = [
                        self.get_movie_pages(session, type_name)
                        for type_name in self.movie_types
                    ]
                    await asyncio.gather(*tasks)
            finally:
                self.global_progress_bar.close()
            self.clean_csv()
if __name__ == "__main__":
    # asyncio.run() creates a fresh event loop, runs the coroutine to
    # completion and closes the loop afterwards. Calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    spider = Spider()
    asyncio.run(spider.run())
电影可视化
接口代码
import os

from flask import Flask, render_template, request, redirect, url_for, session

from utils import db_query
app = Flask(__name__)
# Key used to sign the session cookie. It is read from the FLASK_SECRET_KEY
# environment variable so that deployments do not share a key that is
# published in source control; the fallback keeps local setups working.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "mysessionkey")
# Global request gate
@app.before_request
def before_request():
    """Redirect anonymous visitors to the login page.

    Requests for static files and for /login, /logout and /register are
    always let through; every other path requires ``login_username`` in the
    session.
    """
    path = request.path
    is_public = path.startswith("/static") or path in (
        "/login",
        "/logout",
        "/register",
    )
    if is_public:
        return
    # Not logged in: send the visitor to the login page
    if not session.get("login_username"):
        return redirect(url_for("login"))
# Home page
@app.route("/")
def index():
    """Render the dashboard with summary figures and the two chart datasets.

    The template receives the overall movie statistics, the distribution of
    movies per type and the distribution of ratings, all obtained from
    ``db_query``, plus the name of the logged-in user.
    """
    # The leftover debug print of the rating distribution was removed; it
    # wrote the whole dataset to stdout on every request.
    return render_template(
        "index.html",
        login_username=session.get("login_username"),
        movie_stats=db_query.fetch_movie_statistics(),
        movie_type_distribution=db_query.fetch_movie_type_distribution(),
        movie_rating_distribution=db_query.fetch_movie_rating_distribution(),
    )
# Login
@app.route("/login", methods=["GET", "POST"])
def login():
    """Show the login form, or check submitted credentials on POST.

    On success the user name is stored in the session and the visitor is
    redirected to the home page; otherwise the error page is rendered.
    """
    # Anything that is not a POST (GET, and the HEAD requests Flask accepts
    # automatically for GET routes) gets the form. Falling through without a
    # return value would make Flask answer with a 500 error.
    if request.method != "POST":
        return render_template("login.html")

    # .get() instead of [] so that a request without these fields shows the
    # normal error page rather than raising KeyError.
    username = request.form.get("username")
    password = request.form.get("password")
    if username is None or password is None:
        return render_template("error.html", error="用户名或密码错误")

    # NOTE(review): passwords are compared as stored, i.e. in plaintext.
    # Switching to salted hashes needs a matching change in register() and a
    # migration of existing rows.
    sql = "SELECT * FROM `tb_user` WHERE `username` = %s AND `password` = %s"
    if len(db_query.query(sql, (username, password))) > 0:
        # Remember the logged-in user
        session["login_username"] = username
        return redirect(url_for("index"))
    return render_template(
        "error.html",
        error="用户名或密码错误",
    )
# Logout
@app.route("/logout")
def logout():
    """Forget the logged-in user and redirect to the home page."""
    session.pop("login_username", None)
    home_url = url_for("index")
    return redirect(home_url)
# Registration
@app.route("/register", methods=["GET", "POST"])
def register():
    """Show the registration form, or create a new user on POST.

    The error page is rendered when a field is empty, the two passwords
    differ or the user name is already taken. After a successful insert the
    visitor is redirected to the login page.
    """
    # Anything that is not a POST (GET, and the HEAD requests Flask accepts
    # automatically for GET routes) gets the form instead of falling through
    # without a response.
    if request.method != "POST":
        return render_template("register.html")

    # .get() instead of [] so that missing fields do not raise KeyError.
    username = request.form.get("username")
    password = request.form.get("password")
    password_confirm = request.form.get("password_confirm")

    if not username or not password:
        return render_template("error.html", error="用户名和密码不能为空")
    if password != password_confirm:
        return render_template(
            "error.html",
            error="两次密码输入不一致",
        )

    # Reject user names that already exist.
    # NOTE(review): check-then-insert is not atomic; a unique index on
    # `username` would be needed to rule out duplicates under concurrency.
    sql = "SELECT * FROM `tb_user` WHERE `username` = %s"
    if len(db_query.query(sql, (username,))) > 0:
        return render_template(
            "error.html",
            error="用户名已存在",
        )

    # NOTE(review): the password is stored as submitted (plaintext), matching
    # what login() compares against.
    sql = "INSERT INTO `tb_user` (`username`, `password`) VALUES (%s, %s)"
    db_query.query(sql, (username, password), db_query.QueryType.NO_SELECT)
    return redirect(url_for("login"))
@app.route("/list")
def movie_list():
    """Render the movie list page with all movies returned by ``db_query``."""
    template_context = {
        "login_username": session.get("login_username"),
        "movies": db_query.fetch_movie_list(),
    }
    return render_template("list.html", **template_context)
@app.errorhandler(404)
def page_not_found(error):
    """Render the custom "not found" page with HTTP status 404."""
    body = render_template("404.html")
    return body, 404
@app.errorhandler(500)
def system_error(error):
    """Render the custom server error page with HTTP status 500."""
    body = render_template("500.html")
    return body, 500
if __name__ == "__main__":
    # Reload Jinja templates from disk when they change
    app.jinja_env.auto_reload = True
    run_options = {"host": "127.0.0.1", "port": 8002, "debug": True}
    app.run(**run_options)
首页
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible"
content="IE=edge" />
<meta
name="viewport"
content="width=device-width, initial-scale=1, shrink-to-fit=no"
/>
<meta name="description" content="" />
<meta name="author" content="" />
<title>首页</title>
<!-- Custom fonts for this template-->
<link
href="/static/vendor/fontawesome-free/css/all.min.css"
rel="stylesheet"
type="text/css"
/>
<link
href="https://fonts.googleapis.com/css?family=Nunito:200,200i,300,300i,400,400i,600,600i,700,700i,800,800i,900,900i"
rel="stylesheet"
/>
<!-- Custom styles for this template-->
<link href="/static/css/sb-admin-2.min.css" rel="stylesheet" />
</head>
<body id="page-top">
<!-- Page Wrapper -->
<div id="wrapper">
<!-- Sidebar -->
<ul
class="navbar-nav bg-gradient-primary sidebar sidebar-dark accordion"
id="accordionSidebar"
>
<!-- Sidebar - Brand -->
<!-- The brand links to the dashboard route "/"; "index.html" is not a route of the Flask app -->
<a
  class="sidebar-brand d-flex align-items-center justify-content-center"
  href="/"
>
  <div class="sidebar-brand-icon rotate-n-15">
    <i class="fas fa-laugh-wink"></i>
  </div>
  <div class="sidebar-brand-text mx-3">豆瓣电影可视化</div>
</a>
<!-- Divider -->
<hr class="sidebar-divider my-0" />
<!-- Nav Item - Dashboard -->
<li class="nav-item active">
<a class="nav-link" href="/">
<i class="fas fa-fw fa-tachometer-alt"></i>
<span>首页</span></a
>
</li>
<!-- 列表 -->
<li class="nav-item">
<a class="nav-link" href="/list">
<i class="fas fa-fw fa-table"></i>
<span>电影列表</span></a
>
</li>
<!-- Divider -->
<hr class="sidebar-divider d-none d-md-block" />
<!-- Sidebar Toggler (Sidebar) -->
<div class="text-center d-none d-md-inline">
<button class="rounded-circle border-0" id="sidebarToggle"></button>
</div>
</ul>
<!-- End of Sidebar -->
<!-- Content Wrapper -->
<div id="content-wrapper" class="d-flex flex-column">
<!-- Main Content -->
<div id="content">
<!-- Topbar -->
<nav
class="navbar navbar-expand navbar-light bg-white topbar mb-4 static-top shadow"
>
<!-- Sidebar Toggle (Topbar) -->
<button
id="sidebarToggleTop"
class="btn btn-link d-md-none rounded-circle mr-3"
>
<i class="fa fa-bars"></i>
</button>
<!-- Topbar Search -->
<!-- <form class="d-none d-sm-inline-block form-inline mr-auto ml-md-3 my-2 my-md-0 mw-100 navbar-search">
<div class="input-group">
<input type="text" class="form-control bg-light border-0 small" placeholder="Search for..." aria-label="Search" aria-describedby="basic-addon2">
<div class="input-group-append">
<button class="btn btn-primary" type="button">
<i class="fas fa-search fa-sm"></i>
</button>
</div>
</div>
</form> -->
<!-- Topbar Navbar -->
<ul class="navbar-nav ml-auto">
<div class="topbar-divider d-none d-sm-block"></div>
<!-- Nav Item - User Information -->
<li class="nav-item dropdown no-arrow">
<a
class="nav-link dropdown-toggle"
href="#"
id="userDropdown"
role="button"
data-toggle="dropdown"
aria-haspopup="true"
aria-expanded="false"
>
<span class="mr-2 d-none d-lg-inline text-gray-600 small"
>{{login_username}}</span
>
<img
class="img-profile rounded-circle"
src="/static/img/avatar.png"
/>
</a>
<!-- Dropdown - User Information -->
<div
class="dropdown-menu dropdown-menu-right shadow animated--grow-in"
aria-labelledby="userDropdown"
>
<a
class="dropdown-item"
href="#"
data-toggle="modal"
data-target="#logoutModal"
>
<i
class="fas fa-sign-out-alt fa-sm fa-fw mr-2 text-gray-400"
></i>
Logout
</a>
</div>
</li>
</ul>
</nav>
<!-- End of Topbar -->
<!-- Begin Page Content -->
<div class="container-fluid">
<!-- Page Heading -->
<!-- <div class="d-sm-flex align-items-center justify-content-between mb-4">
<h1 class="h3 mb-0 text-gray-800">Dashboard</h1>
<a href="#" class="d-none d-sm-inline-block btn btn-sm btn-primary shadow-sm"><i class="fas fa-download fa-sm text-white-50"></i> Generate Report</a>
</div> -->
<!-- Content Row -->
<div class="row">
<!-- Earnings (Monthly) Card Example -->
<div class="col-xl-3 col-md-6 mb-4">
<div class="card border-left-primary shadow h-100 py-2">
<div class="card-body">
<div class="row no-gutters align-items-center">
<div class="col mr-2">
<div
class="font-weight-bold text-primary text-uppercase mb-1"
>
电影总数
</div>
<div class="h5 mb-0 font-weight-bold text-gray-800">
{{ movie_stats['total_movies'] }}
</div>
</div>
<div class="col-auto">
<i class="fas fa-calendar fa-2x text-gray-300"></i>
</div>
</div>
</div>
</div>
</div>
<!-- Earnings (Monthly) Card Example -->
<div class="col-xl-3 col-md-6 mb-4">
<div class="card border-left-success shadow h-100 py-2">
<div class="card-body">
<div class="row no-gutters align-items-center">
<div class="col mr-2">
<div
class="font-weight-bold text-success text-uppercase mb-1"
>
电影最高评分
</div>
<div class="h5 mb-0 font-weight-bold text-gray-800">
{{ movie_stats['highest_rating'] }}
</div>
</div>
<div class="col-auto">
<i class="fas fa-dollar-sign fa-2x text-gray-300"></i>
</div>
</div>
</div>
</div>
</div>
<!-- Earnings (Monthly) Card Example -->
<div class="col-xl-3 col-md-6 mb-4">
<div class="card border-left-info shadow h-100 py-2">
<div class="card-body">
<div class="row no-gutters align-items-center">
<div class="col mr-2">
<div
class="font-weight-bold text-info text-uppercase mb-1"
>
出演最多演员
</div>
<div class="row no-gutters align-items-center">
<div class="col-auto">
<div
class="h5 mb-0 mr-3 font-weight-bold text-gray-800"
>
{{ movie_stats['most_popular_cast'] }}
</div>
</div>
<div class="col">
<div class="progress progress-sm mr-2">
<div
class="progress-bar bg-info"
role="progressbar"
style="width: 50%"
aria-valuenow="50"
aria-valuemin="0"
aria-valuemax="100"
></div>
</div>
</div>
</div>
</div>
<div class="col-auto">
<i
class="fas fa-clipboard-list fa-2x text-gray-300"
></i>
</div>
</div>
</div>
</div>
</div>
<!-- Pending Requests Card Example -->
<div class="col-xl-3 col-md-6 mb-4">
<div class="card border-left-warning shadow h-100 py-2">
<div class="card-body">
<div class="row no-gutters align-items-center">
<div class="col mr-2">
<div
class="font-weight-bold text-warning text-uppercase mb-1"
>
制片最多国家
</div>
<div class="h5 mb-0 font-weight-bold text-gray-800">
{{ movie_stats['most_common_country'] }}
</div>
</div>
<div class="col-auto">
<i class="fas fa-comments fa-2x text-gray-300"></i>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Content Row -->
<div class="row">
<!-- Area Chart -->
<div class="col-xl-6 col-lg-6">
<div class="card shadow mb-4">
<!-- Card Header - Dropdown -->
<div
class="card-header py-3 d-flex flex-row align-items-center justify-content-between"
>
<h6 class="m-0 font-weight-bold text-primary">
电影分类统计
</h6>
</div>
<!-- Card Body -->
<div class="card-body">
<div
id="movie_type_chart"
style="width: 100%; height: 450px"
></div>
<!-- <div class="chart-area">
</div> -->
</div>
</div>
</div>
<!-- Line Chart -->
<div class="col-xl-6 col-lg-6">
<div class="card shadow mb-4">
<!-- Card Header - Dropdown -->
<div
class="card-header py-3 d-flex flex-row align-items-center justify-content-between"
>
<h6 class="m-0 font-weight-bold text-primary">
电影评分统计
</h6>
<div class="dropdown no-arrow">
<a
class="dropdown-toggle"
href="#"
role="button"
id="dropdownMenuLink"
data-toggle="dropdown"
aria-haspopup="true"
aria-expanded="false"
>
<i
class="fas fa-ellipsis-v fa-sm fa-fw text-gray-400"
></i>
</a>
<div
class="dropdown-menu dropdown-menu-right shadow animated--fade-in"
aria-labelledby="dropdownMenuLink"
>
<div class="dropdown-header">Dropdown Header:</div>
<a class="dropdown-item" href="#">Action</a>
<a class="dropdown-item" href="#">Another action</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="#"
>Something else here</a
>
</div>
</div>
</div>
<!-- Card Body -->
<div class="card-body">
<div
id="movie_score_chart"
style="width: 100%; height: 450px"
></div>
</div>
</div>
</div>
</div>
<!-- Content Row -->
</div>
<!-- /.container-fluid -->
</div>
<!-- End of Main Content -->
<!-- Footer -->
<footer class="sticky-footer bg-white">
<div class="container my-auto">
<div class="copyright text-center my-auto">
<span
>@Laoxu Open Source.<a
target="_blank"
href="https://github.com/mudfish"
>Github</a
></span
>
</div>
</div>
</footer>
<!-- End of Footer -->
</div>
<!-- End of Content Wrapper -->
</div>
<!-- End of Page Wrapper -->
<!-- Scroll to Top Button-->
<a class="scroll-to-top rounded" href="#page-top">
<i class="fas fa-angle-up"></i>
</a>
<!-- Logout Modal-->
<div
class="modal fade"
id="logoutModal"
tabindex="-1"
role="dialog"
aria-labelledby="exampleModalLabel"
aria-hidden="true"
>
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="exampleModalLabel">Ready to Leave?</h5>
<button
class="close"
type="button"
data-dismiss="modal"
aria-label="Close"
>
<span aria-hidden="true">×</span>
</button>
</div>
<!-- <div class="modal-body">Select "Logout" below if you are ready to end your current session.</div> -->
<div class="modal-footer">
<button
class="btn btn-secondary"
type="button"
data-dismiss="modal"
>
Cancel
</button>
<a class="btn btn-primary" href="/logout">Logout</a>
</div>
</div>
</div>
</div>
<!-- Bootstrap core JavaScript-->
<script src="/static/vendor/jquery/jquery.min.js"></script>
<script src="/static/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<!-- Core plugin JavaScript-->
<script src="/static/vendor/jquery-easing/jquery.easing.min.js"></script>
<!-- Custom scripts for all pages-->
<script src="/static/js/sb-admin-2.min.js"></script>
<!-- Page level plugins -->
<script src="/static/vendor/chart.js/Chart.min.js"></script>
<!-- Page level custom scripts -->
<script src="/static/js/demo/chart-area-demo.js"></script>
<script src="/static/js/demo/chart-pie-demo.js"></script>
<script src="/static/js/echarts.min.js"></script>
<script>
  // Pie chart of the movie type distribution, rendered into #movie_type_chart.
  var chartDom = document.getElementById("movie_type_chart");
  var myChart = echarts.init(chartDom);
  var option;
  // Injected by the Flask view as JSON; used directly as the pie series data
  // (presumably a list of {name, value} objects; verify against db_query).
  var movieTypeData = {{ movie_type_distribution|tojson }};
  option = {
    title: {
      text: "",
      subtext: "来源:豆瓣数据",
      left: "center",
    },
    tooltip: {
      trigger: "item",
    },
    legend: {
      orient: "vertical",
      left: "left",
    },
    series: [
      {
        // Shown as the tooltip heading; replaces the leftover sample text
        // "Access From" from the ECharts example.
        name: "电影分类",
        type: "pie",
        radius: "50%",
        data: movieTypeData,
        emphasis: {
          itemStyle: {
            shadowBlur: 10,
            shadowOffsetX: 0,
            shadowColor: "rgba(0, 0, 0, 0.5)",
          },
        },
      },
    ],
  };
  option && myChart.setOption(option);
</script>
<script>
  // Area line chart of the rating distribution, rendered into #movie_score_chart.
  var chartDom = document.getElementById("movie_score_chart");
  var myChart = echarts.init(chartDom);
  var option;
  // Injected by the Flask view as JSON; each entry is read as a pair whose
  // first element is the x-axis label and whose second element is the value.
  // The leftover console.log of this data was removed.
  var ratingData = {{ movie_rating_distribution|tojson }};
  option = {
    title: {
      text: "",
      subtext: "来源:豆瓣数据",
      left: "center",
    },
    xAxis: {
      type: "category",
      boundaryGap: false,
      data: ratingData.map(item => item[0]),
    },
    yAxis: {
      type: "value",
    },
    series: [
      {
        data: ratingData.map(item => item[1]),
        type: "line",
        areaStyle: {},
      },
    ],
    tooltip: {
      // Triggered by the axis, as usual for charts with a category axis
      trigger: 'axis',
      axisPointer: {
        // Axis indicator style: 'line' or 'shadow'
        type: 'shadow'
      }
    },
  };
  option && myChart.setOption(option);
</script>
</body>
</html>
【开源】2024最新python豆瓣电影数据爬虫+可视化分析项目的更多相关文章
- [Python] 豆瓣电影top250爬虫
1.分析 <li><div class="item">电影信息</div></li> 每个电影信息都是同样的格式,毕竟在服务器端是用 ...
- 放养的小爬虫--豆瓣电影入门级爬虫(mongodb使用教程~)
放养的小爬虫--豆瓣电影入门级爬虫(mongodb使用教程~) 笔者声明:只用于学习交流,不用于其他途径.源代码已上传github.githu地址:https://github.com/Erma-Wa ...
- 零基础入门Python实战:四周实现爬虫网站 Django项目视频教程
点击了解更多Python课程>>> 零基础入门Python实战:四周实现爬虫网站 Django项目视频教程 适用人群: 即将毕业的大学生,工资低工作重的白领,渴望崭露头角的职场新人, ...
- OneAPM大讲堂 | 监控数据的可视化分析神器 Grafana 的告警实践
文章系国内领先的 ITOM 管理平台供应商 OneAPM 编译呈现. 概览 Grafana 是一个开源的监控数据分析和可视化套件.最常用于对基础设施和应用数据分析的时间序列数据进行可视化分析,也可以用 ...
- 获取豆瓣电影数据(R与API获取网页数据)
一般成熟的网站都会有反爬虫策略,例如限制访问次数,限制访问 IP,动态显示数据等.爬虫和反爬虫就是一直相爱相杀地互相钳制.如果要通过爬虫来获取某些大型网站的数据,那是一件很费时费力的活.小白总遭遇过在 ...
- 神技!微信小程序(应用号)抢先入门体验(附最新案例-豆瓣电影)持续更新
微信小程序 Demo(豆瓣电影) 由于时间的关系,没有办法写一个完整的说明,后续配合一些视频资料,请持续关注 官方文档:https://mp.weixin.qq.com/debug/wxadoc/de ...
- Python Scrapy突破反爬虫机制(项目实践)
对于 BOSS 直聘这种网站,当程序请求网页后,服务器响应内容包含了整个页面的 HTML 源代码,这样就可以使用爬虫来爬取数据.但有些网站做了一些“反爬虫”处理,其网页内容不是静态的,而是使用 Jav ...
- 用Python爬取《王者荣耀》英雄皮肤数据并可视化分析,用图说话
大家好,我是辰哥~ 今天辰哥带大家分析一波当前热门手游<王者荣耀>英雄皮肤,比如皮肤上线时间.皮肤类型(勇者:史诗:传说等).价格. 1.获取数据 数据来源于<王者荣耀官方网站> ...
- python 发送json数据操作实例分析 - python
文章来源:嗨学网 敏而好学论坛www.piaodoo.com 欢迎大家相互学习 本文实例讲述了python 发送json数据操作.分享给大家供大家参考,具体如下: # !/usr/bin/env py ...
- 毕设之Python爬取天气数据及可视化分析
写在前面的一些P话:(https://jq.qq.com/?_wv=1027&k=RFkfeU8j) 天气预报我们每天都会关注,我们可以根据未来的天气增减衣物.安排出行,每天的气温.风速风向. ...
随机推荐
- Techwalk攻略 | 来北京与OpenHarmony技术大会一起技术漫游!
去北京Citywalk已经不是新鲜事? 不如来第二届OpenHarmony技术大会一起Techwalk! 大会即将开幕请速速收藏以下打卡攻略↓ 点击链接,观看线上直播
- 重磅官宣,OpenHarmony开发者大会来了!
开放原子开源基金会OpenHarmony开发者大会2023将于4月19日在北京召开. 春风送暖万物新,OpenHarmony正当时.诚邀您参加本届大会,聆听行业大咖分享操作系统和开源的最新前沿研究成 ...
- 黄吉:如何适配OpenHarmony自有音频框架ADM?
编者按:在 OpenHarmony 生态发展过程中,涌现了大批优秀的代码贡献者,本专题旨在表彰贡献.分享经验,文中内容来自嘉宾访谈,不代表 OpenHarmony 工作委员会观点. 黄吉 中国科学院软 ...
- TAMUctf 2024 RSA-证书修复-总结
上周末跟朋友们参加了TAMUctf 2024国际赛,最终排在了第14名,还是很不错的成绩. 本次比赛Monk师傅也是出了三个RSA证书修复问题的题目,质量很不错.这里给大家整理一下供学习. Trunc ...
- Python删除文件、文件夹----os
使用 os 删除文件 import os '''删除文件 语法: os.unlink(path) 示例: 删除 b 文件夹中的 12.txt ''' os.unlink('b/12.txt') ...
- Android与STM32通信中巧妙解决中文乱码问题
前言 网上一大堆都是要多种格式相互转换,并且要很大的字库文件,对于小应用工程,小容量的STM32芯片,额外多出这些开销会感到蛮不舒服的,而且绝大部分的乱码问题时发生在STM32这边,所以本文是从And ...
- Python 爬虫之 xpath
0x01 XML 基础 xpath 是在 XML 文档中搜索内容的一门语言 HTML 是 XML 的一个子集 XML 代码举例: <book> <isbn>978xxxxxxx ...
- 重新点亮shell————函数[七]
前言 简单整理一下函数. 正文 自定义函数: function fname(){ 命令 } 函数的执行: fname 函数作用范围的变量: local 变量名 函数的参数 $1 $2 $3 .... ...
- python将日志生成到文件和控制台
# 日志收集设置import logging, osfrom logging.handlers import TimedRotatingFileHandlerimport datetimecurren ...
- CF1535F String Distance
\(CF1535F\ \ String\ Distance\) 题意 给 \(n\) 个长度均为 \(len\) 的字符串 \(T_1,T_2,\dots T_n\),定义 \(f(a,b)\) 为将 ...