最终版:07_中证网(Plus -Pro).py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Final revision (07): for one fixed keyword, walk the cs.com.cn search result
# pages year by year and save every result summary found on a page to its own
# UTF-8 text file under <base>/<query>/<year>/.

query = '苏州银行'

# Years to crawl and the total number of result pages for each of them.
# The two lists are parallel: page_counts[i] belongs to years[i].
years = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
page_counts = [2, 1, 1, 1, 11, 1, 19, 7]

# Request headers shared by every search request.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}

# Inline style that identifies the summary cell inside each result table.
summary_style = 'font-size: 12px;line-height: 24px;color: #333333;margin-top: 4px;'

for year, pages in zip(years, page_counts):
    out_dir = f'D:/桌面/爬虫-银行/中国证券网/{query}/{year}'
    # makedirs also creates missing parent folders and tolerates an existing
    # one, so no separate isdir()/mkdir() pair per level is needed.
    os.makedirs(out_dir, exist_ok=True)

    m = 0  # number of summaries already saved for this year
    for p in range(1, pages + 1):
        url = (
            f'http://search.cs.com.cn/search?page={p}&channelid=215308'
            f'&searchword={query}&keyword={query}&token=12.1462412070719.47'
            '&perpage=10&outlinepage=5&&andsen=&total=&orsen=&exclude='
            '&searchscope=&timescope=&timescopecolumn=&orderby='
            f'&timeline=={year}'
        )
        resp = requests.get(url, headers=dic)
        resp.encoding = 'utf-8'
        print(f'\n>>>--------------------第{p}页---------------------<<<\n')

        page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser
        # Keep the text of the summary cell of every table that has one.
        cells = (table.find('td', style=summary_style) for table in page.find_all("table"))
        datalist = [cell.get_text() for cell in cells if cell is not None]

        for ii, text in enumerate(datalist):
            number = m + ii + 1  # running file number within the year
            with open(f'{out_dir}/({year}){number}.txt', 'w+', encoding='utf-8') as fp:
                fp.write(text + '\n')  # text only
            print(text)
            print(f'\n> > >{year}年,第{p}页,第{ii + 1}篇,成功! < < <')
        # Advance by exactly the number of files written so that numbering
        # stays continuous across pages.
        m += len(datalist)

    print('----------------------------')
    print(f'------\n{year}年,爬取完毕----')
    print('----------------------------')

历史优化记录:01_中证网.py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 01: search cs.com.cn for a keyword, follow every https article
# link on the first `pages` result pages and save each article's paragraphs
# to a numbered UTF-8 text file.
query = input("【中证网】请输入你想搜索的内容:")
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers shared by the search and the article requests.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 "
                  "Safari/537.36 SLBrowser/7.0.0.6241 SLBChan/30"}

out_dir = 'D:/桌面/爬虫-银行/中国证券网/中国银行'
os.makedirs(out_dir, exist_ok=True)  # open() below fails if the folder is missing

m = 0  # number of articles already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    if p == 1:
        url = f'http://search.cs.com.cn/search?channelid=215308&perpage=&templet=&token=12.1462412070719.47&searchword={query}'
    else:
        # Each further page needs its own URL; requesting the first-page URL
        # again would only re-download the same results.
        url = f"http://search.cs.com.cn/search?page={p}&channelid=215308&searchword={query}"
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser

    # find() returns None when the page has no <table>; treat that as "no links".
    table = page.find("table")
    alist = table.find_all("a") if table is not None else []
    # Keep only absolute https links; anchors without an href are skipped.
    hrefs = (a.get('href') for a in alist)
    weblist = [href for href in hrefs if href and href.startswith("https")]

    # ---------------- every article on this result page ----------------
    for ii, url_a in enumerate(weblist):
        resp_a = requests.get(url_a, headers=dic)
        resp_a.encoding = 'gbk'
        page_a = BeautifulSoup(resp_a.text, "html.parser")
        # Prefer the <section> body; fall back to the whole document when the
        # article has no <section> instead of failing on None.
        section = page_a.find('section')
        container = section if section is not None else page_a
        txt_list = [txt_a.text for txt_a in container.find_all('p')]

        number = m + ii + 1
        with open(f'{out_dir}/{number}.txt', 'w+', encoding='utf-8') as fp:
            fp.writelines(line + '\n' for line in txt_list)  # text only
        print(f'>>{number}成功!')
    m += len(weblist)

print('---------------\n>>>爬取完毕<<<')

历史优化记录:02_中证网.py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 02: search cs.com.cn for a keyword, follow every https article
# link on the first `pages` result pages and save each article's paragraphs
# to a numbered "(2021)N.txt" file.
query = input("【中证网】请输入你想搜索的内容:")
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers shared by the search and the article requests.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 "
                  "Safari/537.36 SLBrowser/7.0.0.6241 SLBChan/30"}

out_dir = 'D:/桌面/爬虫-银行/中国证券网/中国银行/0'
os.makedirs(out_dir, exist_ok=True)  # open() below fails if the folder is missing

m = 0  # number of articles already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    # Every page, including the first, is fetched exactly once through its
    # own page-specific URL.
    url = f'http://search.cs.com.cn/search?page={p}&channelid=215308&searchword={query}'
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser

    # find() returns None when the page has no <table>; treat that as "no links".
    table = page.find("table")
    alist = table.find_all("a") if table is not None else []
    # Keep only absolute https links; anchors without an href are skipped.
    hrefs = (a.get('href') for a in alist)
    weblist = [href for href in hrefs if href and href.startswith("https")]

    # ---------------- every article on this result page ----------------
    for ii, url_a in enumerate(weblist):
        resp_a = requests.get(url_a, headers=dic)
        resp_a.encoding = 'gbk'
        page_a = BeautifulSoup(resp_a.text, "html.parser")
        # Prefer the <section> body; fall back to the whole document when the
        # article has no <section> instead of failing on None.
        section = page_a.find('section')
        container = section if section is not None else page_a
        txt_list = [txt_a.text for txt_a in container.find_all('p')]

        number = m + ii + 1
        with open(f'{out_dir}/(2021){number}.txt', 'w+', encoding='utf-8') as fp:
            fp.writelines(line + '\n' for line in txt_list)  # text only
        print(f'>>{number}成功!')
    m += len(weblist)

print('---------------\n>>>爬取完毕<<<')

历史优化记录:03_中证网.py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 03: one loop over all result pages (timeline fixed to 2021);
# every https article link is followed and its paragraphs are saved to a
# numbered "(2021)N.txt" file.
query = input("【中证网】请输入你想搜索的内容:")
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers shared by the search and the article requests.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}

out_dir = 'D:/桌面/爬虫-银行/中国证券网/中国银行/2021'
os.makedirs(out_dir, exist_ok=True)  # open() below fails if the folder is missing

m = 0  # number of articles already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    url = (
        f'http://search.cs.com.cn/search?page={p}&channelid=215308'
        f'&searchword={query}&perpage=10&outlinepage=5&&andsen=&total=&orsen='
        '&exclude=&searchscope=&timescope=&timescopecolumn=&orderby='
        '&timeline==2021'
    )
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    print(f'\n>>>--------------------第{p}页---------------------<<<\n')

    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser
    # find() returns None when the page has no <table>; treat that as "no links".
    table = page.find("table")
    alist = table.find_all('a') if table is not None else []
    # Keep only absolute https links; anchors without an href are skipped.
    hrefs = (a.get('href') for a in alist)
    weblist = [href for href in hrefs if href and href.startswith("https")]

    # ---------------- every article on this result page ----------------
    for ii, url_a in enumerate(weblist):
        resp_a = requests.get(url_a, headers=dic)
        resp_a.encoding = 'gbk'
        page_a = BeautifulSoup(resp_a.text, "html.parser")
        # Prefer the <section> body; fall back to the whole document when the
        # article has no <section> instead of failing on None.
        section = page_a.find('section')
        container = section if section is not None else page_a
        txt_list = [txt_a.text for txt_a in container.find_all('p')]
        print(f'\n-++++++++++++++++++第{ii+1}篇文章++++++++++++++++-\n', txt_list, len(txt_list))

        number = m + ii + 1
        with open(f'{out_dir}/(2021){number}.txt', 'w+', encoding='utf-8') as fp:
            fp.writelines(line + '\n' for line in txt_list)  # text only
        print(f'\n> > >{ii+1}成功! < < <')
    # Advance by exactly the number of files written (no extra +1), so file
    # numbers do not skip a value after each page.
    m += len(weblist)

print('---------------\n>>>爬取完毕<<<')

历史优化记录:04_中证网(网址筛选问题).py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 04: one loop over all result pages (timeline fixed to 2020);
# every http(s) article link is followed and its paragraphs are saved to a
# numbered "(2020)N.txt" file.
query = input("【中证网】请输入你想搜索的内容:")
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers shared by the search and the article requests.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}

out_dir = 'D:/桌面/爬虫-银行/中国证券网/中国银行/2020'
os.makedirs(out_dir, exist_ok=True)  # open() below fails if the folder is missing

m = 0  # number of articles already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    # The page parameter must be the loop variable p; using the total page
    # count would request the same (last) page on every iteration.
    url = (
        f'http://search.cs.com.cn/search?page={p}&channelid=215308'
        f'&searchword={query}&keyword={query}&token=12.1462412070719.47'
        '&perpage=10&outlinepage=5&&andsen=&total=&orsen=&exclude='
        '&searchscope=&timescope=&timescopecolumn=&orderby='
        '&timeline==2020'
    )
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    print(f'\n>>>--------------------第{p}页---------------------<<<\n')

    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser
    # find() returns None when the page has no <table>; treat that as "no links".
    table = page.find("table")
    alist = table.find_all('a') if table is not None else []
    print('alist:', alist)
    # Keep links whose href STARTS with "http" (a prefix test; comparing the
    # tail href[4:] with "http" never matches a real URL). Anchors without an
    # href are skipped.
    hrefs = (a.get('href') for a in alist)
    weblist = [href for href in hrefs if href and href.startswith("http")]
    print('weblist==', weblist)

    # ---------------- every article on this result page ----------------
    for ii, url_a in enumerate(weblist):
        resp_a = requests.get(url_a, headers=dic)
        resp_a.encoding = 'gbk'
        page_a = BeautifulSoup(resp_a.text, "html.parser")
        # Prefer the <section> body; fall back to the whole document when the
        # article has no <section> instead of failing on None.
        section = page_a.find('section')
        container = section if section is not None else page_a
        txt_list = [txt_a.text for txt_a in container.find_all('p')]
        print(f'\n-++++++++++++++++++第{ii+1}篇文章++++++++++++++++-\n', txt_list, len(txt_list))

        number = m + ii + 1
        with open(f'{out_dir}/(2020){number}.txt', 'w+', encoding='utf-8') as fp:
            fp.writelines(line + '\n' for line in txt_list)  # text only
        print(f'\n> > >{ii+1}成功! < < <')
    # Advance by exactly the number of files written (no extra +1), so file
    # numbers do not skip a value after each page.
    m += len(weblist)

print('---------------\n>>>爬取完毕<<<')

历史优化记录:05_中证网.py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 05: keyword, year and page count are all read from the user; every
# http(s) link in the first row of the result table is followed and all <p>
# paragraphs of the article are saved to a numbered "(<year>)N.txt" file.
query = input("【中证网】请输入你想搜索的内容:")
year = int(input('要爬取的年份:'))
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers shared by the search and the article requests.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}

out_dir = f'D:/桌面/爬虫-银行/中国证券网/中国银行/{year}'
os.makedirs(out_dir, exist_ok=True)  # open() below fails if the folder is missing

m = 0  # number of articles already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    url = (
        f'http://search.cs.com.cn/search?page={p}&channelid=215308'
        f'&searchword={query}&keyword={query}&token=12.1462412070719.47'
        '&perpage=10&outlinepage=5&&andsen=&total=&orsen=&exclude='
        '&searchscope=&timescope=&timescopecolumn=&orderby='
        f'&timeline=={year}'
    )
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    print(f'\n>>>--------------------第{p}页---------------------<<<\n')

    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser
    # find() returns None when an element is absent; a page without the
    # expected table/row simply yields no links.
    table = page.find("table")
    first_row = table.find('tr') if table is not None else None
    alist = first_row.find_all('a') if first_row is not None else []
    # Keep only absolute http(s) links; anchors without an href are skipped.
    hrefs = (a.get('href') for a in alist)
    weblist = [href for href in hrefs if href and href.startswith("http")]
    print('weblist==', weblist)

    # ---------------- every article on this result page ----------------
    for ii, url_a in enumerate(weblist):
        resp_a = requests.get(url_a, headers=dic)
        resp_a.encoding = 'gbk'
        page_a = BeautifulSoup(resp_a.text, "html.parser")
        txt_list = [txt_a.text for txt_a in page_a.find_all('p')]
        print(f'\n-++++++++++++++++++第{ii + 1}篇文章++++++++++++++++-\n', txt_list, len(txt_list))

        number = m + ii + 1
        with open(f'{out_dir}/({year}){number}.txt', 'w+', encoding='utf-8') as fp:
            fp.writelines(line + '\n' for line in txt_list)  # text only
        print(f'\n> > >{ii + 1}成功! < < <')
    # Advance by exactly the number of files written (no extra +1), so file
    # numbers do not skip a value after each page.
    m += len(weblist)

print('---------------\n>>>爬取完毕<<<')

历史优化记录:06_中证网(Plus).py

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import io
import sys
import os

# Change the default encoding of standard output to GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Revision 06 ("Plus"): for a fixed keyword and a user-supplied year, save the
# summary text shown on each search result page (no article pages are
# fetched) to numbered files under <base>/<query>/<year>/.
query = '交通银行'
year = int(input('要爬取的年份:'))
pages = int(input("要爬取的页数(不小于1):"))
if pages < 1:
    sys.exit()

# Request headers used for every search request.
dic = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}

# Inline style that identifies the summary cell inside each result table.
summary_style = 'font-size: 12px;line-height: 24px;color: #333333;margin-top: 4px;'

out_dir = f'D:/桌面/爬虫-银行/中国证券网/{query}/{year}'
# makedirs also creates the missing <query> parent folder, which a plain
# mkdir of the <year> folder cannot do, and tolerates an existing folder.
os.makedirs(out_dir, exist_ok=True)

m = 0  # number of summaries already saved; keeps file numbers continuous
for p in range(1, pages + 1):
    url = (
        f'http://search.cs.com.cn/search?page={p}&channelid=215308'
        f'&searchword={query}&keyword={query}&token=12.1462412070719.47'
        '&perpage=10&outlinepage=5&&andsen=&total=&orsen=&exclude='
        '&searchscope=&timescope=&timescopecolumn=&orderby='
        f'&timeline=={year}'
    )
    resp = requests.get(url, headers=dic)
    resp.encoding = 'utf-8'
    print(f'\n>>>--------------------第{p}页---------------------<<<\n')

    page = BeautifulSoup(resp.text, "html.parser")  # use the built-in HTML parser
    # Keep the text of the summary cell of every table that has one.
    cells = (table.find('td', style=summary_style) for table in page.find_all("table"))
    datalist = [cell.get_text() for cell in cells if cell is not None]

    for ii, text in enumerate(datalist):
        number = m + ii + 1  # running file number across pages
        with open(f'{out_dir}/({year}){number}.txt', 'w+', encoding='utf-8') as fp:
            fp.write(text + '\n')  # text only
        print(text)
        print(f'\n> > >第{p}页,第{ii + 1}篇,成功! < < <')
    # Advance by exactly the number of files written (no extra +1), so file
    # numbers do not skip a value after each page.
    m += len(datalist)

print('----------------------------')
print(f'------\n{year}年,爬取完毕----')
print('----------------------------')

Python网络爬虫 - 爬取中证网银行相关信息的更多相关文章

  1. 如何利用Python网络爬虫爬取微信朋友圈动态--附代码(下)

    前天给大家分享了如何利用Python网络爬虫爬取微信朋友圈数据的上篇(理论篇),今天给大家分享一下代码实现(实战篇),接着上篇往下继续深入. 一.代码实现 1.修改Scrapy项目中的items.py ...

  2. 利用Python网络爬虫爬取学校官网十条标题

    利用Python网络爬虫爬取学校官网十条标题 案例代码: # __author : "J" # date : 2018-03-06 # 导入需要用到的库文件 import urll ...

  3. 如何用Python网络爬虫爬取网易云音乐歌曲

    今天小编带大家一起来利用Python爬取网易云音乐,分分钟将网站上的音乐down到本地. 跟着小编运行过代码的筒子们将网易云歌词抓取下来已经不在话下了,在抓取歌词的时候在函数中传入了歌手ID和歌曲名两 ...

  4. 04 Python网络爬虫 <<爬取get/post请求的页面数据>>之requests模块

    一. urllib库 urllib是Python自带的一个用于爬虫的库,其主要作用就是可以通过代码模拟浏览器发送请求.其常被用到的子模块在Python3中的为urllib.request和urllib ...

  5. Python网络爬虫-爬取微博热搜

    微博热搜的爬取较为简单,我只是用了lxml和requests两个库 url=https://s.weibo.com/top/summary?Refer=top_hot&topnav=1& ...

  6. python网络爬虫&&爬取网易云音乐

    #爬取网易云音乐 url="https://music.163.com/discover/toplist" #歌单连接地址 url2 = 'http://music.163.com ...

  7. 如何利用Python网络爬虫抓取微信朋友圈的动态(上)

    今天小编给大家分享一下如何利用Python网络爬虫抓取微信朋友圈的动态信息,实际上如果单独地去爬取朋友圈的话,难度会非常大,因为微信没有提供像网易云音乐这样的API接口,所以很容易找不到门.不过不要慌 ...

  8. 如何利用Python网络爬虫抓取微信好友数量以及微信好友的男女比例

    前几天给大家分享了利用Python网络爬虫抓取微信朋友圈的动态(上)和利用Python网络爬虫爬取微信朋友圈动态——附代码(下),并且对抓取到的数据进行了Python词云和wordart可视化,感兴趣 ...

  9. 利用Python网络爬虫抓取微信好友的所在省位和城市分布及其可视化

    前几天给大家分享了如何利用Python网络爬虫抓取微信好友数量以及微信好友的男女比例,感兴趣的小伙伴可以点击链接进行查看.今天小编给大家介绍如何利用Python网络爬虫抓取微信好友的省位和城市,并且将 ...

随机推荐

  1. laravel 框架 知识点

    get 方法返回一个包含 Illuminate\Support\Collection 实例的结果,其中每一条记录都是 PHP stdClass 对象的一个实例.你可以通过对象属性的方式来获取每个字段的 ...

  2. Spring Bean生命周期,好像人的一生。。

    大家好,我是老三,上节我们手撸了一个简单的IOC容器五分钟,手撸一个Spring容器!,这节我们来看一看Spring中Bean的生命周期,我发现,和人的一生真的很像. 简单说说IoC和Bean IoC ...

  3. 使用Vscode和Cmake打造跨平台的C++ IDE

    准备工作 Visual Studio Code 64位 :Download Visual Studio Code - Mac, Linux, Windows Cmake 3.4 :Download | ...

  4. git命令新建远程分支并推送,切换远程地址

    最近记性不好,老是忘记操作命令,记录下一下新建远程分支和切换.删除远程地址的命令: 1.查看当前分支:  git branch 2.查看所有分支:git branch -a 3.切换分支:git ch ...

  5. python基础之序列类型的方法——字符串方法

    python基础之序列类型的方法--字符串方法 Hello大家好,我是python学习者小杨同学,经过一段时间的沉淀(其实是偷懒不想更新),我终于想起了自己的博客账号,所以这次带来的是序列方法的后半部 ...

  6. spring——通过注解显式的完成自动装配

    构建bean文件: public class People { private String name = "小明"; } 编写配置类: @Configuration @Impor ...

  7. BUAA_DS_北航数据结构:输出全排列

    输入一个数 \(n\),输出 \(1\sim n\) 的所有全排列,每个排列占一行,每个字符保留 \(5\) 个场宽.勤奋的同学一定已经开始打表了是吧. 说是能做肯定不是骗大家,那怎么做呢~ 其实回溯 ...

  8. maven在idea中的一点使用技巧

    maven在idea中的一点使用技巧 idea已经支持将参数的意思也展示出来,确实很方便. -U是强制拉取,因为如果拉取某个jar包,失败了,那么在一段时间内,idea不会重试,除非指定-U. -X呢 ...

  9. JDK中哪些类是不能继承的?

    不能继承的类是那些用final关键字修饰的类. 实际上即使我们自己开发的类,也可以通过使用final修饰来阻止被继承.通过使用final修饰一个类,可以阻止该类被继承,这样该类就被完全地封闭起来了, ...

  10. 如何使用双重检查锁定在 Java 中创建线程安全的单例?

    这个 Java 问题也常被问: 什么是线程安全的单例,你怎么创建它.好吧,在Java 5之前的版本, 使用双重检查锁定创建单例 Singleton 时,如果多个线程试图同时创建 Singleton 实 ...