python3小demo
总结常用功能的小实例,帮助快速学习并掌握 Python 技能。
1.墨迹天气
- import requests
- from lxml.html import etree
- import json
- import time # 导入模块
class MoJiWeather():
    """Query tianqi.moji.com for a city's current weather and print it.

    The methods hold no instance state. They can be called on the class
    itself (``MoJiWeather.search_city(name)``) or on an instance.
    """

    def city_name(self=None):
        """Prompt on stdin for a city name and return it as a string.

        ``self`` is unused; it defaults to ``None`` so the method works both
        as ``MoJiWeather.city_name()`` and as ``MoJiWeather().city_name()``.
        """
        return str(input("输入城市名称:"))

    @staticmethod
    def search_city(city_name):
        """Return the weather-page URL for ``city_name``.

        Calls the city-search API, reads the ``cityId`` of the first entry of
        ``city_list`` in the JSON reply and builds the redirect URL from it.
        When no city id can be extracted, prints an error message and exits
        the program (raises ``SystemExit``).
        """
        index_url = "http://tianqi.moji.com/api/citysearch/%s" % city_name
        response = requests.get(index_url)
        response.encoding = "utf-8"
        try:
            # First search hit wins.
            city_id = json.loads(response.text).get('city_list')[0].get('cityId')
        except (ValueError, TypeError, IndexError, KeyError, AttributeError):
            # Invalid JSON, missing/empty city_list, or an unexpected shape.
            print('城市名输入错误')
            raise SystemExit
        return "http://tianqi.moji.com/api/redirect/%s" % str(city_id)

    @staticmethod
    def parse(city_url):
        """Download ``city_url`` and print the weather fields found in it.

        Each field is the first text node matched by its XPath expression;
        an ``IndexError`` is raised if an expression matches nothing.
        Nearby attractions are printed one per line at the end.
        """
        response = requests.get(city_url)
        response.encoding = 'utf-8'
        html = etree.HTML(response.text)

        # (printed label, XPath of the value, text appended after the value)
        fields = [
            ('当前城市:', "//div[@class='search_default']/em/text()", ''),
            ('空气质量:', "//div[@class='left']/div[@class='wea_alert clearfix']/ul/li/a/em/text()", ''),
            ('当前温度:', "//div[@class='left']/div[@class='wea_weather clearfix']/em/text()", '℃'),
            ('天气状况:', "//div[@class='wea_weather clearfix']/b/text()", ''),
            ('当前湿度:', "//div[@class='left']/div[@class='wea_about clearfix']/span/text()", ''),
            ('当前风速:', "//div[@class='left']/div[@class='wea_about clearfix']/em/text()", ''),
        ]
        for label, path, suffix in fields:
            print(label + html.xpath(path)[0] + suffix)

        jingdian = html.xpath("//div[@class='right']/div[@class='near'][2]/div[@class='item clearfix']/ul/li/a/text()")
        print('附近景点:')
        for j in jingdian:
            print('\t\t' + j)
# Script entry point: ask for a city, resolve its weather page, print the report.
if __name__ == '__main__':
    print("欢迎使用墨迹天气查询系统")
    # city_name is declared with a ``self`` parameter, so it is called on an
    # instance; search_city and parse take no ``self`` and are called on the class.
    city_name = MoJiWeather().city_name()
    city_url = MoJiWeather.search_city(city_name)
    MoJiWeather.parse(city_url)
    print("谢谢使用本查询系统")
    # Keep the console window open until the user presses Enter.
    input("按任意键退出...")
2.Tiobe排行榜
- import json
- from lxml import etree
- from lxml.etree import ParseError
- import requests
- from requests.exceptions import RequestException
- '''
- lxml实例应用
- '''
- '''
- 获取页面数据
- '''
def one_to_page(url):
    """Fetch ``url`` and yield the text cells of the ranking table row by row.

    The page is requested with a browser User-Agent. All text nodes below the
    rows of the table whose class contains ``table-top20`` are collected and
    yielded as consecutive lists of ``fields_per_row`` strings.

    If the request fails or the HTML cannot be parsed, a message is printed
    and the generator ends without yielding anything.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        res = requests.get(url, headers=headers)
    except RequestException as e:
        print('request is error', e)
        return  # no page body to parse
    body = res.text  # page content

    try:
        html = etree.HTML(body, etree.HTMLParser())
    except ParseError as e:
        print(e.position)
        return

    # All descendant text nodes of each row (images contribute no text).
    result = html.xpath('//table[contains(@class,"table-top20")]/tbody/tr//text()')

    # NOTE(review): the numeric literals were missing from the source text.
    # 5 matches the five fields write_file reads from each row, and 20 follows
    # from the "table-top20" class name -- confirm against the live page.
    fields_per_row = 5
    max_rows = 20
    stop = min(len(result), fields_per_row * max_rows)
    for pos in range(0, stop, fields_per_row):
        yield result[pos:pos + fields_per_row]
- '''
- 写入文件
- '''
def write_file(data):
    """Append each row of ``data`` to ``test.txt`` as one JSON object per line.

    ``data`` is an iterable of sequences holding at least five strings, in the
    order: current rank, previous rank, language, rating, change. Each record
    is also printed. Rows with fewer than five fields are skipped.

    Returns ``None``.
    """
    # Open once for the whole batch; the with-block closes the file.
    with open('test.txt', 'a', encoding='utf-8') as f:
        for item in data:
            if len(item) < 5:
                continue  # incomplete row, nothing sensible to record
            sul = {
                '2018年6月排行': item[0],
                '2017年6排行': item[1],
                '开发语言': item[2],
                '评级': item[3],
                '变化率': item[4]
            }
            f.write(json.dumps(sul, ensure_ascii=False) + '\n')
            print(sul)
    return None
- '''
- 主程序
- '''
def main():
    """Scrape the TIOBE index page and store its ranking rows via write_file."""
    url = 'https://www.tiobe.com/tiobe-index/'
    data = one_to_page(url)
    ret = write_file(data)
    if ret is None:
        print('ok')


if __name__ == '__main__':
    main()
3.新闻列表
- '''
- 墨迹天气文章爬虫
- '''
- import requests
- import json
- from lxml.html import etree
- from lxml.etree import ParseError
- '''
- 解析页面内容
- '''
def parseHtml(content):
    """Parse a news-list page and yield the extracted values in groups.

    ``content`` is the HTML text of the page. The ``href`` of every link
    inside ``ul.advisory_list_item`` is collected and yielded as consecutive
    lists of ``LOOP`` items. If the HTML cannot be parsed, the error position
    is printed and nothing is yielded.
    """
    try:
        html = etree.HTML(content, etree.HTMLParser())
    except ParseError as e:
        print(e.position)
        return

    # NOTE(review): write_log labels the two fields of each group as
    # publication time and title, but this expression extracts link hrefs.
    # The source also carried a commented-out alternative,
    # '//ul[@class="advisory_list_item"]//text()' -- confirm which is intended.
    one = html.xpath('//ul[@class="advisory_list_item"]//li/a/@href')

    # A leftover debug "print(one); exit()" used to stop the program here,
    # so the grouping below was never reached; it has been removed.

    # NOTE(review): the group size was missing from the source text; 2 is
    # assumed because write_log reads two fields per item.
    LOOP = 2
    for pos in range(0, len(one), LOOP):
        yield one[pos:pos + LOOP]
- '''
- 写入文件
- '''
def write_log(data):
    """Append each item of ``data`` to ``moji.log`` as one JSON object per line.

    ``data`` is an iterable of sequences holding at least two values, stored
    under the keys for publication time and article title. Each record is
    also printed. Items with fewer than two values are skipped.

    Returns ``None``.
    """
    # Open once for the whole batch; the with-block closes the file.
    with open('moji.log', 'a', encoding='utf-8') as f:
        for item in data:
            if len(item) < 2:
                continue  # incomplete item, nothing sensible to record
            msg = {
                '发文时间': item[0],
                '文章标题': item[1]
            }
            f.write(json.dumps(msg, ensure_ascii=False) + '\n')
            print(msg)
    return None
- '''
- 主程序
- '''
def main(start_page=1, stop_page=2):
    """Download news-list pages and log the items parsed from each one.

    Pages ``start_page`` up to, but not including, ``stop_page`` are fetched.

    NOTE(review): the page range was missing from the source text; the
    defaults fetch page 1 only -- adjust to the intended range.
    """
    for page in range(start_page, stop_page):
        url = 'https://tianqi.moji.com/news/list/moji/{}'.format(page)
        res = requests.get(url)
        res.encoding = 'utf-8'
        content = parseHtml(res.text)
        ret = write_log(content)
        if ret is None:
            print('ok')


if __name__ == '__main__':
    main()
4.爬取IP
- import requests
- import re
- import random
- from bs4 import BeautifulSoup
# Pool of browser User-Agent strings; one is picked at random per request.
ua_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
    # Stray spaces around "/" and missing separators made this entry malformed.
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
]
def ip_parse_xici(page, timeout=3):
    """Collect ``ip:port`` strings from the xicidaili.com proxy listing.

    :param page: number of listing pages to collect (int or numeric string)
    :param timeout: seconds to wait for each check request.
        NOTE(review): the original value was missing from the source text;
        3 is an assumed default.
    :return: list of ``"ip:port"`` strings that passed the check; the list
        is also printed.
    """
    # Compiled once: an IP cell is dotted digits, a port cell is digits only.
    ip_compile = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td>')
    port_compile = re.compile(r'<td>(\d+)</td>')
    check_api = "http://ip.taobao.com/service/getIpInfo2.php?ip="

    ip_list = []
    # NOTE(review): the lower bound was missing from the source text; pages
    # are assumed to start at 1. The upper bound is inclusive so that `page`
    # pages are collected, as the parameter description says.
    for pg in range(1, int(page) + 1):
        url = 'http://www.xicidaili.com/nn/' + str(pg)
        user_agent = random.choice(ua_list)
        my_headers = {
            'Accept': 'text/html, application/xhtml+xml, application/xml;',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Referer': 'http://www.xicidaili.com/nn',
            'User-Agent': user_agent
        }
        try:
            r = requests.get(url, headers=my_headers)
            soup = BeautifulSoup(r.text, 'html.parser')
        except requests.exceptions.ConnectionError:
            print('ConnectionError')
            continue

        data = str(soup.find_all('td'))
        ips = ip_compile.findall(data)      # every IP on the page
        ports = port_compile.findall(data)  # every port on the page

        api_headers = {
            'User-Agent': user_agent
        }
        # Pair each IP with its port and keep only those whose check succeeds.
        # A new list is built instead of deleting from ips/ports while
        # indexing over them, which skipped the entry after each removal.
        # NOTE(review): the check only queries the lookup API about the IP;
        # it does not send traffic through the proxy -- confirm this is the
        # intended notion of "usable".
        for ip, port in zip(ips, ports):
            try:
                requests.get(url=check_api + ip, headers=api_headers, timeout=timeout)
            except requests.exceptions.RequestException as e:
                print("此ip %s 已失效:%s" % (ip, e))
                continue
            print("ip:%s 可用" % ip)
            ip_list.append(':'.join((ip, port)))
        print('第{}页ip采集完成'.format(pg))

    print(ip_list)
    return ip_list
# Script entry point: ask how many listing pages to collect, then run the crawl.
if __name__ == '__main__':
    xici_pg = input("请输入需要采集的页数:")
    ip_parse_xici(page=xici_pg)
python3小demo的更多相关文章
- selenium 3+python3.6+firefox的windows详细环境搭建以及小demo
最近也是学习了下selenium和python,就记录了下在自己工作机上环境的搭建过程以及小demo 1,安装python3.6.1 我是去官网直接下载当前最新版的python3.6.1 官网网址为h ...
- 新手 gulp+ seajs 小demo
首先,不说废话,它的介绍和作者就不在多说了,网上一百度一大堆: 我在这里只是来写写我这2天抽空对seajs的了解并爬过的坑,和实现的一个小demo(纯属为了实现,高手请绕道); 一.环境工具及安装 1 ...
- Nancy之基于Nancy.Hosting.Self的小Demo
继昨天的Nancy之基于Nancy.Hosting.Aspnet的小Demo后, 今天来做个基于Nancy.Hosting.Self的小Demo. 关于Self Hosting Nancy,官方文档的 ...
- Nancy之基于Nancy.Owin的小Demo
前面做了基于Nancy.Hosting.Aspnet和Nancy.Hosting.Self的小Demo 今天我们来做个基于Nancy.Owin的小Demo 开始之前我们来说说什么是Owin和Katan ...
- Nancy之基于Self Hosting的补充小Demo
前面把Hosting Nancy with ASP.NET.Self Hosting Nancy和Hosting Nancy with OWIN 以demo的形式简单描述了一下. 这篇是为Self H ...
- [Unity3D]做个小Demo学习Input.touches
[Unity3D]做个小Demo学习Input.touches 学不如做,下面用一个简单的Demo展示的Input.touches各项字段,有图有真相. 本项目已发布到Github,地址在(https ...
- Android -- 自定义View小Demo,动态画圆(一)
1,转载:(http://blog.csdn.NET/lmj623565791/article/details/24500107),现在如下图的效果: 由上面的效果图可以看到其实是一个在一个圆上换不同 ...
- Win10 FaceAPI小demo开发问题汇总
Win10 FaceAPI小demo开发问题汇总 最近使用微软牛津计划做一个小demo,使用FaceAPI做一个小应用,实现刷脸的功能.开发的过程中用到几个问题,具体如下: Stream 与IRand ...
- 模仿京东顶部搜索条效果制作的一个小demo
最近模仿京东顶部搜索条效果制作的一个小demo,特贴到这里,今后如果有用到可以参考一下,代码如下 #define kScreenWidth [UIScreen mainScreen].bounds.s ...
随机推荐
- Java线程细节
启动一个线程是用 run() 还是 start()?启动一个线程是调用 start()方法,启动线程并调用 run 方法 线程的基本概念.线程的基本状态以及状态之间的关系线程是进程内的并发,没有自已 ...
- maven国内镜像、国内外仓库(直接下载jar)
阿里: https://maven.aliyun.com/mvn/search 官方: http://repo.maven.apache.org/maven2/ maven仓库 阿里巴巴的镜像仓库, ...
- 套接字I/O函数write/read writev/readv send/recv sendto/recvfrom sendmsg/recvmsg
函数原型 read/write系原型 #include <unistd.h> ssize_t read(int fd, void *buf, size_t count); #include ...
- ubuntu16.04修改host上外網
1.打开hosts文件: sudo emacs /etc/hosts 2.加入下面的内容 #chrome同步服务器 203.208.46.132 chrome.google.com203.208.46 ...
- LeetCode 61. 旋转链表(Rotate List)
题目描述 给定一个链表,旋转链表,将链表每个节点向右移动 k 个位置,其中 k 是非负数. 示例 1: 输入: 1->2->3->4->5->NULL, k = 2 输出 ...
- [HTML辅助方法-Html.Raw()的简单应用]
Html.Raw(); 当我们使用 文本编辑器,存入到数据库中的数据会带 html 标签,如果我们需要在前台显示存入时的相同样式,不输出为带有html标签的字符串 ,不通过富文本显示的话,可以通过ht ...
- 浏览器端-W3School-JavaScript:Location 对象
ylbtech-浏览器端-W3School-JavaScript:Location 对象 1.返回顶部 1. Location 对象 Location 对象 Location 对象包含有关当前 URL ...
- nodejs之简单应用与运行
1.nodejs第一个应用,入口函数为http.createServer() var http=require('http');//1.引入 http 模块 //2.用 http 模块创建服务 htt ...
- flum到kafka 收集数据 存储到redis 案例 (ip.txt)
ip.scala package ip import org.apache.kafka.clients.consumer.ConsumerRecord import org.apache.kafka. ...
- 错误:expected initializer before "***"
今天写了一个程序,编译时报了一个错误:expected initializer before "***"报错的语句只是程序开头的一个变量定义语句,怎么会有这样的错误呢,琢磨了半天也 ...