/*引入模块*/ var http = require('http') var url = 'http://www.cnblogs.com/txxt' var cheerio = require('cheerio') /*过滤函数*/ function filter(html) { var $ = cheerio.load(html) var titleData = []; var title = $('.postTitle').text(); console.log(title) } /*数据
抓取豆瓣读书中的(http://book.douban.com/)最受关注图书,按照评分排序,并保存至txt文件中,需要抓取书籍的名称,作者,评分,体裁和一句话评 方法一: #coding=utf-8 from selenium import webdriver from time import sleep class DoubanPopularBook: def __init__(self): self.dr = webdriver.Chrome() self.popular_books_li
抓取豆瓣电影(http://movie.douban.com/nowplaying/chengdu/)中的正在热映前12部电影,并按照评分排序,保存至txt文件 #coding=utf-8 from selenium import webdriver import unittest from time import sleep class DoubanMovie(unittest.TestCase): def setUp(self): self.dr = webdriver.Chrome() s
CasperJS is a navigation scripting & testing utility for the PhantomJS (WebKit) and SlimerJS (Gecko) headless browsers, written in Javascript. PhantomJS是基于WebKit内核的headless browser SlimerJS则是基于Gecko内核的headless browser Headless browser: 无界面显示的浏览器,可以用于