scrapy_ip_agent
#File name is rotate_useragent
# -*- coding: UTF-8 -*-
import random
import urllib2
import redis
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from CrawlerTools.ScrapyFileSystem.config import *
class RotateUserAgentMiddleware(UserAgentMiddleware):
def __inti__(self,user_agent=""):
self.user_agent=user_agent
def process_request(self,request,spider):
user_agent_list=["Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
"(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
"(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
"(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
"(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
"(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
"(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
"(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
"(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
"(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24" ]
ua=random.choice(user_agent_list)
if ua:
request.headers.setdefault("User-Agent",ua)
#Get ip
cfg=config("Redis")
redisId=redis.Redis(cfg["host"],cfg["port"],1,cfg["pwd"])
res=redisId.srandmember("ipPool",1)[0].strip()
urls=res.split(":")
request.meta['proxy'] ="http://"+str(urls[0])+":"+str(urls[1])
# Use the following lines if your proxy requires authentication
#Configuration profile
DOWNLOADER_MIDDLEWARES = {
'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware' : None,
'CrawlerTools.rotate_useragent.RotateUserAgentMiddleware' :400
}
scrapy_ip_agent的更多相关文章
随机推荐
- cocos2d-x获得系统的语言
获得手机系统的语言 CCSize winSize = CCDirector::sharedDirector()->getWinSize(); CCLabelTTF *label = CCLabe ...
- oracle 监测数据库是否存在指定字段
public static bool ExistColumn(string tableName, string columnName, string connStr) { using (OracleC ...
- CodeForces 589D Boulevard (数学,相遇)
题意:给定 n 个的在 x 轴上的坐标,和开始时间,结束坐标,从起点向终点走,如果和其他人相遇,就互相打招乎,问你每人打招乎的次数. 析:其实这一个数学题,由于 n 比较小,我们就可以两两暴力,这两个 ...
- fixed 定位 苹果手机输入框触发时内容全部隐藏
问题出现在东钿微信公众号用户注册页面 页面中只有两个输入框 页面没有超过一屏,悬浮按钮也要出现在本页面 ,开始布局页面的时候没什么问题,然后我在我自己手机上测试 ,输入手机号码,非常奇怪的问题出现了, ...
- 《解剖PetShop》系列转载
1 <解剖PetShop>系列之六 PetShop之表示层设计 http://ityup.com/showtopic-8.html 2 <解剖PetShop>系列之五 ...
- Nuget~让包包带上自己的配置信息
我们知道一般开发组件之后,组件都有相关配置项,最常见的作法就是把它写到web.config里,而如果你将这个文件直接放到nuget里打包,在进行安装包包时,会提示你这个文件已经存在,不能去覆盖原来的c ...
- static关键字修饰类
今天继续研究公司的新项目,据说是京东的架构,研究看了一番,果然很牛逼,大致是Maven+spingmvc+spring+mybatis+ehcache+velocity来搭建的,数据库用的是mysql ...
- Android EditText载入HTML内容(内容包括网络图片)
android中的Html.fromHtml能够用来载入HTML的内容.fromHtml有三个參数须要设置,第一个是要显示的html内容,第二个就是要说的重点,ImageGetter,用来处理图片载入 ...
- Codeforces Beta Round #51 D. Beautiful numbers 数位dp
D. Beautiful numbers Time Limit: 20 Sec Memory Limit: 256 MB 题目连接 http://codeforces.com/contest/55/p ...
- Communications link failure报错的处理
一.报错的问题: 测试环境在做压力测试的时候爆出错误 com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications l ...