scrapy_ip_agent
#File name is rotate_useragent
# -*- coding: UTF-8 -*-
import random
import urllib2
import redis
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from CrawlerTools.ScrapyFileSystem.config import *
class RotateUserAgentMiddleware(UserAgentMiddleware):
def __inti__(self,user_agent=""):
self.user_agent=user_agent
def process_request(self,request,spider):
user_agent_list=["Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
"(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
"(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
"(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
"(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
"(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
"(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
"(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
"(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
"(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
"(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24" ]
ua=random.choice(user_agent_list)
if ua:
request.headers.setdefault("User-Agent",ua)
#Get ip
cfg=config("Redis")
redisId=redis.Redis(cfg["host"],cfg["port"],1,cfg["pwd"])
res=redisId.srandmember("ipPool",1)[0].strip()
urls=res.split(":")
request.meta['proxy'] ="http://"+str(urls[0])+":"+str(urls[1])
# Use the following lines if your proxy requires authentication
#Configuration profile
DOWNLOADER_MIDDLEWARES = {
'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware' : None,
'CrawlerTools.rotate_useragent.RotateUserAgentMiddleware' :400
}
scrapy_ip_agent的更多相关文章
随机推荐
- Gym 100507A About Grisha N. (水题)
About Grisha N. 题目链接: http://acm.hust.edu.cn/vjudge/contest/126546#problem/A Description Grisha N. t ...
- POJ 1287 Networking (最小生成树)
Networking 题目链接: http://acm.hust.edu.cn/vjudge/contest/124434#problem/B Description You are assigned ...
- CodeForces 455A Boredom (DP)
Boredom 题目链接: http://acm.hust.edu.cn/vjudge/contest/121334#problem/G Description Alex doesn't like b ...
- 在Hibernate中使用HibernateTemplate来进行包含sql语句的查询
/** * 使用sql语句进行查询操作 * @param sql * @return */ public List queryWithSql(final Stri ...
- Node.js Crypto 加密算法库
Crypto库是随Nodejs内核一起打包发布的,主要提供了加密.解密.签名.验证等功能.Crypto利用OpenSSL库来实现它的加密技术,它提供OpenSSL中的一系列哈希方法,包括hmac.ci ...
- 深入浅出之Smarty模板引擎工作机制(一)
深入浅出Smarty模板引擎工作机制,我们将对比使用smarty模板引擎和没使用smarty模板引擎的两种开发方式的区别,并动手开发一个自己的模板引擎,以便加深对smarty模板引擎工作机制的理解. ...
- PostgreSQL的 initdb 源代码分析之十
继续分析, 如下这段,因为条件不成立,被跳过: /* Create transaction log symlink, if required */ ) { fprintf(stderr,"I ...
- PostgreSQL中,database,schema,table之间关系
从逻辑上看,schema,table,都是位于database之下. 首先,在postgres数据库下建立表(相当于建立在public schema下): [pgsql@localhost bin]$ ...
- TcxDBLookupCombobox 级联时第二级不显示正确内容的处理方法
在使用两个级联的 TcxDBLookupCombobox 时,会出现这种情况:当第一级的内容变更后,第二级的控件在界面上显示的文本不变化,即使数据集已经通过 Properites.OnChange 事 ...
- iOS GCD 与 NSOperationQueue
NSOperationQueue ios NSOperation vs. GCD StackOverflow: NSOperation vs. Grand Central Dispatch Blog: ...