Mechanize库浏览页面

#!/usr/bin/python
#coding=utf-8
import mechanize def viewPage(url):
browser = mechanize.Browser()
page = browser.open(url)
source_code = page.read()
print source_code viewPage('http://www.imooc.com/')

使用代理服务器、User-Agent和cookie:

#!/usr/bin/python
#coding=utf-8
import mechanize def testProxy(url, proxy):
browser = mechanize.Browser()
browser.set_proxies(proxy)
page = browser.open(url)
source_code = page.read()
print source_code url = 'http://2017.ip138.com/ic.asp'
hideMeProxy = {'http': '139.196.202.164:9001'}
testProxy(url, hideMeProxy)
#!/usr/bin/python
#coding=utf-8
import mechanize def testUserAgent(url, userAgent):
browser = mechanize.Browser()
browser.addheaders = userAgent
page = browser.open(url)
source_code = page.read()
print source_code url = 'http://whatismyuseragent.dotdoh.com/'
userAgent = [('User-agent', 'Mozilla/5.0 (X11; U; Linux 2.4.2-2 i586; en-US; m18) Gecko/20010131 Netscape6/6.01')]
testUserAgent(url, userAgent)

把代码集成在Python类的AnonBrowser中

#!/usr/bin/python
#coding=utf-8
import mechanize
import cookielib
import random
import time  # required by anonymize(sleep=True); missing from the original


class anonBrowser(mechanize.Browser):
    """A mechanize browser that rotates proxy, User-Agent and cookies."""

    def __init__(self, proxies=None, user_agents=None):
        mechanize.Browser.__init__(self)
        self.set_handle_robots(False)
        # Avoid mutable default arguments: build fresh lists per instance.
        # List of proxy servers available to the user
        self.proxies = proxies if proxies is not None else []
        # Candidate User-Agent strings (caller-supplied ones first)
        if user_agents is None:
            user_agents = []
        self.user_agents = user_agents + ['Mozilla/4.0 ', 'FireFox/6.01', 'ExactSearch', 'Nokia7110/1.0']
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)
        self.anonymize()  # start with a clean, randomised identity

    def clear_cookies(self):
        """Throw away all cookies by installing a fresh cookie jar."""
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)

    def change_user_agent(self):
        """Set a randomly chosen User-Agent from the list."""
        index = random.randrange(0, len(self.user_agents))
        self.addheaders = [('User-agent', (self.user_agents[index]))]

    def change_proxy(self):
        """Set a randomly chosen HTTP proxy; no-op when the list is empty."""
        if self.proxies:
            index = random.randrange(0, len(self.proxies))
            self.set_proxies({'http': self.proxies[index]})

    def anonymize(self, sleep=False):
        """Rotate UA and proxy and clear cookies to improve anonymity.

        sleep -- when True, pause 60s so successive requests are harder
                 to correlate.
        """
        self.clear_cookies()
        self.change_user_agent()
        self.change_proxy()
        if sleep:
            time.sleep(60)

测试每次是否使用不同的cookie访问:

#!/usr/bin/python
#coding=utf-8
from anonBrowser import * ab = anonBrowser(proxies=[], user_agents=[('User-agent','superSecretBroswer')]) for attempt in range(1, 5):
# 每次访问都进行一次匿名操作
ab.anonymize()
print '[*] Fetching page'
response = ab.open('http://www.kittenwar.com/')
for cookie in ab.cookie_jar:
print cookie

用BeautifulSoup解析Href链接:

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *
from BeautifulSoup import BeautifulSoup
import os
import optparse
import re def printLinks(url):
ab = anonBrowser()
ab.anonymize()
page = ab.open(url)
html = page.read()
# 使用re模块解析href链接
try:
print '[+] Printing Links From Regex.'
link_finder = re.compile('href="(.*?)"')
links = link_finder.findall(html)
for link in links:
print link
except:
pass
# 使用bs4模块解析href链接
try:
print '\n[+] Printing Links From BeautifulSoup.'
soup = BeautifulSoup(html)
links = soup.findAll(name='a')
for link in links:
if link.has_key('href'):
print link['href']
except:
pass def main():
parser = optparse.OptionParser('[*]Usage: python linkParser.py -u <target url>')
parser.add_option('-u', dest='tgtURL', type='string', help='specify target url')
(options, args) = parser.parse_args()
url = options.tgtURL if url == None:
print parser.usage
exit(0)
else:
printLinks(url) if __name__ == '__main__':
main()

用BeautifulSoup映射图像

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *
from BeautifulSoup import BeautifulSoup
import os
import optparse def mirrorImages(url, dir):
ab = anonBrowser()
ab.anonymize()
html = ab.open(url)
soup = BeautifulSoup(html)
image_tags = soup.findAll('img') for image in image_tags:
# lstrip() 方法用于截掉字符串左边的空格或指定字符
filename = image['src'].lstrip('http://')
filename = os.path.join(dir, filename.replace('/', '_'))
print '[+] Saving ' + str(filename)
data = ab.open(image['src']).read()
# 回退
ab.back()
save = open(filename, 'wb')
save.write(data)
save.close() def main():
parser = optparse.OptionParser('[*]Usage: python imageMirror.py -u <target url> -d <destination directory>')
parser.add_option('-u', dest='tgtURL', type='string', help='specify target url')
parser.add_option('-d', dest='dir', type='string', help='specify destination directory')
(options, args) = parser.parse_args()
url = options.tgtURL
dir = options.dir
if url == None or dir == None:
print parser.usage
exit(0)
else:
try:
mirrorImages(url, dir)
except Exception, e:
print '[-] Error Mirroring Images.'
print '[-] ' + str(e) if __name__ == '__main__':
main()

用Python与谷歌API交互

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import * def google(search_term):
ab = anonBrowser()
# URL编码
search_term = urllib.quote_plus(search_term)
response = ab.open('https://www.googleapis.com/customsearch/v1?key=你的key&cx=你的id&num=1&alt=json&q=' + search_term)
print response.read() google('Boundock Saint')

接着就对Json格式的数据进行处理,添加json库的load()函数对Json数据进行加载即可

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json def google(search_term):
ab = anonBrowser()
# URL编码
search_term = urllib.quote_plus(search_term)
response = ab.open('https://www.googleapis.com/customsearch/v1?key=你的key&cx=你的id&num=1&alt=json&q=' + search_term)
objects = json.load(response)
print objects google('Boundock Saint')

编写Google_Result类,用于保存Json数据解析下来的标题

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json
import optparse class Google_Result:
def __init__(self,title,text,url):
self.title = title
self.text = text
self.url = url def __repr__(self):
return self.title def google(search_term):
ab = anonBrowser()
# URL编码
search_term = urllib.quote_plus(search_term)
response = ab.open('https://www.googleapis.com/customsearch/v1?key=你的key&cx=你的id&num=1&alt=json&q=' + search_term)
objects = json.load(response)
results = [] for result in objects['items']:
url = result['link']
title = result['title']
text = result['snippet']
print url
print title
print text
new_gr = Google_Result(title, text, url)
results.append(new_gr)
return results def main():
parser = optparse.OptionParser('[*]Usage: python anonGoogle.py -k <keywords>')
parser.add_option('-k', dest='keyword', type='string', help='specify google keyword')
(options, args) = parser.parse_args()
keyword = options.keyword if options.keyword == None:
print parser.usage
exit(0)
else:
results = google(keyword)
print results if __name__ == '__main__':
main()

用Python解析Tweets个人主页

#!/usr/bin/python
#coding=utf-8
import json
import urllib
from anonBrowser import * class reconPerson:
def __init__(self, first_name, last_name, job='', social_media={}):
self.first_name = first_name
self.last_name = last_name
self.job = job
self.social_media = social_media def __repr__(self):
return self.first_name + ' ' + self.last_name + ' has job ' + self.job def get_social(self, media_name):
if self.social_media.has_key(media_name):
return self.social_media[media_name]
return None def query_twitter(self, query):
query = urllib.quote_plus(query)
results = []
browser = anonBrowser()
response = browser.open('http://search.twitter.com/search.json?q=' + query)
json_objects = json.load(response)
for result in json_objects['results']:
new_result = {}
new_result['from_user'] = result['from_user_name']
new_result['geo'] = result['geo']
new_result['tweet'] = result['text']
results.append(new_result)
return results ap = reconPerson('Boondock', 'Saint')
print ap.query_twitter('from:th3j35t3r since:2010-01-01 include:retweets')

从推文中提取地理位置信息

#!/usr/bin/python
#coding=utf-8
import json
import urllib
import optparse
from anonBrowser import * def get_tweets(handle):
query = urllib.quote_plus('from:' + handle + ' since:2009-01-01 include:retweets')
tweets = []
browser = anonBrowser()
browser.anonymize()
response = browser.open('http://search.twitter.com/search.json?q='+ query)
json_objects = json.load(response)
for result in json_objects['results']:
new_result = {}
new_result['from_user'] = result['from_user_name']
new_result['geo'] = result['geo']
new_result['tweet'] = result['text']
tweets.append(new_result)
return tweets def load_cities(cityFile):
cities = []
for line in open(cityFile).readlines():
city=line.strip('\n').strip('\r').lower()
cities.append(city)
return cities def twitter_locate(tweets,cities):
locations = []
locCnt = 0
cityCnt = 0
tweetsText = "" for tweet in tweets:
if tweet['geo'] != None:
locations.append(tweet['geo'])
locCnt += 1 tweetsText += tweet['tweet'].lower() for city in cities:
if city in tweetsText:
locations.append(city)
cityCnt+=1 print "[+] Found " + str(locCnt) + " locations via Twitter API and " + str(cityCnt) + " locations from text search."
return locations def main():
parser = optparse.OptionParser('[*]Usage: python twitterGeo.py -u <twitter handle> [-c <list of cities>]')
parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
parser.add_option('-c', dest='cityFile', type='string', help='specify file containing cities to search')
(options, args) = parser.parse_args()
handle = options.handle
cityFile = options.cityFile
if (handle==None):
print parser.usage
exit(0)
cities = []
if (cityFile!=None):
cities = load_cities(cityFile)
tweets = get_tweets(handle)
locations = twitter_locate(tweets,cities)
print "[+] Locations: "+str(locations) if __name__ == '__main__':
main()

用正则表达式解析Twitter用户的兴趣爱好

#!/usr/bin/python
#coding=utf-8
import json
import re
import urllib
import urllib2
import optparse
from anonBrowser import * def get_tweets(handle):
query = urllib.quote_plus('from:' + handle + ' since:2009-01-01 include:retweets')
tweets = []
browser = anonBrowser()
browser.anonymize()
response = browser.open('http://search.twitter.com/search.json?q='+ query)
json_objects = json.load(response)
for result in json_objects['results']:
new_result = {}
new_result['from_user'] = result['from_user_name']
new_result['geo'] = result['geo']
new_result['tweet'] = result['text']
tweets.append(new_result)
return tweets def find_interests(tweets):
interests = {}
interests['links'] = []
interests['users'] = []
interests['hashtags'] = [] for tweet in tweets:
text = tweet['tweet']
links = re.compile('(http.*?)\Z|(http.*?) ').findall(text) for link in links:
if link[0]:
link = link[0]
elif link[1]:
link = link[1]
else:
continue try:
response = urllib2.urlopen(link)
full_link = response.url
interests['links'].append(full_link)
except:
pass
interests['users'] += re.compile('(@\w+)').findall(text)
interests['hashtags'] += re.compile('(#\w+)').findall(text) interests['users'].sort()
interests['hashtags'].sort()
interests['links'].sort() return interests def main():
parser = optparse.OptionParser('[*]Usage: python twitterInterests.py -u <twitter handle>')
parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
(options, args) = parser.parse_args()
handle = options.handle
if handle == None:
print parser.usage
exit(0) tweets = get_tweets(handle)
interests = find_interests(tweets)
print '\n[+] Links.'
for link in set(interests['links']):
print ' [+] ' + str(link) print '\n[+] Users.'
for user in set(interests['users']):
print ' [+] ' + str(user) print '\n[+] HashTags.'
for hashtag in set(interests['hashtags']):
print ' [+] ' + str(hashtag) if __name__ == '__main__':
main()

编写reconPerson类,封装所有抓取的地理位置、兴趣爱好以及Twitter页面的代码:

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json
import re
import urllib2


class reconPerson:
    """Fetches a Twitter user's tweets and derives interests and locations."""

    def __init__(self, handle):
        self.handle = handle
        # Cache the tweets once; the other methods work from this list.
        self.tweets = self.get_tweets()

    def get_tweets(self):
        """Return recent tweets for self.handle as dicts (from_user, geo, tweet)."""
        query = urllib.quote_plus('from:' + self.handle + ' since:2009-01-01 include:retweets')
        tweets = []
        browser = anonBrowser()
        browser.anonymize()
        response = browser.open('http://search.twitter.com/search.json?q=' + query)
        json_objects = json.load(response)
        for result in json_objects['results']:
            new_result = {}
            new_result['from_user'] = result['from_user_name']
            new_result['geo'] = result['geo']
            new_result['tweet'] = result['text']
            tweets.append(new_result)
        return tweets

    def find_interests(self):
        """Extract links, @users and #hashtags from the cached tweets."""
        interests = {}
        interests['links'] = []
        interests['users'] = []
        interests['hashtags'] = []
        for tweet in self.tweets:
            text = tweet['tweet']
            # A URL either ends the tweet (\Z) or is followed by a space
            links = re.compile(r'(http.*?)\Z|(http.*?) ').findall(text)
            for link in links:
                if link[0]:
                    link = link[0]
                elif link[1]:
                    link = link[1]
                else:
                    continue
                try:
                    # Follow redirects to expand shortened URLs
                    response = urllib2.urlopen(link)
                    full_link = response.url
                    interests['links'].append(full_link)
                except:
                    pass
            interests['users'] += re.compile(r'(@\w+)').findall(text)
            interests['hashtags'] += re.compile(r'(#\w+)').findall(text)
        interests['users'].sort()
        interests['hashtags'].sort()
        interests['links'].sort()
        return interests

    def twitter_locate(self, cityFile):
        """Collect locations from tweet geotags and from city names in text.

        cityFile -- optional path to a file with one city name per line.
        """
        cities = []
        if cityFile != None:
            for line in open(cityFile).readlines():
                city = line.strip('\n').strip('\r').lower()
                cities.append(city)
        locations = []
        locCnt = 0
        cityCnt = 0
        tweetsText = ''
        for tweet in self.tweets:
            # Geotagged tweets give a location directly
            if tweet['geo'] != None:
                locations.append(tweet['geo'])
                locCnt += 1
            tweetsText += tweet['tweet'].lower()
        # Then scan the combined tweet text for known city names
        for city in cities:
            if city in tweetsText:
                locations.append(city)
                cityCnt += 1
        return locations

使用Smtplib给目标对象发邮件

#!/usr/bin/python
#coding=utf-8
import smtplib
from email.mime.text import MIMEText def sendMail(user, pwd, to, subject, text):
msg = MIMEText(text)
msg['From'] = user
msg['To'] = to
msg['Subject'] = subject
try:
smtpServer = smtplib.SMTP('smtp.gmail.com', 587)
print "[+] Connecting To Mail Server."
smtpServer.ehlo()
print "[+] Starting Encrypted Session."
smtpServer.starttls()
smtpServer.ehlo()
print "[+] Logging Into Mail Server."
smtpServer.login(user, pwd)
print "[+] Sending Mail."
smtpServer.sendmail(user, to, msg.as_string())
smtpServer.close()
print "[+] Mail Sent Successfully."
except:
print "[-] Sending Mail Failed." user = 'username'
pwd = 'password'
sendMail(user, pwd, 'target@tgt.tgt', 'Re: Important', 'Test Message')

用smtplib进行网络钓鱼

#!/usr/bin/python
#coding=utf-8
import smtplib
import optparse
from email.mime.text import MIMEText
from twitterClass import *
from random import choice def sendMail(user, pwd, to, subject, text):
msg = MIMEText(text)
msg['From'] = user
msg['To'] = to
msg['Subject'] = subject
try:
smtpServer = smtplib.SMTP('smtp.gmail.com', 587)
print "[+] Connecting To Mail Server."
smtpServer.ehlo()
print "[+] Starting Encrypted Session."
smtpServer.starttls()
smtpServer.ehlo()
print "[+] Logging Into Mail Server."
smtpServer.login(user, pwd)
print "[+] Sending Mail."
smtpServer.sendmail(user, to, msg.as_string())
smtpServer.close()
print "[+] Mail Sent Successfully."
except:
print "[-] Sending Mail Failed." def main():
parser = optparse.OptionParser('[*]Usage: python sendSam.py -u <twitter target> -t <target email> ' + '-l <gmail login> -p <gmail password>')
parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
parser.add_option('-t', dest='tgt', type='string', help='specify target email')
parser.add_option('-l', dest='user', type='string', help='specify gmail login')
parser.add_option('-p', dest='pwd', type='string', help='specify gmail password')
(options, args) = parser.parse_args()
handle = options.handle
tgt = options.tgt
user = options.user
pwd = options.pwd
if handle == None or tgt == None or user ==None or pwd==None:
print parser.usage
exit(0) print "[+] Fetching tweets from: " + str(handle)
spamTgt = reconPerson(handle)
spamTgt.get_tweets()
print "[+] Fetching interests from: " + str(handle)
interests = spamTgt.find_interests()
print "[+] Fetching location information from: " + str(handle)
location = spamTgt.twitter_locate('mlb-cities.txt') spamMsg = "Dear " + tgt + "," if (location != None):
randLoc = choice(location)
spamMsg += " Its me from " + randLoc + "." if (interests['users'] != None):
randUser = choice(interests['users'])
spamMsg += " " + randUser + " said to say hello." if (interests['hashtags'] != None):
randHash=choice(interests['hashtags'])
spamMsg += " Did you see all the fuss about " + randHash + "?" if (interests['links']!=None):
randLink=choice(interests['links'])
spamMsg += " I really liked your link to: " + randLink + "." spamMsg += " Check out my link to http://evil.tgt/malware"
print "[+] Sending Msg: " + spamMsg sendMail(user, pwd, tgt, 'Re: Important', spamMsg) if __name__ == '__main__':
main()

《Python绝技:运用Python成为顶级黑客》 用Python刺探网络的更多相关文章

  1. 《Python绝技:运用Python成为顶级黑客》 Python实用小工具

    1.实现简单探测 使用socket模块,connect()方法建立与指定IP和端口的网络连接:recv(1024)方法将读取套接字中接下来的1024B数据 import socket import sy ...

  2. python绝技:运用python成为顶级黑客|中文pdf完整版[42MB|网盘地址附提取码自行提取|

    Python 是一门常用的编程语言,它不仅上手容易,而且还拥有丰富的支持库.对经常需要针对自己所 处的特定场景编写专用工具的黑客.计算机犯罪调查人员.渗透测试师和安全工程师来说,Python 的这些 ...

  3. Python 绝技 —— TCP服务器与客户端

    i春秋作家:wasrehpic 0×00 前言 「网络」一直以来都是黑客最热衷的竞技场.数据在网络中肆意传播:主机扫描.代码注入.网络嗅探.数据篡改重放.拒绝服务攻击……黑客的功底越深厚,能做的就越多 ...

  4. Python 绝技 —— UDP 服务器与客户端

    i春秋作家:wasrehpic 0x00 前言 在上一篇文章「Python 绝技 —— TCP 服务器与客户端」中,介绍了传输层的核心协议 TCP ,并运用 Python 脚本的 socket 模块演 ...

  5. Python爆火的原因与未来|内附Python学习书籍大礼包无偿领取|

    从12年到20年,python以肉眼可见的趋势超过了java,成为了当今It界人人皆知的编程语言. python为什么这么火? 网络编程语言搜索指数 适合初学者 Python具有语法简单.语句清晰的特 ...

  6. Python高手之路【一】初识python

    Python简介 1:Python的创始人 Python (英国发音:/ˈpaɪθən/ 美国发音:/ˈpaɪθɑːn/), 是一种解释型.面向对象.动态数据类型的高级程序设计语言,由荷兰人Guido ...

  7. 跟着老男孩教育学Python开发【第一篇】:初识Python

    Python简介 Python前世今生 Python的创始人为吉多·范罗苏姆(Guido van Rossum).1989年的圣诞节期间,吉多·范罗苏姆为了在阿姆斯特丹打发时间,决心开发一个新的脚本解 ...

  8. 【Python五篇慢慢弹】数据结构看python

    数据结构看python 作者:白宁超 2016年10月9日14:04:47 摘要:继<快速上手学python>一文之后,笔者又将python官方文档认真学习下.官方给出的pythondoc ...

  9. python之最强王者(2)——python基础语法

    背景介绍:由于本人一直做java开发,也是从txt开始写hello,world,使用javac命令编译,一直到使用myeclipse,其中的道理和辛酸都懂(请容许我擦干眼角的泪水),所以对于pytho ...

  10. Python之路3【第一篇】Python基础

    本节内容 Python简介 Python安装 第一个Python程序 编程语言的分类 Python简介 1.Python的由来 python的创始人为吉多·范罗苏姆(Guido van Rossum) ...

随机推荐

  1. Halcon开发环境和数据结构介绍——第1讲

    1.Halcon是什么?如何初步了解Halcon? 这点我讲得不太好,不如给大家看看三个链接: ① Halcon官方网站:https://www.mvtec.com/products/halcon/ ...

  2. 使用第三方容器服务,自动化部署.Net Core

    1.为什么用第三方,而不自建,有哪些第三方,最后实现的效果 a.尝试过自建,并成功了,但是很麻烦,要敲一堆命令,无法达到全自动化部署的要求. b.自建,就算用第三方的镜像包,感觉下载还是不快,不知道为 ...

  3. linux-centos6-rabbitmq安装及配置

    服务器版本centos6.8 一.先安装Erlang 具体安装哪个版本可以对照 http://www.rabbitmq.com/which-erlang.html 如下图: 准备安装RabbitMQ3 ...

  4. 带你认识那些App可靠性设计

    可靠性是软件一个重要的质量属性,它关注的是软件功能持续的可用性,以及出现故障之后是否能够容错,是否能快速的恢复使用. 可靠性六条基本准则 1.故障应在第一时间被检测和感知: 2.能避免的故障都不应该发 ...

  5. zabbix分布式系统监视

    http://blog.chinaunix.net/uid-25266990-id-3380929.html

  6. 【文件下载】Java下载文件的几种方式

    [文件下载]Java下载文件的几种方式  摘自:https://www.cnblogs.com/sunny3096/p/8204291.html 1.以流的方式下载. public HttpServl ...

  7. 解决Android启动显示空白界面的问题

    Android程序启动时,第一个看的界面并不是我们的指定的第一个Activity界面,而是显示了一个空白的界面,带标题栏的,但是界面什么内容都没有,这个界面只显示不到1秒左右的时间就会切换到我们的第一 ...

  8. 关于多系统跨浏览器 BrowserStack 的使用

    偶然在Scott Hanselman Blogs看到一篇关于 BrowserStack 博文,对于前端多浏览器测试. 现在拥有各自内核的浏览器越来越多,各自的特性也千差万别.如果作为一个前端攻城师想要 ...

  9. NET(C#)连接各类数据库-集锦

    1.C#连接连接Access程序代码:------------------------------------------------------------------------------- u ...

  10. 2018.09.15 poj1041John's trip(欧拉路输出方案)

    传送门 一个欧拉路输出方案的板子题. 竟然难在读入233. 代码: #include<iostream> #include<cstdio> #include<cstrin ...