Browsing pages with the mechanize library

#!/usr/bin/python
#coding=utf-8
import mechanize

def viewPage(url):
    browser = mechanize.Browser()
    page = browser.open(url)
    source_code = page.read()
    print source_code

viewPage('http://www.imooc.com/')
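
mechanize raises an exception when the target cannot be opened, so in practice it helps to wrap the open() call. A minimal hedged sketch under that assumption (viewPageSafe and the timeout value are inventions for illustration):

#!/usr/bin/python
#coding=utf-8
import mechanize

def viewPageSafe(url):
    browser = mechanize.Browser()
    try:
        # time out rather than hang on an unresponsive host
        page = browser.open(url, timeout=10)
        print page.read()
    except Exception, e:
        print '[-] Failed to open ' + url + ': ' + str(e)

viewPageSafe('http://www.imooc.com/')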

Using a proxy server, User-Agent, and cookies:

#!/usr/bin/python
#coding=utf-8
import mechanize

def testProxy(url, proxy):
    browser = mechanize.Browser()
    browser.set_proxies(proxy)
    page = browser.open(url)
    source_code = page.read()
    print source_code

url = 'http://2017.ip138.com/ic.asp'
hideMeProxy = {'http': '139.196.202.164:9001'}
testProxy(url, hideMeProxy)
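
To confirm the proxy is actually in effect, fetch an IP-echo page once directly and once through set_proxies() and compare the two results. A hedged sketch, reusing the proxy address above and assuming httpbin.org/ip as the echo service:

#!/usr/bin/python
#coding=utf-8
import mechanize

def currentIP(proxy=None):
    browser = mechanize.Browser()
    if proxy:
        browser.set_proxies(proxy)
    # httpbin echoes back the IP the request arrived from
    return browser.open('http://httpbin.org/ip').read()

print '[*] Direct:  ' + currentIP()
print '[*] Proxied: ' + currentIP({'http': '139.196.202.164:9001'})
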
#!/usr/bin/python
#coding=utf-8
import mechanize

def testUserAgent(url, userAgent):
    browser = mechanize.Browser()
    browser.addheaders = userAgent
    page = browser.open(url)
    source_code = page.read()
    print source_code

url = 'http://whatismyuseragent.dotdoh.com/'
userAgent = [('User-agent', 'Mozilla/5.0 (X11; U; Linux 2.4.2-2 i586; en-US; m18) Gecko/20010131 Netscape6/6.01')]
testUserAgent(url, userAgent)

Integrating the code into a Python class, anonBrowser

#!/usr/bin/python
#coding=utf-8
import mechanize
import cookielib
import random
import time

class anonBrowser(mechanize.Browser):
    def __init__(self, proxies = [], user_agents = []):
        mechanize.Browser.__init__(self)
        self.set_handle_robots(False)
        # list of proxy servers available to the user
        self.proxies = proxies
        # list of user agents
        self.user_agents = user_agents + ['Mozilla/4.0 ', 'FireFox/6.01', 'ExactSearch', 'Nokia7110/1.0']
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)
        self.anonymize()  # clear cookies

    # swap in a fresh, empty cookie jar
    def clear_cookies(self):
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)

    # set a random user agent from the list
    def change_user_agent(self):
        index = random.randrange(0, len(self.user_agents))
        self.addheaders = [('User-agent', self.user_agents[index])]

    # set a random proxy from the list
    def change_proxy(self):
        if self.proxies:
            index = random.randrange(0, len(self.proxies))
            self.set_proxies({'http': self.proxies[index]})

    # call the three methods above to rotate the user agent and proxy and
    # clear cookies, raising anonymity; the sleep parameter pauses the
    # process to improve the anonymizing effect further
    def anonymize(self, sleep = False):
        self.clear_cookies()
        self.change_user_agent()
        self.change_proxy()
        if sleep:
            time.sleep(60)
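
A quick sanity check of the class (assuming it is saved as anonBrowser.py): every anonymize() call sets a freshly chosen User-agent header. The agent strings here are invented for illustration:

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *

ab = anonBrowser(user_agents=['agent-one', 'agent-two', 'agent-three'])
for i in range(3):
    ab.anonymize()
    # addheaders holds the single ('User-agent', ...) pair set by change_user_agent()
    print '[*] Current headers: ' + str(ab.addheaders)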

Testing whether each visit uses a different cookie:

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *

# user_agents expects plain strings: change_user_agent() wraps each one
# in a ('User-agent', value) header tuple itself
ab = anonBrowser(proxies=[], user_agents=['superSecretBroswer'])

for attempt in range(1, 5):
    # anonymize on every visit
    ab.anonymize()
    print '[*] Fetching page'
    response = ab.open('http://www.kittenwar.com/')
    for cookie in ab.cookie_jar:
        print cookie

Parsing href links with BeautifulSoup:

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *
from BeautifulSoup import BeautifulSoup
import os
import optparse
import re

def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    # parse href links with the re module
    try:
        print '[+] Printing Links From Regex.'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
    # parse href links with the BeautifulSoup module
    try:
        print '\n[+] Printing Links From BeautifulSoup.'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass

def main():
    parser = optparse.OptionParser('[*]Usage: python linkParser.py -u <target url>')
    parser.add_option('-u', dest='tgtURL', type='string', help='specify target url')
    (options, args) = parser.parse_args()
    url = options.tgtURL
    if url == None:
        print parser.usage
        exit(0)
    else:
        printLinks(url)

if __name__ == '__main__':
    main()
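
Both parsers print hrefs exactly as they appear in the page, so relative links such as '/about' come back unresolved. A small hedged helper (absolutize is an invented name) using Python 2's urlparse to join them against the page URL:

#!/usr/bin/python
#coding=utf-8
import urlparse

def absolutize(base_url, hrefs):
    # urljoin resolves each relative href against the page URL;
    # absolute hrefs pass through unchanged
    return [urlparse.urljoin(base_url, h) for h in hrefs]

print absolutize('http://www.imooc.com/course/', ['/about', 'list?c=python'])
# -> ['http://www.imooc.com/about', 'http://www.imooc.com/course/list?c=python']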

Mirroring images with BeautifulSoup

#!/usr/bin/python
#coding=utf-8
from anonBrowser import *
from BeautifulSoup import BeautifulSoup
import os
import optparse

def mirrorImages(url, dir):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    image_tags = soup.findAll('img')
    for image in image_tags:
        # note: lstrip() strips any of the listed characters from the left,
        # not the literal prefix 'http://', so this assumes absolute src URLs
        # whose hostnames do not begin with h, t, p, ':' or '/'
        filename = image['src'].lstrip('http://')
        filename = os.path.join(dir, filename.replace('/', '_'))
        print '[+] Saving ' + str(filename)
        data = ab.open(image['src']).read()
        # step back to the original page
        ab.back()
        save = open(filename, 'wb')
        save.write(data)
        save.close()

def main():
    parser = optparse.OptionParser('[*]Usage: python imageMirror.py -u <target url> -d <destination directory>')
    parser.add_option('-u', dest='tgtURL', type='string', help='specify target url')
    parser.add_option('-d', dest='dir', type='string', help='specify destination directory')
    (options, args) = parser.parse_args()
    url = options.tgtURL
    dir = options.dir
    if url == None or dir == None:
        print parser.usage
        exit(0)
    else:
        try:
            mirrorImages(url, dir)
        except Exception, e:
            print '[-] Error Mirroring Images.'
            print '[-] ' + str(e)

if __name__ == '__main__':
    main()
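
As the comment above notes, lstrip('http://') strips a character set rather than a literal prefix, and the script assumes every src is an absolute URL. A hedged sketch of a safer helper (safe_image_name is an invented name), using Python 2's urlparse:

#!/usr/bin/python
#coding=utf-8
import os
import urlparse

def safe_image_name(page_url, src, dir):
    # resolve relative srcs such as '/static/logo.png' against the page URL
    full_url = urlparse.urljoin(page_url, src)
    # drop the scheme by splitting on '://' instead of lstrip()'s character set
    without_scheme = full_url.split('://', 1)[-1]
    return full_url, os.path.join(dir, without_scheme.replace('/', '_'))

print safe_image_name('http://www.example.com/a/', 'img/cat.png', 'mirror')
# -> ('http://www.example.com/a/img/cat.png', 'mirror/www.example.com_a_img_cat.png')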

Interacting with the Google API in Python

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *

def google(search_term):
    ab = anonBrowser()
    # URL-encode the search term
    search_term = urllib.quote_plus(search_term)
    response = ab.open('https://www.googleapis.com/customsearch/v1?key=yourKey&cx=yourCX&num=1&alt=json&q=' + search_term)
    print response.read()

google('Boundock Saint')
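
The yourKey and yourCX values are placeholders for a Google Custom Search API key and search engine id. Instead of string concatenation, the query string can also be assembled with urllib.urlencode, which handles the URL encoding itself; a sketch with the same placeholders:

#!/usr/bin/python
#coding=utf-8
import urllib

params = urllib.urlencode({
    'key': 'yourKey',  # placeholder: your API key
    'cx': 'yourCX',    # placeholder: your custom search engine id
    'num': 1,
    'alt': 'json',
    'q': 'Boundock Saint',
})
print 'https://www.googleapis.com/customsearch/v1?' + params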

Next, handle the JSON-formatted data: loading it with the json library's load() function is all that is needed, since json.load() accepts the file-like response object directly.

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json

def google(search_term):
    ab = anonBrowser()
    # URL-encode the search term
    search_term = urllib.quote_plus(search_term)
    response = ab.open('https://www.googleapis.com/customsearch/v1?key=yourKey&cx=yourCX&num=1&alt=json&q=' + search_term)
    objects = json.load(response)
    print objects

google('Boundock Saint')

Writing a Google_Result class to hold the titles parsed out of the JSON data

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json
import optparse

class Google_Result:
    def __init__(self, title, text, url):
        self.title = title
        self.text = text
        self.url = url

    def __repr__(self):
        return self.title

def google(search_term):
    ab = anonBrowser()
    # URL-encode the search term
    search_term = urllib.quote_plus(search_term)
    response = ab.open('https://www.googleapis.com/customsearch/v1?key=yourKey&cx=yourCX&num=1&alt=json&q=' + search_term)
    objects = json.load(response)
    results = []
    for result in objects['items']:
        url = result['link']
        title = result['title']
        text = result['snippet']
        print url
        print title
        print text
        new_gr = Google_Result(title, text, url)
        results.append(new_gr)
    return results

def main():
    parser = optparse.OptionParser('[*]Usage: python anonGoogle.py -k <keywords>')
    parser.add_option('-k', dest='keyword', type='string', help='specify google keyword')
    (options, args) = parser.parse_args()
    keyword = options.keyword
    if options.keyword == None:
        print parser.usage
        exit(0)
    else:
        results = google(keyword)
        print results

if __name__ == '__main__':
    main()

Parsing a Twitter user's page with Python

#!/usr/bin/python
#coding=utf-8
import json
import urllib
from anonBrowser import *

class reconPerson:
    def __init__(self, first_name, last_name, job='', social_media={}):
        self.first_name = first_name
        self.last_name = last_name
        self.job = job
        self.social_media = social_media

    def __repr__(self):
        return self.first_name + ' ' + self.last_name + ' has job ' + self.job

    def get_social(self, media_name):
        if self.social_media.has_key(media_name):
            return self.social_media[media_name]
        return None

    def query_twitter(self, query):
        query = urllib.quote_plus(query)
        results = []
        browser = anonBrowser()
        response = browser.open('http://search.twitter.com/search.json?q=' + query)
        json_objects = json.load(response)
        for result in json_objects['results']:
            new_result = {}
            new_result['from_user'] = result['from_user_name']
            new_result['geo'] = result['geo']
            new_result['tweet'] = result['text']
            results.append(new_result)
        return results

ap = reconPerson('Boondock', 'Saint')
print ap.query_twitter('from:th3j35t3r since:2010-01-01 include:retweets')
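
The unauthenticated search.twitter.com endpoint used here was retired in 2013, but the parsing loop can still be exercised offline against a canned response of the same shape; a hedged sketch (the sample record is invented for illustration):

#!/usr/bin/python
#coding=utf-8
import json

# invented sample mirroring the fields the loop reads
canned = '{"results": [{"from_user_name": "th3j35t3r", "geo": null, "text": "example tweet text"}]}'

json_objects = json.loads(canned)
for result in json_objects['results']:
    new_result = {}
    new_result['from_user'] = result['from_user_name']
    new_result['geo'] = result['geo']
    new_result['tweet'] = result['text']
    print new_result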

Extracting location information from tweets

#!/usr/bin/python
#coding=utf-8
import json
import urllib
import optparse
from anonBrowser import *

def get_tweets(handle):
    query = urllib.quote_plus('from:' + handle + ' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/search.json?q=' + query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        tweets.append(new_result)
    return tweets

def load_cities(cityFile):
    cities = []
    for line in open(cityFile).readlines():
        city = line.strip('\n').strip('\r').lower()
        cities.append(city)
    return cities

def twitter_locate(tweets, cities):
    locations = []
    locCnt = 0
    cityCnt = 0
    tweetsText = ""
    for tweet in tweets:
        if tweet['geo'] != None:
            locations.append(tweet['geo'])
            locCnt += 1
        tweetsText += tweet['tweet'].lower()
    for city in cities:
        if city in tweetsText:
            locations.append(city)
            cityCnt += 1
    print "[+] Found " + str(locCnt) + " locations via Twitter API and " + str(cityCnt) + " locations from text search."
    return locations

def main():
    parser = optparse.OptionParser('[*]Usage: python twitterGeo.py -u <twitter handle> [-c <list of cities>]')
    parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
    parser.add_option('-c', dest='cityFile', type='string', help='specify file containing cities to search')
    (options, args) = parser.parse_args()
    handle = options.handle
    cityFile = options.cityFile
    if (handle == None):
        print parser.usage
        exit(0)
    cities = []
    if (cityFile != None):
        cities = load_cities(cityFile)
    tweets = get_tweets(handle)
    locations = twitter_locate(tweets, cities)
    print "[+] Locations: " + str(locations)

if __name__ == '__main__':
    main()
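
The -c option expects a plain text file with one city name per line (load_cities() strips and lowercases each line). A hypothetical way to generate such a file, matching the mlb-cities.txt name used in the final section:

#!/usr/bin/python
#coding=utf-8

# hypothetical example city list for the -c option
cities = ['boston', 'new york', 'san francisco']
with open('mlb-cities.txt', 'w') as f:
    f.write('\n'.join(cities) + '\n')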

Parsing a Twitter user's interests with regular expressions

#!/usr/bin/python
#coding=utf-8
import json
import re
import urllib
import urllib2
import optparse
from anonBrowser import *

def get_tweets(handle):
    query = urllib.quote_plus('from:' + handle + ' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/search.json?q=' + query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        tweets.append(new_result)
    return tweets

def find_interests(tweets):
    interests = {}
    interests['links'] = []
    interests['users'] = []
    interests['hashtags'] = []
    for tweet in tweets:
        text = tweet['tweet']
        links = re.compile('(http.*?)\Z|(http.*?) ').findall(text)
        for link in links:
            if link[0]:
                link = link[0]
            elif link[1]:
                link = link[1]
            else:
                continue
            try:
                response = urllib2.urlopen(link)
                full_link = response.url
                interests['links'].append(full_link)
            except:
                pass
        interests['users'] += re.compile('(@\w+)').findall(text)
        interests['hashtags'] += re.compile('(#\w+)').findall(text)
    interests['users'].sort()
    interests['hashtags'].sort()
    interests['links'].sort()
    return interests

def main():
    parser = optparse.OptionParser('[*]Usage: python twitterInterests.py -u <twitter handle>')
    parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
    (options, args) = parser.parse_args()
    handle = options.handle
    if handle == None:
        print parser.usage
        exit(0)
    tweets = get_tweets(handle)
    interests = find_interests(tweets)
    print '\n[+] Links.'
    for link in set(interests['links']):
        print ' [+] ' + str(link)
    print '\n[+] Users.'
    for user in set(interests['users']):
        print ' [+] ' + str(user)
    print '\n[+] HashTags.'
    for hashtag in set(interests['hashtags']):
        print ' [+] ' + str(hashtag)

if __name__ == '__main__':
    main()
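
The link regex above only matches URLs that end the tweet or are followed by a space. A slightly more forgiving hedged alternative runs each match to the next whitespace:

#!/usr/bin/python
#coding=utf-8
import re

text = 'reading http://t.co/abc123 and http://t.co/xyz789'
# \S+ consumes up to the next whitespace, catching links followed by
# any character other than a space
print re.findall(r'(http\S+)', text)
# -> ['http://t.co/abc123', 'http://t.co/xyz789']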

Writing a reconPerson class that encapsulates all of the scraping code for locations, interests, and Twitter pages:

#!/usr/bin/python
#coding=utf-8
import urllib
from anonBrowser import *
import json
import re
import urllib2

class reconPerson:
    def __init__(self, handle):
        self.handle = handle
        self.tweets = self.get_tweets()

    def get_tweets(self):
        query = urllib.quote_plus('from:' + self.handle + ' since:2009-01-01 include:retweets')
        tweets = []
        browser = anonBrowser()
        browser.anonymize()
        response = browser.open('http://search.twitter.com/search.json?q=' + query)
        json_objects = json.load(response)
        for result in json_objects['results']:
            new_result = {}
            new_result['from_user'] = result['from_user_name']
            new_result['geo'] = result['geo']
            new_result['tweet'] = result['text']
            tweets.append(new_result)
        return tweets

    def find_interests(self):
        interests = {}
        interests['links'] = []
        interests['users'] = []
        interests['hashtags'] = []
        for tweet in self.tweets:
            text = tweet['tweet']
            links = re.compile('(http.*?)\Z|(http.*?) ').findall(text)
            for link in links:
                if link[0]:
                    link = link[0]
                elif link[1]:
                    link = link[1]
                else:
                    continue
                try:
                    response = urllib2.urlopen(link)
                    full_link = response.url
                    interests['links'].append(full_link)
                except:
                    pass
            interests['users'] += re.compile('(@\w+)').findall(text)
            interests['hashtags'] += re.compile('(#\w+)').findall(text)
        interests['users'].sort()
        interests['hashtags'].sort()
        interests['links'].sort()
        return interests

    def twitter_locate(self, cityFile):
        cities = []
        if cityFile != None:
            for line in open(cityFile).readlines():
                city = line.strip('\n').strip('\r').lower()
                cities.append(city)
        locations = []
        locCnt = 0
        cityCnt = 0
        tweetsText = ''
        for tweet in self.tweets:
            if tweet['geo'] != None:
                locations.append(tweet['geo'])
                locCnt += 1
            tweetsText += tweet['tweet'].lower()
        for city in cities:
            if city in tweetsText:
                locations.append(city)
                cityCnt += 1
        return locations
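
Note that the phishing script in the final section pulls this class in with from twitterClass import *, so save this file as twitterClass.py.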

Sending email to the target with smtplib

#!/usr/bin/python
#coding=utf-8
import smtplib
from email.mime.text import MIMEText

def sendMail(user, pwd, to, subject, text):
    msg = MIMEText(text)
    msg['From'] = user
    msg['To'] = to
    msg['Subject'] = subject
    try:
        smtpServer = smtplib.SMTP('smtp.gmail.com', 587)
        print "[+] Connecting To Mail Server."
        smtpServer.ehlo()
        print "[+] Starting Encrypted Session."
        smtpServer.starttls()
        smtpServer.ehlo()
        print "[+] Logging Into Mail Server."
        smtpServer.login(user, pwd)
        print "[+] Sending Mail."
        smtpServer.sendmail(user, to, msg.as_string())
        smtpServer.close()
        print "[+] Mail Sent Successfully."
    except:
        print "[-] Sending Mail Failed."

user = 'username'
pwd = 'password'
sendMail(user, pwd, 'target@tgt.tgt', 'Re: Important', 'Test Message')
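
Gmail has since stopped accepting plain password logins for most accounts, so for testing, the message construction can be exercised against Python's built-in debugging SMTP server instead; a hedged sketch:

#!/usr/bin/python
#coding=utf-8
# first start a local server that prints messages instead of delivering them:
#   python -m smtpd -n -c DebuggingServer localhost:1025
import smtplib
from email.mime.text import MIMEText

msg = MIMEText('Test Message')
msg['From'] = 'me@local.test'
msg['To'] = 'target@tgt.tgt'
msg['Subject'] = 'Re: Important'

smtpServer = smtplib.SMTP('localhost', 1025)  # no TLS or login needed locally
smtpServer.sendmail('me@local.test', ['target@tgt.tgt'], msg.as_string())
smtpServer.close()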

Phishing with smtplib

#!/usr/bin/python
#coding=utf-8
import smtplib
import optparse
from email.mime.text import MIMEText
from twitterClass import *
from random import choice

def sendMail(user, pwd, to, subject, text):
    msg = MIMEText(text)
    msg['From'] = user
    msg['To'] = to
    msg['Subject'] = subject
    try:
        smtpServer = smtplib.SMTP('smtp.gmail.com', 587)
        print "[+] Connecting To Mail Server."
        smtpServer.ehlo()
        print "[+] Starting Encrypted Session."
        smtpServer.starttls()
        smtpServer.ehlo()
        print "[+] Logging Into Mail Server."
        smtpServer.login(user, pwd)
        print "[+] Sending Mail."
        smtpServer.sendmail(user, to, msg.as_string())
        smtpServer.close()
        print "[+] Mail Sent Successfully."
    except:
        print "[-] Sending Mail Failed."

def main():
    parser = optparse.OptionParser('[*]Usage: python sendSam.py -u <twitter target> -t <target email> ' + '-l <gmail login> -p <gmail password>')
    parser.add_option('-u', dest='handle', type='string', help='specify twitter handle')
    parser.add_option('-t', dest='tgt', type='string', help='specify target email')
    parser.add_option('-l', dest='user', type='string', help='specify gmail login')
    parser.add_option('-p', dest='pwd', type='string', help='specify gmail password')
    (options, args) = parser.parse_args()
    handle = options.handle
    tgt = options.tgt
    user = options.user
    pwd = options.pwd
    if handle == None or tgt == None or user == None or pwd == None:
        print parser.usage
        exit(0)

    print "[+] Fetching tweets from: " + str(handle)
    spamTgt = reconPerson(handle)
    spamTgt.get_tweets()
    print "[+] Fetching interests from: " + str(handle)
    interests = spamTgt.find_interests()
    print "[+] Fetching location information from: " + str(handle)
    location = spamTgt.twitter_locate('mlb-cities.txt')

    spamMsg = "Dear " + tgt + ","
    if (location != None):
        randLoc = choice(location)
        spamMsg += " Its me from " + randLoc + "."
    if (interests['users'] != None):
        randUser = choice(interests['users'])
        spamMsg += " " + randUser + " said to say hello."
    if (interests['hashtags'] != None):
        randHash = choice(interests['hashtags'])
        spamMsg += " Did you see all the fuss about " + randHash + "?"
    if (interests['links'] != None):
        randLink = choice(interests['links'])
        spamMsg += " I really liked your link to: " + randLink + "."
    spamMsg += " Check out my link to http://evil.tgt/malware"
    print "[+] Sending Msg: " + spamMsg
    sendMail(user, pwd, tgt, 'Re: Important', spamMsg)

if __name__ == '__main__':
    main()
