Visualizing Email Data(Week 6&7)

code segment

gword.py

import sqlite3
import time
import zlib
import string conn = sqlite3.connect('index.sqlite')
cur = conn.cursor() cur.execute('SELECT id, subject FROM Subjects')
subjects = dict()
for message_row in cur :
subjects[message_row[0]] = message_row[1] # cur.execute('SELECT id, guid,sender_id,subject_id,headers,body FROM Messages')
cur.execute('SELECT subject_id FROM Messages')
counts = dict()
for message_row in cur :
text = subjects[message_row[0]]
text = text.translate(str.maketrans('','',string.punctuation))
text = text.translate(str.maketrans('','','1234567890'))
text = text.strip()
text = text.lower()
words = text.split()
for word in words:
if len(word) < 4 : continue
counts[word] = counts.get(word,0) + 1 x = sorted(counts, key=counts.get, reverse=True)
highest = None
lowest = None
for k in x[:100]:
if highest is None or highest < counts[k] :
highest = counts[k]
if lowest is None or lowest > counts[k] :
lowest = counts[k]
print('Range of counts:',highest,lowest) # Spread the font sizes across 20-100 based on the count
bigsize = 80
smallsize = 20 fhand = open('gword.js','w')
fhand.write("gword = [")
first = True
for k in x[:100]:
if not first : fhand.write( ",\n")
first = False
size = counts[k]
size = (size - lowest) / float(highest - lowest)
size = int((size * bigsize) + smallsize)
fhand.write("{text: '"+k+"', size: "+str(size)+"}")
fhand.write( "\n];\n")
fhand.close() print("Output written to gword.js")
print("Open gword.htm in a browser to see the vizualization")

gline.py

import sqlite3
import time
import zlib conn = sqlite3.connect('index.sqlite')
cur = conn.cursor() cur.execute('SELECT id, sender FROM Senders')
senders = dict()
for message_row in cur :
senders[message_row[0]] = message_row[1] cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
messages = dict()
for message_row in cur :
messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) print("Loaded messages=",len(messages),"senders=",len(senders)) sendorgs = dict()
for (message_id, message) in list(messages.items()):
sender = message[1]
pieces = senders[sender].split("@")
if len(pieces) != 2 : continue
dns = pieces[1]
sendorgs[dns] = sendorgs.get(dns,0) + 1 # pick the top schools
orgs = sorted(sendorgs, key=sendorgs.get, reverse=True)
orgs = orgs[:10]
print("Top 10 Organizations")
print(orgs) counts = dict()
months = list()
# cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
for (message_id, message) in list(messages.items()):
sender = message[1]
pieces = senders[sender].split("@")
if len(pieces) != 2 : continue
dns = pieces[1]
if dns not in orgs : continue
month = message[3][:7]
if month not in months : months.append(month)
key = (month, dns)
counts[key] = counts.get(key,0) + 1 months.sort()
# print counts
# print months fhand = open('gline.js','w')
fhand.write("gline = [ ['Month'")
for org in orgs:
fhand.write(",'"+org+"'")
fhand.write("]") for month in months:
fhand.write(",\n['"+month+"'")
for org in orgs:
key = (month, org)
val = counts.get(key,0)
fhand.write(","+str(val))
fhand.write("]"); fhand.write("\n];\n")
fhand.close() print("Output written to gline.js")
print("Open gline.htm to visualize the data")

Coursera课程笔记----P4E.Capstone----Week 6&7的更多相关文章

  1. Coursera课程笔记----P4E.Capstone----Week 4&5

    Spidering and Modeling Email Data(week4&5) Mailing List - Gmane Crawl the archive of a mailing l ...

  2. Coursera课程笔记----P4E.Capstone----Week 2&3

    Building a Search Engine(week 2&3) Search Engine Architecture Web Crawling Index Building Search ...

  3. 操作系统学习笔记----进程/线程模型----Coursera课程笔记

    操作系统学习笔记----进程/线程模型----Coursera课程笔记 进程/线程模型 0. 概述 0.1 进程模型 多道程序设计 进程的概念.进程控制块 进程状态及转换.进程队列 进程控制----进 ...

  4. Coursera课程笔记----C++程序设计----Week3

    类和对象(Week 3) 内联成员函数和重载成员函数 内联成员函数 inline + 成员函数 整个函数题出现在类定义内部 class B{ inline void func1(); //方式1 vo ...

  5. Coursera课程笔记----Write Professional Emails in English----Week 3

    Introduction and Announcement Emails (Week 3) Overview of Introduction & Announcement Emails Bas ...

  6. Coursera课程笔记----Write Professional Emails in English----Week 1

    Get to Know Basic Email Writing Structures(Week 1) Introduction to Course Email and Editing Basics S ...

  7. Coursera课程笔记----C程序设计进阶----Week 5

    指针(二) (Week 5) 字符串与指针 指向数组的指针 int a[10]; int *p; p = a; 指向字符串的指针 指向字符串的指针变量 char a[10]; char *p; p = ...

  8. Coursera课程笔记----Write Professional Emails in English----Week 5

    Culture Matters(Week 5) High/Low Context Communication High Context Communication The Middle East, A ...

  9. Coursera课程笔记----Write Professional Emails in English----Week 4

    Request and Apology Emails(Week 4) How to Write Request Emails Write more POLITELY & SINCERELUY ...

随机推荐

  1. Python 编程环境搭建(Windows 系统中)

    由于大家普遍使用 Windows 系统,所以本文只介绍 Windows 系统中 Python 环境的安装. 在 Windows 中安装 Python 与安装普通软件没什么差别,下载所需版本的安装包后, ...

  2. Python-selenium安装与Java-selenium安装

    一.Python安装及selenium的安装 1.安装Pythonhttps://www.Python.org2.安装setuptools.distribute.piphttps://pypi.pyt ...

  3. 如果这篇文章说不清epoll的本质,那就过来掐死我吧!

    转载自:https://www.toutiao.com/i6683264188661367309/ 目录 一.从网卡接收数据说起 二.如何知道接收了数据? 三.进程阻塞为什么不占用cpu资源? 四.内 ...

  4. Java读源码之CountDownLatch

    前言 相信大家都挺熟悉 CountDownLatch 的,顾名思义就是一个栅栏,其主要作用是多线程环境下,让多个线程在栅栏门口等待,所有线程到齐后,栅栏打开程序继续执行. 案例 用一个最简单的案例引出 ...

  5. vue2.x学习笔记(十四)

    接着前面的内容:https://www.cnblogs.com/yanggb/p/12602256.html. 组件的Prop Prop是组件之间通信的一个重要途径,了解其知识十分重要. Prop的大 ...

  6. Laravel 上手增删改查

    拿到一个框架,除了解框架,还要能实现基本的CURD操作. 添加 1.配置路由,指定添加页面: // routes/web.php 中增加如下: // 添加页面.存放路径 Laravel7/resour ...

  7. 设计模式-原型模式(Prototype)【重点:浅复制与深复制】

    讲故事 最近重温了一下星爷的<唐伯虎点秋香>,依然让我捧腹不已,幻想着要是我也能有一名秋香如此的侍女,夫复何求呀,带着这个美好的幻想沉沉睡去... 突然想到,我是一名程序猿呀,想要什么对象 ...

  8. 单图像三维重建、2D到3D风格迁移和3D DeepDream

    作者:Longway Date:2020-04-25 来源:单图像三维重建.2D到3D风格迁移和3D DeepDream 项目网址:http://hiroharu-kato.com/projects_ ...

  9. Jmeter与LoadRunner的比较

    一.与Loadrunner的比较相似点 1.Jmeter的架构跟loadrunner原理一样, 都是通过中间代理,监控&收集并发客户端发现的指令,把他们生成脚本,再发送到应用服务器,再监控服务 ...

  10. PHP中的数据库操作

    PDO project data object 连接到数据库 $db=new PDO("mysql:dbname=database;host=sever","userna ...