COS418, Distributed Systems, Go Language
go test -run Sequential
func TestSequentialSingle(t *testing.T) {
    mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc)
    mr.Wait()
    check(t, mr.files)
    checkWorker(t, mr.stats)
    cleanup(mr)
}
func Sequential(jobName string, files []string, nreduce int,
    mapF func(string, string) []KeyValue,
    reduceF func(string, []string) string,
) (mr *Master) {
    mr = newMaster("master")
    go mr.run(jobName, files, nreduce, func(phase jobPhase) {
        switch phase {
        case mapPhase:
            for i, f := range mr.files {
                doMap(mr.jobName, i, f, mr.nReduce, mapF)
            }
        case reducePhase:
            for i := 0; i < mr.nReduce; i++ {
                doReduce(mr.jobName, i, len(mr.files), reduceF)
            }
        }
    }, func() {
        mr.stats = []int{len(files) + nreduce}
    })
    return
}
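The KeyValue type used throughout is the lab skeleton's plain key/value pair, which this post never shows; a minimal sketch of it:

// KeyValue is the unit of data passed from map to reduce.
type KeyValue struct {
    Key   string
    Value string
}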
func (mr *Master) run(jobName string, files []string, nreduce int,
    schedule func(phase jobPhase),
    finish func(),
) {
    mr.jobName = jobName
    mr.files = files
    mr.nReduce = nreduce
    debug("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName)
    schedule(mapPhase)
    schedule(reducePhase)
    finish()
    mr.merge()
    debug("%s: Map/Reduce task completed\n", mr.address)
    mr.doneChannel <- true
}
func reduceName(jobName string, mapTask int, reduceTask int) string {
    return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask)
}
func doMap(
    jobName string, // the name of the MapReduce job
    mapTaskNumber int, // which map task this is
    inFile string, // the input file this map task reads
    nReduce int, // the number of reduce tasks that will be run
    mapF func(file string, contents string) []KeyValue,
) {
    dat, err := ioutil.ReadFile(inFile)
    if err != nil {
        debug("file open fail:%s", inFile)
    } else {
        kvs := mapF(inFile, string(dat))
        // Partition the key/value pairs into nReduce buckets by hashing the key.
        partitions := make([][]KeyValue, nReduce)
        for _, kv := range kvs {
            r := int(ihash(kv.Key)) % nReduce
            partitions[r] = append(partitions[r], kv)
        }
        // Write each bucket to its intermediate file as a JSON array.
        for i := range partitions {
            j, _ := json.Marshal(partitions[i])
            f := reduceName(jobName, mapTaskNumber, i)
            ioutil.WriteFile(f, j, 0644)
        }
    }
}
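doMap partitions keys with ihash, a helper from the lab's common code that is not shown above; a minimal sketch, assuming the usual FNV-1a version:

import "hash/fnv"

// ihash hashes a key to 32 bits; doMap takes the result modulo nReduce
// to choose which intermediate file the pair goes to.
func ihash(s string) uint32 {
    h := fnv.New32a()
    h.Write([]byte(s))
    return h.Sum32()
}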

func mergeName(jobName string, reduceTask int) string {
    return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask)
}
func doReduce(
    jobName string, // the name of the whole MapReduce job
    reduceTaskNumber int, // which reduce task this is
    nMap int, // the number of map tasks that were run ("M" in the paper)
    reduceF func(key string, values []string) string,
) {
    kvs := make(map[string][]string)
    for m := 0; m < nMap; m++ {
        fileName := reduceName(jobName, m, reduceTaskNumber)
        dat, err := ioutil.ReadFile(fileName)
        if err != nil {
            debug("file open fail:%s", fileName)
        } else {
            var items []KeyValue
            json.Unmarshal(dat, &items)
            for _, item := range items {
                k := item.Key
                v := item.Value
                kvs[k] = append(kvs[k], v)
            }
        }
    }
    // create the final output file
    mergeFileName := mergeName(jobName, reduceTaskNumber)
    file, err := os.Create(mergeFileName)
    if err != nil {
        debug("file open fail:%s", mergeFileName)
    }
    // sort
    var keys []string
    for k := range kvs {
        keys = append(keys, k)
    }
    sort.Strings(keys)
    enc := json.NewEncoder(file)
    for _, key := range keys {
        enc.Encode(KeyValue{key, reduceF(key, kvs[key])})
    }
    file.Close()
}


go run wc.go master sequential pg-*.txt

The input to Reduce has the form list(<word, "">), and the output has the form list(<word, num>). The map and reduce functions for word count are:

func mapF(document string, value string) (res []mapreduce.KeyValue) {
    words := strings.FieldsFunc(value, func(r rune) bool {
        return !unicode.IsLetter(r)
    })
    res = []mapreduce.KeyValue{}
    for _, w := range words {
        res = append(res, mapreduce.KeyValue{w, ""})
    }
    return res
}

func reduceF(key string, values []string) string {
    return strconv.Itoa(len(values))
}
sort -n -k2 mrtmp.wcseq | tail -10
he: 34077
was: 37044
that: 37495
I: 44502
in: 46092
a: 60558
to: 74357
of: 79727
and: 93990
the: 154024

go test -run TestBasic
func TestBasic(t *testing.T) {
    mr := setup()
    for i := 0; i < 2; i++ {
        go RunWorker(mr.address, port("worker"+strconv.Itoa(i)),
            MapFunc, ReduceFunc, -1)
    }
    mr.Wait()
    check(t, mr.files)
    checkWorker(t, mr.stats)
    cleanup(mr)
}
func setup() *Master {
    files := makeInputs(nMap)
    master := port("master")
    mr := Distributed("test", files, nReduce, master)
    return mr
}
func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) {
    mr = newMaster(master)
    mr.startRPCServer()
    go mr.run(jobName, files, nreduce,
        func(phase jobPhase) {
            ch := make(chan string)
            go mr.forwardRegistrations(ch)
            schedule(mr.jobName, mr.files, mr.nReduce, phase, ch)
        },
        func() {
            mr.stats = mr.killWorkers()
            mr.stopRPCServer()
        })
    return
}
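The task description sent to each worker is the lab's DoTaskArgs struct; a sketch of its fields as schedule uses them below (the field comments are my assumptions, not the skeleton's own):

// DoTaskArgs describes one map or reduce task for a worker.
type DoTaskArgs struct {
    JobName       string
    File          string   // input file; only used in the map phase
    Phase         jobPhase // mapPhase or reducePhase
    TaskNumber    int      // index of this task within its phase
    NumOtherPhase int      // number of tasks in the other phase (nios below)
}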

func (mr *Master) schedule(phase jobPhase) {
    var ntasks int
    var nios int // number of inputs (for reduce) or outputs (for map)
    switch phase {
    case mapPhase:
        ntasks = len(mr.files)
        nios = mr.nReduce
    case reducePhase:
        ntasks = mr.nReduce
        nios = len(mr.files)
    }
    debug("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios)
    stats := make([]bool, ntasks)
    currentWorker := 0
    for {
        count := ntasks
        for i := 0; i < ntasks; i++ {
            if !stats[i] {
                mr.Lock()
                numWorkers := len(mr.workers)
                fmt.Println(numWorkers)
                if numWorkers == 0 {
                    mr.Unlock()
                    time.Sleep(time.Second)
                    continue
                }
                // Round-robin over the workers registered so far.
                currentWorker = (currentWorker + 1) % numWorkers
                Worker := mr.workers[currentWorker]
                mr.Unlock()
                var file string
                if phase == mapPhase {
                    file = mr.files[i]
                }
                args := DoTaskArgs{JobName: mr.jobName, File: file, Phase: phase, TaskNumber: i, NumOtherPhase: nios}
                // Dispatch asynchronously; mark the slot done only if the RPC succeeds,
                // so a failed task is retried on the next sweep.
                go func(slot int, worker_ string) {
                    success := call(worker_, "Worker.DoTask", &args, new(struct{}))
                    if success {
                        stats[slot] = true
                    }
                }(i, Worker)
            } else {
                count--
            }
        }
        if count == 0 {
            break
        }
        time.Sleep(time.Second)
    }
    debug("Schedule: %v phase done\n", phase)
}
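schedule dispatches each task through the skeleton's call helper, which this post omits; a minimal sketch, assuming the course's usual unix-socket net/rpc setup:

import "net/rpc"

// call dials srv over a unix socket, invokes rpcname with args,
// fills in reply, and reports whether the RPC succeeded.
func call(srv string, rpcname string, args interface{}, reply interface{}) bool {
    c, err := rpc.Dial("unix", srv)
    if err != nil {
        return false
    }
    defer c.Close()
    return c.Call(rpcname, args, reply) == nil
}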
go test -run Failure
go run ii.go master sequential pg-*.txt
func mapF(document string, value string) (res []mapreduce.KeyValue) {
    words := strings.FieldsFunc(value, func(c rune) bool {
        return !unicode.IsLetter(c)
    })
    // Deduplicate within the document: each word maps to this document at most once.
    WordDocument := make(map[string]string, 0)
    for _, word := range words {
        WordDocument[word] = document
    }
    res = make([]mapreduce.KeyValue, 0)
    for k, v := range WordDocument {
        res = append(res, mapreduce.KeyValue{k, v})
    }
    return
}

func reduceF(key string, values []string) string {
    nDoc := len(values)
    sort.Strings(values)
    var buf bytes.Buffer
    buf.WriteString(strconv.Itoa(nDoc))
    buf.WriteRune(' ')
    for i, doc := range values {
        buf.WriteString(doc)
        if i != nDoc-1 {
            buf.WriteRune(',')
        }
    }
    return buf.String()
}
head -n5 mrtmp.iiseq
A: 16 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
ABC: 2 pg-les_miserables.txt,pg-war_and_peace.txt
ABOUT: 2 pg-moby_dick.txt,pg-tom_sawyer.txt
ABRAHAM: 1 pg-dracula.txt
ABSOLUTE: 1 pg-les_miserables.txt