
child    parent
Tom    Lucy
Tom    Jack
Jone    Lucy
Jone    Jack
Lucy    Mary
Lucy    Ben
Jack    Alice
Jack    Jesse
Terry    Alice
Terry    Jesse
Philip    Terry
Philip    Alima
Mark    Terry
Mark    Alma
package com.stjoin;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser; public class STjoin {
public static int time = 0;
public static class Map extends Mapper<Object, Text, Text, Text>{
public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
String childname = new String();
String parentname = new String();
String relationtype = new String();
String line = value.toString();
int i = 0;
while(line.charAt(i) != ' '){
String[] values = {line.substring(0, i),line.substring(i+1)};
System.out.println("child:"+values[0]+" parent:"+values[1]);
if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1
context.write(new Text(values[1]),new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[1]+" value: "+relationtype+"+"+childname+"+"+parentname);
relationtype = "2";
context.write(new Text(values[0]), new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[0]+" value: "+relationtype+"+"+childname+"+"+parentname);
public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
System.out.println("key:"+key+" values:"+values);
context.write(new Text("grandchild"), new Text("grandparent"));
int grandchildnum = 0;
String grandchild[] = new String[10];
int grandparentnum = 0;
String grandparent[] = new String[10]; Iterator ite = values.iterator();
String record = ite.next().toString();
System.out.println("record: "+record); int len = record.length();
int i = 2;
if(len==0) continue;
char relationtype = record.charAt(0);
String childname = new String();
String parentname = new String();
childname = childname + record.charAt(i);
System.out.println("childname: "+childname);
System.out.println("parentname: "+parentname);
if (relationtype=='1') {
grandchild[grandchildnum] = childname;
for(int m = 0 ; m < grandchildnum ; m++){
for(int n = 0 ; n < grandparentnum; n++){
context.write(new Text(grandchild[m]), new Text(grandparent[n]));
System.out.println("grandchild: "+grandchild[m]+" grandparent: "+grandparent[n]);
} public static void main(String [] args)throws Exception{
Configuration conf = new Configuration();
String otherArgs[] = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length != 2){
System.err.println("Usage: sort<in><out>");
Job job = new Job(conf,"single table join");
job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)? 0 : 1);


package com.stjoin;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser; public class STjoin {
public static int time = 0; public static class Map extends Mapper<Object, Text, Text, Text>{
public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
String relationtype = new String();
String line = value.toString();
int i = 0;
/* while(line.charAt(i) != ' '){
String[] values = {line.substring(0, i),line.substring(i+1)};
String[] values = new String[10];
StringTokenizer itr = new StringTokenizer(line);
values[i] = itr.nextToken();
i = i+1;
} System.out.println("child:"+values[0]+" parent:"+values[1]);
if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1 relationtype="1";
context.write(new Text(values[1]),new Text(relationtype+"+"+values[0]));
System.out.println("key:"+values[1]+" value: "+relationtype+"+"+values[0]);
relationtype = "2";
context.write(new Text(values[0]), new Text(relationtype+"+"+values[1]));
System.out.println("key:"+values[0]+" value: "+relationtype+"+"+values[1]);
} public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
System.out.println("key:"+key+" values:"+values);
context.write(new Text("grandchild"), new Text("grandparent"));
int grandchildnum = 0;
String grandchild[] = new String[10];
int grandparentnum = 0;
String grandparent[] = new String[10]; String name = new String();
// Iterator ite = values.iterator();
// while(ite.hasNext()){ //遍历方法二:用for循环
for(Text val : values){
// String record = ite.next().toString();
String record = val.toString();
System.out.println("record: "+record); int i = 2;
char relationtype = record.charAt(0);
name = record.substring(i); System.out.println("name: "+name); if (relationtype=='1') {
grandchild[grandchildnum] = name;
for(int m = 0 ; m < grandchildnum ; m++){
for(int n = 0 ; n < grandparentnum; n++){
context.write(new Text(grandchild[m]), new Text(grandparent[n]));
System.out.println("grandchild: "+grandchild[m]+" grandparent: "+grandparent[n]);
public static void main(String [] args)throws Exception{
Configuration conf = new Configuration();
String otherArgs[] = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length != 2){
System.err.println("Usage: sort<in><out>");
Job job = new Job(conf,"single table join");
job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)? 0 : 1);
14/09/22 20:31:48 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/09/22 20:31:48 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:31:48 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:31:48 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:31:48 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:31:48 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:31:48 INFO mapred.Task:  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@1a430c37
14/09/22 20:31:48 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:31:48 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:31:48 INFO mapred.MapTask: record buffer = 262144/327680
child:child  parent:parent
child:Tom  parent:Lucy
key:Lucy  value: 1+Tom+Lucy
key:Tom  value: 2+Tom+Lucy
child:Tom  parent:Jack
key:Jack  value: 1+Tom+Jack
key:Tom  value: 2+Tom+Jack
child:Jone  parent:Lucy
key:Lucy  value: 1+Jone+Lucy
key:Jone  value: 2+Jone+Lucy
child:Jone  parent:Jack
key:Jack  value: 1+Jone+Jack
key:Jone  value: 2+Jone+Jack
child:Lucy  parent:Mary
key:Mary  value: 1+Lucy+Mary
key:Lucy  value: 2+Lucy+Mary
child:Lucy  parent:Ben
key:Ben  value: 1+Lucy+Ben
key:Lucy  value: 2+Lucy+Ben
child:Jack  parent:Alice
key:Alice  value: 1+Jack+Alice
14/09/22 20:31:49 INFO mapred.MapTask: Starting flush of map output
key:Jack  value: 2+Jack+Alice
child:Jack  parent:Jesse
key:Jesse  value: 1+Jack+Jesse
key:Jack  value: 2+Jack+Jesse
child:Terry  parent:Alice
key:Alice  value: 1+Terry+Alice
key:Terry  value: 2+Terry+Alice
child:Terry  parent:Jesse
key:Jesse  value: 1+Terry+Jesse
key:Terry  value: 2+Terry+Jesse
child:Philip  parent:Terry
key:Terry  value: 1+Philip+Terry
key:Philip  value: 2+Philip+Terry
child:Philip  parent:Alima
key:Alima  value: 1+Philip+Alima
key:Philip  value: 2+Philip+Alima
child:Mark  parent:Terry
key:Terry  value: 1+Mark+Terry
key:Mark  value: 2+Mark+Terry
child:Mark  parent:Alma
key:Alma  value: 1+Mark+Alma
key:Mark  value: 2+Mark+Alma
14/09/22 20:31:49 INFO mapred.MapTask: Finished spill 0
14/09/22 20:31:49 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:31:49 INFO mapred.JobClient:  map 0% reduce 0%
14/09/22 20:31:51 INFO mapred.LocalJobRunner: 
14/09/22 20:31:51 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:31:51 INFO mapred.Task:  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5c448d98
14/09/22 20:31:51 INFO mapred.LocalJobRunner: 
14/09/22 20:31:51 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:31:51 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 564 bytes
14/09/22 20:31:51 INFO mapred.LocalJobRunner: 
key:Alice  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Alice
childname: Jack
parentname: Alice
record: 1+Terry+Alice
childname: Terry
parentname: Alice
key:Alima  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Philip+Alima
childname: Philip
parentname: Alima
key:Alma  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Mark+Alma
childname: Mark
parentname: Alma
key:Ben  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Ben
childname: Lucy
parentname: Ben
key:Jack  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jack+Alice
childname: Jack
parentname: Alice
record: 2+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Tom+Jack
childname: Tom
parentname: Jack
record: 1+Jone+Jack
childname: Jone
parentname: Jack
grandchild: Tom  grandparent: Alice
grandchild: Tom  grandparent: Jesse
grandchild: Jone  grandparent: Alice
grandchild: Jone  grandparent: Jesse
key:Jesse  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Terry+Jesse
childname: Terry
parentname: Jesse
key:Jone  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Jone+Jack
childname: Jone
parentname: Jack
key:Lucy  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Tom+Lucy
childname: Tom
parentname: Lucy
record: 1+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Lucy+Mary
childname: Lucy
parentname: Mary
record: 2+Lucy+Ben
childname: Lucy
parentname: Ben
grandchild: Tom  grandparent: Mary
grandchild: Tom  grandparent: Ben
grandchild: Jone  grandparent: Mary
grandchild: Jone  grandparent: Ben
key:Mark  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Mark+Terry
childname: Mark
parentname: Terry
record: 2+Mark+Alma
childname: Mark
parentname: Alma
key:Mary  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Mary
childname: Lucy
parentname: Mary
key:Philip  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Philip+Terry
childname: Philip
parentname: Terry
record: 2+Philip+Alima
childname: Philip
parentname: Alima
key:Terry  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Terry+Alice
childname: Terry
parentname: Alice
record: 2+Terry+Jesse
childname: Terry
parentname: Jesse
record: 1+Philip+Terry
childname: Philip
parentname: Terry
record: 1+Mark+Terry
childname: Mark
parentname: Terry
grandchild: Philip  grandparent: Alice
grandchild: Philip  grandparent: Jesse
grandchild: Mark  grandparent: Alice
grandchild: Mark  grandparent: Jesse
key:Tom  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Tom+Jack
childname: Tom
parentname: Jack
record: 2+Tom+Lucy
childname: Tom
parentname: Lucy
14/09/22 20:31:52 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:31:52 INFO mapred.LocalJobRunner: 
14/09/22 20:31:52 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:31:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output07
14/09/22 20:31:52 INFO mapred.JobClient:  map 100% reduce 0%
14/09/22 20:31:54 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:31:54 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:31:55 INFO mapred.JobClient:  map 100% reduce 100%
14/09/22 20:31:55 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:31:55 INFO mapred.JobClient: Counters: 22
14/09/22 20:31:55 INFO mapred.JobClient:   Map-Reduce Framework
14/09/22 20:31:55 INFO mapred.JobClient:     Spilled Records=56
14/09/22 20:31:55 INFO mapred.JobClient:     Map output materialized bytes=568
14/09/22 20:31:55 INFO mapred.JobClient:     Reduce input records=28
14/09/22 20:31:55 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient:     Map input records=15
14/09/22 20:31:55 INFO mapred.JobClient:     SPLIT_RAW_BYTES=117
14/09/22 20:31:55 INFO mapred.JobClient:     Map output bytes=506
14/09/22 20:31:55 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/09/22 20:31:55 INFO mapred.JobClient:     Physical memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient:     Reduce input groups=13
14/09/22 20:31:55 INFO mapred.JobClient:     Combine output records=0
14/09/22 20:31:55 INFO mapred.JobClient:     Reduce output records=13
14/09/22 20:31:55 INFO mapred.JobClient:     Map output records=28
14/09/22 20:31:55 INFO mapred.JobClient:     Combine input records=0
14/09/22 20:31:55 INFO mapred.JobClient:     CPU time spent (ms)=0
14/09/22 20:31:55 INFO mapred.JobClient:     Total committed heap usage (bytes)=408420352
14/09/22 20:31:55 INFO mapred.JobClient:   File Input Format Counters 
14/09/22 20:31:55 INFO mapred.JobClient:     Bytes Read=163
14/09/22 20:31:55 INFO mapred.JobClient:   FileSystemCounters
14/09/22 20:31:55 INFO mapred.JobClient:     HDFS_BYTES_READ=326
14/09/22 20:31:55 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=81802
14/09/22 20:31:55 INFO mapred.JobClient:     FILE_BYTES_READ=912
14/09/22 20:31:55 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=149
14/09/22 20:31:55 INFO mapred.JobClient:   File Output Format Counters 
14/09/22 20:31:55 INFO mapred.JobClient:     Bytes Written=149
14/09/22 20:26:02 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/09/22 20:26:02 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:26:02 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:26:02 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:26:03 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:26:03 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:26:03 INFO mapred.Task:  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3b8c40d0
14/09/22 20:26:03 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:26:03 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:26:03 INFO mapred.MapTask: record buffer = 262144/327680
child:child  parent:parent
child:Tom  parent:Lucy
key:Lucy  value: 1+Tom
key:Tom  value: 2+Lucy
child:Tom  parent:Jack
key:Jack  value: 1+Tom
key:Tom  value: 2+Jack
child:Jone  parent:Lucy
key:Lucy  value: 1+Jone
key:Jone  value: 2+Lucy
child:Jone  parent:Jack
key:Jack  value: 1+Jone
key:Jone  value: 2+Jack
child:Lucy  parent:Mary
key:Mary  value: 1+Lucy
key:Lucy  value: 2+Mary
child:Lucy  parent:Ben
key:Ben  value: 1+Lucy
key:Lucy  value: 2+Ben
child:Jack  parent:Alice
key:Alice  value: 1+Jack
key:Jack  value: 2+Alice
child:Jack  parent:Jesse
key:Jesse  value: 1+Jack
key:Jack  value: 2+Jesse
child:Terry  parent:Alice
key:Alice  value: 1+Terry
key:Terry  value: 2+Alice
child:Terry  parent:Jesse
key:Jesse  value: 1+Terry
key:Terry  value: 2+Jesse
child:Philip  parent:Terry
key:Terry  value: 1+Philip
key:Philip  value: 2+Terry
child:Philip  parent:Alima
key:Alima  value: 1+Philip
key:Philip  value: 2+Alima
child:Mark  parent:Terry
key:Terry  value: 1+Mark
key:Mark  value: 2+Terry
child:Mark  parent:Alma
key:Alma  value: 1+Mark
key:Mark  value: 2+Alma
14/09/22 20:26:03 INFO mapred.MapTask: Starting flush of map output
14/09/22 20:26:03 INFO mapred.MapTask: Finished spill 0
14/09/22 20:26:03 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:26:04 INFO mapred.JobClient:  map 0% reduce 0%
14/09/22 20:26:06 INFO mapred.LocalJobRunner: 
14/09/22 20:26:06 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:26:06 INFO mapred.Task:  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@4eba27a5
14/09/22 20:26:06 INFO mapred.LocalJobRunner: 
14/09/22 20:26:06 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:26:06 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 414 bytes
14/09/22 20:26:06 INFO mapred.LocalJobRunner: 
key:Alice  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
key:Alima  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Philip
name: Philip
key:Alma  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Mark
name: Mark
key:Ben  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
key:Jack  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
grandchild: Tom  grandparent: Alice
grandchild: Tom  grandparent: Jesse
grandchild: Jone  grandparent: Alice
grandchild: Jone  grandparent: Jesse
key:Jesse  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
key:Jone  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Lucy
name: Lucy
record: 2+Jack
name: Jack
key:Lucy  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
record: 2+Mary
name: Mary
record: 2+Ben
name: Ben
grandchild: Tom  grandparent: Mary
grandchild: Tom  grandparent: Ben
grandchild: Jone  grandparent: Mary
grandchild: Jone  grandparent: Ben
key:Mark  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alma
name: Alma
key:Mary  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
key:Philip  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alima
name: Alima
key:Terry  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Philip
name: Philip
record: 1+Mark
name: Mark
grandchild: Philip  grandparent: Alice
grandchild: Philip  grandparent: Jesse
grandchild: Mark  grandparent: Alice
grandchild: Mark  grandparent: Jesse
key:Tom  values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Jack
name: Jack
record: 2+Lucy
name: Lucy
14/09/22 20:26:06 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:26:06 INFO mapred.LocalJobRunner: 
14/09/22 20:26:06 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:26:06 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output06
14/09/22 20:26:07 INFO mapred.JobClient:  map 100% reduce 0%
14/09/22 20:26:09 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:26:09 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:26:10 INFO mapred.JobClient:  map 100% reduce 100%
14/09/22 20:26:10 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:26:10 INFO mapred.JobClient: Counters: 22
14/09/22 20:26:10 INFO mapred.JobClient:   Map-Reduce Framework
14/09/22 20:26:10 INFO mapred.JobClient:     Spilled Records=56
14/09/22 20:26:10 INFO mapred.JobClient:     Map output materialized bytes=418
14/09/22 20:26:10 INFO mapred.JobClient:     Reduce input records=28
14/09/22 20:26:10 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient:     Map input records=15
14/09/22 20:26:10 INFO mapred.JobClient:     SPLIT_RAW_BYTES=117
14/09/22 20:26:10 INFO mapred.JobClient:     Map output bytes=356
14/09/22 20:26:10 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/09/22 20:26:10 INFO mapred.JobClient:     Physical memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient:     Reduce input groups=13
14/09/22 20:26:10 INFO mapred.JobClient:     Combine output records=0
14/09/22 20:26:10 INFO mapred.JobClient:     Reduce output records=13
14/09/22 20:26:10 INFO mapred.JobClient:     Map output records=28
14/09/22 20:26:10 INFO mapred.JobClient:     Combine input records=0
14/09/22 20:26:10 INFO mapred.JobClient:     CPU time spent (ms)=0
14/09/22 20:26:10 INFO mapred.JobClient:     Total committed heap usage (bytes)=406847488
14/09/22 20:26:10 INFO mapred.JobClient:   File Input Format Counters 
14/09/22 20:26:10 INFO mapred.JobClient:     Bytes Read=163
14/09/22 20:26:10 INFO mapred.JobClient:   FileSystemCounters
14/09/22 20:26:10 INFO mapred.JobClient:     HDFS_BYTES_READ=326
14/09/22 20:26:10 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=81502
14/09/22 20:26:10 INFO mapred.JobClient:     FILE_BYTES_READ=762
14/09/22 20:26:10 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=149
14/09/22 20:26:10 INFO mapred.JobClient:   File Output Format Counters 
14/09/22 20:26:10 INFO mapred.JobClient:     Bytes Written=149
grandchild    grandparent
Tom    Alice
Tom    Jesse
Jone    Alice
Jone    Jesse
Tom    Mary
Tom    Ben
Jone    Mary
Jone    Ben
Philip    Alice
Philip    Jesse
Mark    Alice
Mark    Jesse

MapReduce编程系列 — 5:单表关联的更多相关文章

  1. MapReduce应用案例--单表关联

    1. 实例描述 单表关联这个实例要求从给出的数据中寻找出所关心的数据,它是对原始数据所包含信息的挖掘. 实例中给出child-parent 表, 求出grandchild-grandparent表. ...

  2. Hadoop on Mac with IntelliJ IDEA - 8 单表关联NullPointerException

    简化陆喜恒. Hadoop实战(第2版)5.4单表关联的代码时遇到空指向异常,经分析是逻辑问题,在此做个记录. 环境:Mac OS X 10.9.5, IntelliJ IDEA 13.1.5, Ha ...

  3. Hadoop 单表关联

    前面的实例都是在数据上进行一些简单的处理,为进一步的操作打基础.单表关联这个实例要求从给出的数据中寻找到所关心的数据,它是对原始数据所包含信息的挖掘.下面进入这个实例. 1.实例描述 实例中给出chi ...

  4. MapRedece(单表关联)

    源数据:Child--Parent表 Tom Lucy Tom Jack Jone Lucy Jone Jack Lucy Marry Lucy Ben Jack Alice Jack Jesse T ...

  5. MR案例:单表关联查询

    "单表关联"这个实例要求从给出的数据中寻找所关心的数据,它是对原始数据所包含信息的挖掘. 需求:实例中给出 child-parent(孩子—父母)表,要求输出 grandchild ...

  6. MySQL 性能优化系列之一 单表预处理

    MySQL 性能优化系列之一 单表预处理 背景介绍 我们经常在写多表关联的SQL时,会想到 left join(左关联),right join(右关联),inner join(内关联)等. 但是,当表 ...

  7. MySql系列之单表查询

    单表查询的语法 SELECT 字段1,字段2... FROM 表名 WHERE 条件 GROUP BY field HAVING 筛选 ORDER BY field LIMIT 限制条数 关键字的执行 ...

  8. MapReduce编程系列 — 6:多表关联

    1.项目名称: 2.程序代码: 版本一(详细版): package com.mtjoin; import java.io.IOException; import java.util.Iterator; ...

  9. 【原创】MapReduce编程系列之表连接

    问题描述 需要连接的表如下:其中左边是child,右边是parent,我们要做的是找出grandchild和grandparent的对应关系,为此需要进行表的连接. Tom Lucy Tom Jim ...


  1. linux 进程控制笔记

    进程创建 普通函数调用完成后,最多返回(return)一次,但fork/vfork会返回二次,一次返回给父进程,一次返回给子进程 父进程的返回值为子进程的进程ID,子进程的返回值为0 1.pid_t ...

  2. oracle 外部表

    CREATE TABLE "EXT_ENTRY_WORKFLOW" ( ), ), "CREATE_DATE" DATE, ), ), ), ), ), ), ...

  3. C# WPF打印报表

    前天我的一个同学由于打印报表而苦恼,所以就介绍了一下WPF的打印报表,希望能帮助到大家. 展示报表 1. 首先新建项“报表”,选定项目,右击,点击“添加”->“新建项”->“报表”

  4. Django 学习笔记之三 数据库输入数据

    假设建立了django_blog项目,建立blog的app ,在models.py里面增加了Blog类,同步数据库,并且建立了对应的表.具体的参照Django 学习笔记之二的相关命令. 那么这篇主要介 ...

  5. matlab和本机MySQL链接

    1.安装好 ***matlab*** 和 ***mysql***: 2.[下载](http://dev.mysql.com/downloads/connector/j/#downloads) mysq ...

  6. JAVA里的String、Timestamp、Date相互转换(转)

    转自:http://blog.sina.com.cn/s/blog_6675493d0100lbfl.html Timestamp转化为String: SimpleDateFormat df = ne ...

  7. Objective-C传递数据小技巧

    转自:http://www.guokr.com/blog/203413/ 比如说,如果你想向UIAlertView的delegate方法中传递一些信息,怎么办?继承UIAlertView么?使用Cat ...

  8. 01-06-01【Nhibernate (版本3.3.1.4000) 出入江湖】事务

    Nhibernate事务的使用: public void Add(Customer customer) { ISession session = _sessionManager.GetSession( ...

  9. dom4j处理xml文件,读取xml字符串,格式化xml文件

    1.xml文件 <?xml version="1.0" encoding="UTF-8"?> <employees> <emplo ...

  10. Spring @ Component 的作用

    1.@controller 控制器(注入服务) 2.@service 服务(注入dao) 3.@repository dao(实现dao访问) 4.@component (把普通pojo实例化到spr ...