使用MapReduce在HBase与HDFS之间导入和导出数据

附录代码:

HBase---->HDFS 通过MR从HBase导出到HDFS

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.hbase.HBaseConfiguration;

 import org.apache.hadoop.hbase.client.Result;

 import org.apache.hadoop.hbase.client.Scan;

 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

 import org.apache.hadoop.hbase.mapreduce.TableMapper;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

 /**
  * MapReduce job that exports an HBase table to tab-separated text files on HDFS.
  *
  * <p>Usage: {@code HBase2HDFS <hbase-table> <hdfs-output-dir>}.
  *
  * <p>Every exported line has the form {@code rowkey<TAB>name<TAB>age}, where the two values are
  * the latest versions of the {@code cf:name} and {@code cf:age} columns. The job is map-only:
  * the mapper already produces the final line layout, so no reducer is configured.
  */
 public class HBase2HDFS {

     /**
      * Configures and runs the export job, then exits with 0 on success and 1 on failure.
      *
      * @param args {@code args[0]} is the source HBase table name, {@code args[1]} the HDFS
      *             output directory
      * @throws Exception if the job cannot be configured or submitted
      */
     public static void main(String[] args) throws Exception {
         if (args.length < 2) {
             System.err.println("Usage: HBase2HDFS <hbase-table> <hdfs-output-dir>");
             System.exit(2);
         }

         Configuration conf = HBaseConfiguration.create();
         Job job = Job.getInstance(conf, HBase2HDFS.class.getSimpleName());
         job.setJarByClass(HBase2HDFS.class);

         // A full-table MR scan reads every block once; caching those blocks would only
         // evict data that online readers actually need from the region server block cache.
         Scan scan = new Scan();
         scan.setCacheBlocks(false);

         // HBase input is wired up through TableMapReduceUtil instead of a FileInputFormat.
         // This call also registers the mapper class and the map output key/value types.
         TableMapReduceUtil.initTableMapperJob(args[0], scan, HBase2HDFSMapper.class,
                 Text.class, Text.class, job);

         // Ship the HBase jars with the job.
         TableMapReduceUtil.addDependencyJars(job);

         job.setOutputFormatClass(TextOutputFormat.class);
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         // Map-only job: mapper output is written straight to the output directory.
         job.setNumReduceTasks(0);

         // Propagate job failure to the caller through the process exit code.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }

     /**
      * Turns one HBase row into one output record: key = row key, value = {@code name<TAB>age}.
      *
      * <p>{@link TableMapper} fixes the mapper's input types to
      * {@code ImmutableBytesWritable}/{@code Result}; the two type arguments given here are the
      * map output key and value types.
      */
     static class HBase2HDFSMapper extends TableMapper<Text, Text> {

         private static final java.nio.charset.Charset UTF_8 =
                 java.nio.charset.StandardCharsets.UTF_8;

         private static final byte[] FAMILY = "cf".getBytes(UTF_8);
         private static final byte[] NAME = "name".getBytes(UTF_8);
         private static final byte[] AGE = "age".getBytes(UTF_8);

         // Reused across map() calls to avoid allocating two Text objects per row.
         private final Text rowKeyText = new Text();
         private final Text value = new Text();

         /**
          * Emits {@code rowkey -> name<TAB>age} for the given row.
          *
          * <p>Rows that lack {@code cf:name} or {@code cf:age} are skipped and counted under the
          * {@code HBase2HDFS / ROWS_MISSING_COLUMNS} counter instead of failing the task.
          *
          * @param key     row key of the current row
          * @param result  cells of the current row
          * @param context MapReduce context used to emit the record
          * @throws IOException          if writing the record fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void map(
                 ImmutableBytesWritable key,
                 Result result,
                 Mapper<ImmutableBytesWritable, Result, Text, Text>.Context context)
                 throws IOException, InterruptedException {
             // Result.getValue returns the newest version of the column, or null when the
             // column is absent, so no Cell has to be dereferenced.
             byte[] nameBytes = result.getValue(FAMILY, NAME);
             byte[] ageBytes = result.getValue(FAMILY, AGE);
             if (nameBytes == null || ageBytes == null) {
                 context.getCounter("HBase2HDFS", "ROWS_MISSING_COLUMNS").increment(1);
                 return;
             }

             // key.get() exposes the whole backing array; only [offset, offset + length)
             // belongs to this row key.
             rowKeyText.set(key.get(), key.getOffset(), key.getLength());
             value.set(new String(nameBytes, UTF_8) + "\t" + new String(ageBytes, UTF_8));
             context.write(rowKeyText, value);
         }
     }
 }

HDFS---->HBase 通过MR导入到HBase

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.hbase.HBaseConfiguration;

 import org.apache.hadoop.hbase.client.Mutation;

 import org.apache.hadoop.hbase.client.Put;

 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

 import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;

 import org.apache.hadoop.hbase.mapreduce.TableReducer;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.io.NullWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

 /**
  * MapReduce job that imports tab-separated text files from HDFS into an HBase table.
  *
  * <p>Usage: {@code HDFS2HBaseImport <hbase-table> <hdfs-input-path>}.
  *
  * <p>Every input line is expected to look like {@code rowkey<TAB>name<TAB>age}. The values are
  * stored in the {@code cf:name} and {@code cf:age} columns of the row identified by
  * {@code rowkey}.
  */
 public class HDFS2HBaseImport {

     private static final java.nio.charset.Charset UTF_8 =
             java.nio.charset.StandardCharsets.UTF_8;

     /** Counter group used for records that are skipped because they are malformed. */
     private static final String COUNTER_GROUP = "HDFS2HBaseImport";

     /**
      * Configures and runs the import job, then exits with 0 on success and 1 on failure.
      *
      * @param args {@code args[0]} is the target HBase table name, {@code args[1]} the HDFS
      *             input path(s)
      * @throws Exception if the job cannot be configured or submitted
      */
     public static void main(String[] args) throws Exception {
         if (args.length < 2) {
             System.err.println("Usage: HDFS2HBaseImport <hbase-table> <hdfs-input-path>");
             System.exit(2);
         }

         Configuration conf = HBaseConfiguration.create();
         // TableOutputFormat reads the target table name from the configuration.
         conf.set(TableOutputFormat.OUTPUT_TABLE, args[0]);

         Job job = Job.getInstance(conf, HDFS2HBaseImport.class.getSimpleName());
         job.setJarByClass(HDFS2HBaseImport.class);

         // Ship the HBase jars with the job.
         TableMapReduceUtil.addDependencyJars(job);

         job.setMapperClass(HDFS2HBaseMapper.class);
         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(Text.class);

         job.setReducerClass(HDFS2HBaseReducer.class);
         // The reducer emits Puts, so the only output format is TableOutputFormat.
         job.setOutputFormatClass(TableOutputFormat.class);

         FileInputFormat.setInputPaths(job, args[1]);

         // Propagate job failure to the caller through the process exit code.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }

     /**
      * Splits each input line into row key and {@code name<TAB>age}, keyed by row key.
      */
     static class HDFS2HBaseMapper extends Mapper<LongWritable, Text, Text, Text> {

         // Reused across map() calls to avoid allocating two Text objects per line.
         private final Text rowKeyText = new Text();
         private final Text value = new Text();

         /**
          * Emits {@code rowkey -> name<TAB>age} for one input line.
          *
          * <p>Lines with fewer than three tab-separated fields are skipped and counted under
          * the {@code HDFS2HBaseImport / MALFORMED_LINES} counter instead of failing the task.
          *
          * @param key     byte offset of the line in the input file (unused)
          * @param text    the input line
          * @param context MapReduce context used to emit the record
          * @throws IOException          if writing the record fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void map(LongWritable key, Text text,
                 Mapper<LongWritable, Text, Text, Text>.Context context)
                 throws IOException, InterruptedException {
             String[] splits = text.toString().split("\t");
             if (splits.length < 3) {
                 context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                 return;
             }
             rowKeyText.set(splits[0]);
             value.set(splits[1] + "\t" + splits[2]); // name\tage
             context.write(rowKeyText, value);
         }
     }

     /**
      * Converts each {@code name<TAB>age} value into a {@link Put} on the row named by the key.
      *
      * <p>{@link TableReducer} fixes the reducer's output value type to {@code Mutation}; the
      * output key carries no information for the table write, so {@code NullWritable} is used.
      */
     static class HDFS2HBaseReducer extends TableReducer<Text, Text, NullWritable> {

         private static final byte[] FAMILY = "cf".getBytes(UTF_8);
         private static final byte[] NAME = "name".getBytes(UTF_8);
         private static final byte[] AGE = "age".getBytes(UTF_8);

         /**
          * Writes one {@code Put} per value of the given row key.
          *
          * <p>Values with fewer than two tab-separated fields are skipped and counted under the
          * {@code HDFS2HBaseImport / MALFORMED_VALUES} counter.
          *
          * @param k2      row key
          * @param v2s     all {@code name<TAB>age} values seen for that row key
          * @param context MapReduce context used to emit the mutations
          * @throws IOException          if writing a mutation fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void reduce(Text k2, Iterable<Text> v2s,
                 Reducer<Text, Text, NullWritable, Mutation>.Context context)
                 throws IOException, InterruptedException {
             // Text.getBytes() returns the backing buffer, which is only valid up to
             // getLength(); copy exactly the key bytes so no stale bytes leak into the row key.
             byte[] row = java.util.Arrays.copyOf(k2.getBytes(), k2.getLength());

             for (Text text : v2s) {
                 String[] splits = text.toString().split("\t");
                 if (splits.length < 2) {
                     context.getCounter(COUNTER_GROUP, "MALFORMED_VALUES").increment(1);
                     continue;
                 }

                 // A fresh Put per value: reusing one Put across iterations would re-send
                 // every previously added cell with each later write.
                 Put put = new Put(row);
                 // NOTE(review): newer HBase client versions replace Put.add(family, qualifier,
                 // value) with addColumn(...) - confirm against the deployed client version.
                 put.add(FAMILY, NAME, splits[0].getBytes(UTF_8));
                 put.add(FAMILY, AGE, splits[1].getBytes(UTF_8));

                 // Only the Put matters for the table write; the key is a placeholder.
                 context.write(NullWritable.get(), put);
             }
         }
     }
 }

MapReduce的方式进行HBase向HDFS导入和导出的更多相关文章

HBase从hdfs导入数据
需求:将HDFS上的文件中的数据导入到hbase中实现上面的需求也有两种办法,一种是自定义mr,一种是使用hbase提供好的import工具一.hdfs中的数据是这样的每一行的数据是这样的id ...
HBase数据的导入和导出
查阅了几篇中英文资料,发现有的地方说的不是很全部,总结在此,共有两种命令行的方式来实现数据的导入导出功能,即备份和还原. 1 HBase本身提供的接口其调用形式为: 1)导入 ./hbase org ...
HBase 实战(1)--HBase的数据导入方式
前言: 作为Hadoop生态系统中重要的一员, HBase作为分布式列式存储, 在线实时处理的特性, 备受瞩目, 将来能在很多应用场景, 取代传统关系型数据库的江湖地位. 本篇博文重点讲解HBase的 ...
mapreduce方式操作hbase
一.导入数据到hbase 1.配置hbase-site.xml指向hdfs <configuration> <property> <name>hbase.rootd ...
HBase、HDFS和MapReduce架构异同简解
HBase.HDFS和MapReduce架构异同 .. HBase(公司架构模型) HDFS2.0(公司架构模型) MR2.0(公司架构模型) MR1.0(公司架构模型) 中央 HMaster Nam ...
HBase数据快速导入之ImportTsv&Bulkload
导入数据最快的方式,可以略过WAL直接生产底层HFile文件 (环境:centos6.5.Hadoop2.6.0.HBase0.98.9) 1.SHELL方式 1.1 ImportTsv直接导入命令 ...
Sqoop_mysql,hive,hdfs导入导出操作
前言: 搭建环境,这里使用cdh版hadoop+hive+sqoop+mysql 下载 hadoop-2.5.0-cdh5.3.6.tar.gz hive-0.13.1-cdh5.3.6.tar.gz ...
HBase -- 基于HDFS的开源分布式NoSQL数据库
HBase(Hadoop Database)是一个高可靠性.高性能.面向列.可伸缩的分布式存储系统,我们可以利用HBase技术在廉价的PC上搭建起大规模结构化存储集群.同Google的Bigtable ...
HBase(三): Azure HDInsigt HBase表数据导入本地HBase
目录: hdfs 命令操作本地 hbase Azure HDInsight HBase表数据导入本地 hbase hdfs命令操作本地hbase: 参见 HDP2.4安装(五):集群及组件安装 , ...

随机推荐

redis的使用
phpredis是php的一个扩展,效率是相当高有链表排序功能,对创建内存级的模块业务关系很有用;以下是redis官方提供的命令使用技巧: 下载地址如下: https://github.com/ow ...
mongodb基础系列——数据库查询数据返回前台JSP（二）
上篇博客论述了,数据库查询数据返回前台JSP.博客中主要使用Ajax调用来显示JSON串,来获取其中某一个字段,赋给界面中的某一个控件. 那这篇博客中,我们讲解,把后台List传递JSP展示. Lis ...
Oracle DB 执行用户管理的备份和恢复
• 说明用户管理的备份和恢复与服务器管理的备份和恢复之间的差异 • 执行用户管理的数据库完全恢复 • 执行用户管理的数据库不完全恢复备份和恢复的使用类型数据库备份和恢复的类型包括: • 用户管理 ...
matlab和FPGA中无符号数和有符号数的转化（转）
在FPGA 设计过程中经常会遇到关于数表示之间的转化问题,最常见的是无符号数和有符号数之间的转化问题.(1)在FPGA设计过程中,能够很直接的看出数字的位宽,但经常以无符号数的形式输出,在后继的处理中 ...
windbg命令分类与概述
WinDBG的大多数功能是以命令方式工作的, 本系列将介绍WinDBG的三类命令, 标准命令, 元命令和扩展命令. =============== 标准命令 =============== 标准命令用 ...
Installation Directory must be on a local hard drive解决办法
今天带着公司的电脑来杭州这边,同事发来一个Sliksubversion.msi来进行安装,由于系统是win8.1的,直接点击安装不了,真的是醉了,于是乎发挥度娘的力量找到了答案,这里贴出来,供大家来参 ...
Oracle新建用户、角色，授权，建表空间
oracle数据库的权限系统分为系统权限与对象权限.系统权限( database system privilege )可以让用户执行特定的命令集.例如,create table权限允许用户创建表,gr ...
怎么修改电脑MAC地址电脑MAC地址修改图文教程
本文转载:http://www.45fan.com/a/Router/2677.html MAC地址是指电脑网卡的硬件地址,此地址一般烧录在网卡上.MAC地址工作在OSI七层模型的第二层,即数据链接层 ...
iOS开发——UI_swift篇&UITableView实现单元格展开与隐藏
UITableView实现单元格展开与隐藏关于UITableView的展开的收缩在前面的文章我已经结束,就是使用代理,通知,block传值的时候实现的,当时是使用一个Bool值来实现,最后使用着三 ...
android151 笔记
13. 14 .什么是Service以及描述下它的生命周期.Service有哪些启动方法,有什么区别,怎样停用Service? 在Service的生命周期中,被回调的方法比Activity少一些,只有 ...

MapReduce的方式进行HBase向HDFS导入和导出

MapReduce的方式进行HBase向HDFS导入和导出的更多相关文章

随机推荐

热门专题