使用MapReduce将HDFS数据导入到HBase（三）

使用MapReduce生成HFile文件，再通过BulkLoad方式（绕过WAL和MemStore的常规写入路径）将HFile批量加载到HBase表中

package com.mengyao.bigdata.hbase;

import java.io.IOException;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that turns tab-separated user records stored on HDFS into HFiles and then
 * bulk-loads those HFiles into an existing HBase table.
 *
 * <p>Written against HBase-1.0.1.1 and Hadoop-2.6.0.
 *
 * @author mengyao
 */
public class BulkLoadApp {

    private static final Configuration conf = HBaseConfiguration.create();

    // Populated by createJob(String[]) and read again by main(String[]) for the bulk-load step.
    private static String inPath;
    private static String outPath;
    private static String tableName;

    static {
        conf.set("hbase.zookeeper.quorum", "bdata200,bdata202,bdata203");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
    }

    /**
     * Parses one input line per call and emits a {@link Put} keyed by the MD5 hex digest of the
     * record id.
     *
     * <p>Expected line layout (tab separated):
     * {@code id, username, email, birthday, mobile, phone, modified}.
     */
    static class BulkLoadMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        /** Number of tab-separated columns every record must have. */
        private static final int FIELD_COUNT = 7;

        /** Placeholder the source data uses for "no birthday"; it is not stored. */
        private static final String EMPTY_BIRTHDAY = "0000-00-00";

        private static final String COUNTER_GROUP = "BulkLoadApp";
        private static final String SKIPPED_COUNTER = "SKIPPED_MALFORMED_RECORDS";

        // Column family and qualifiers are constant, so encode them once instead of per record.
        private static final byte[] FAMILY = Bytes.toBytes("info");
        private static final byte[] Q_ID = Bytes.toBytes("id");
        private static final byte[] Q_USERNAME = Bytes.toBytes("username");
        private static final byte[] Q_MAIL = Bytes.toBytes("mail");
        private static final byte[] Q_BIRTHDAY = Bytes.toBytes("birthday");
        private static final byte[] Q_MOBILE = Bytes.toBytes("mobile");
        private static final byte[] Q_PHONE = Bytes.toBytes("phone");
        private static final byte[] Q_MODIFIED = Bytes.toBytes("modified");

        /**
         * Converts a single text line into a {@link Put}.
         *
         * <p>Lines with fewer than {@value #FIELD_COUNT} columns, or with an empty id, are skipped
         * and counted under the {@code BulkLoadApp/SKIPPED_MALFORMED_RECORDS} job counter instead
         * of failing the task or producing a row whose key is derived from an empty id.
         *
         * @param key     byte offset of the line in the input split (unused)
         * @param value   the raw input line
         * @param context task context used to emit the row key / {@link Put} pair
         * @throws IOException          if the output cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A limit of -1 keeps trailing empty columns; the default split() would drop them and
            // make the fixed-index reads below throw ArrayIndexOutOfBoundsException.
            String[] fields = value.toString().split("\t", -1);
            if (fields.length < FIELD_COUNT || StringUtils.isEmpty(fields[0])) {
                context.getCounter(COUNTER_GROUP, SKIPPED_COUNTER).increment(1);
                return;
            }

            String id = fields[0];
            String birthday = fields[3];

            byte[] rowKey = Bytes.toBytes(DigestUtils.md5Hex(id));
            Put put = new Put(rowKey, System.currentTimeMillis());

            put.addColumn(FAMILY, Q_ID, Bytes.toBytes(id));
            addIfPresent(put, Q_USERNAME, fields[1]);
            addIfPresent(put, Q_MAIL, fields[2]);
            // Both conditions must hold: the value is present AND it is not the placeholder date.
            if (!EMPTY_BIRTHDAY.equals(birthday)) {
                addIfPresent(put, Q_BIRTHDAY, birthday);
            }
            addIfPresent(put, Q_MOBILE, fields[4]);
            addIfPresent(put, Q_PHONE, fields[5]);
            addIfPresent(put, Q_MODIFIED, fields[6]);

            context.write(new ImmutableBytesWritable(rowKey), put);
        }

        /** Adds {@code value} to the {@code info} family under {@code qualifier} unless it is empty. */
        private static void addIfPresent(Put put, byte[] qualifier, String value) {
            if (!StringUtils.isEmpty(value)) {
                put.addColumn(FAMILY, qualifier, Bytes.toBytes(value));
            }
        }
    }

    /**
     * Configures and runs the HFile-generating MapReduce job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} HFile output directory,
     *             {@code args[2]} target HBase table name
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the HBase connection or the job submission fails
     */
    static int createJob(String[] args) throws Exception {
        inPath = args[0];
        outPath = args[1];
        tableName = args[2];

        TableName target = TableName.valueOf(tableName);
        // The connection, table and region locator are closed once the job has finished.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(target);
             RegionLocator regionLocator = connection.getRegionLocator(target)) {
            Job job = Job.getInstance(conf);
            job.setJarByClass(BulkLoadApp.class);
            job.setMapperClass(BulkLoadMapper.class);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(Put.class);
            job.setOutputFormatClass(HFileOutputFormat2.class);
            // configureIncrementalLoad installs the sorting reducer and the partitioner and sets
            // the reducer count to the table's region count, so no reducer count is set here
            // (an earlier setNumReduceTasks(0) would simply be overwritten).
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            FileInputFormat.addInputPath(job, new Path(inPath));
            FileOutputFormat.setOutputPath(job, new Path(outPath));
            return job.waitForCompletion(true) ? 0 : 1;
        }
    }

    /**
     * Command-line entry point: generates the HFiles and, on success, bulk-loads them.
     *
     * <p>Usage:
     * <pre>
     *   1. hadoop jar MyJar INPUT_FILE OUTPUT_DIR TABLE_NAME
     *        hadoop jar bigdata.jar /tag/data/user/haier_user.csv /tag/data/user/haier_user_out tbl_shopuser
     *   2. (manual alternative for the load step)
     *      hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles OUTPUT_DIR TABLE_NAME
     *        hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /tag/data/user/haier_user_out tbl_shopuser
     * </pre>
     *
     * <p>Exits with {@code 2} on wrong arguments, otherwise with the job status returned by
     * {@link #createJob(String[])}.
     *
     * @param args input path, output directory and table name, in that order
     * @throws Exception if job execution or the bulk load fails
     */
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.println("Usage: "+BulkLoadApp.class.getName()+" Input paramters <INPUT_PATH> <OUTPUT_PATH> <TABLE_NAME>");
            // Signal the usage error to calling scripts instead of exiting with success.
            System.exit(2);
        }

        int status = createJob(args);
        if (status == 0) {
            LoadIncrementalHFiles loadHFiles = new LoadIncrementalHFiles(conf);
            // Close the table handle once the HFiles have been handed over to the region servers.
            try (HTable table = new HTable(conf, Bytes.toBytes(tableName))) {
                loadHFiles.doBulkLoad(new Path(outPath), table);
            }
        }
        System.exit(status);
    }

}

使用MapReduce将HDFS数据导入到HBase（三）的更多相关文章

使用MapReduce将HDFS数据导入到HBase（二）
package com.bank.service; import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf. ...
使用MapReduce将HDFS数据导入到HBase（一）
package com.bank.service; import java.io.IOException; import org.apache.hadoop.conf.Configuration;im ...
使用MapReduce将HDFS数据导入Mysql
使用MapReduce将Mysql数据导入HDFS代码链接将HDFS数据导入Mysql,代码示例 package com.zhen.mysqlToHDFS; import java.io.DataI ...
使用MapReduce将mysql数据导入HDFS
package com.zhen.mysqlToHDFS; import java.io.DataInput; import java.io.DataOutput; import java.io.IO ...
用mapreduce读取hdfs数据到hbase上
hdfs数据到hbase过程将HDFS上的文件中的数据导入到hbase中实现上面的需求也有两种办法,一种是自定义mr,一种是使用hbase提供好的import工具 hbase先创建好表 cre ...
HBase(三): Azure HDInsigt HBase表数据导入本地HBase
目录: hdfs 命令操作本地 hbase Azure HDInsight HBase表数据导入本地 hbase hdfs命令操作本地hbase: 参见 HDP2.4安装(五):集群及组件安装 , ...
将Excel中数据导入数据库（三）
上篇文章将Excel中数据导入数据库时,将从Excel读入的数据均转换成了数据库相应字段的类型,其实这是没有必要的,因为对于数据库各种类型的插入,均可以字符串格式插入.比如表WQ_SWMSAR_A字段 ...
HBase结合MapReduce批量导入（HDFS中的数据导入到HBase）
HBase结合MapReduce批量导入 package hbase; import java.text.SimpleDateFormat; import java.util.Date; import ...
把hdfs数据写入到hbase表
功能:把hdfs上的数据写入到hbase表. hadoop的mapreduce输出要导入到hbase表,最好先输出HFile格式,再导入hbase,因为HFile是hbase的内部存储格式,所以导入效 ...

随机推荐

点击查看大图Activity
1.使用方式 Intent intent = new Intent(FriendCircleActivity.this, ImageGralleryPagerActivity.class);//0,索 ...
winform label去背景
以pictureBox上面显示一个不需要背景的label为例: 1.保证label的父控件是该pictureBox: 2.label的color属性为transParent:
【转】给大家分享一下目前mlc颗粒的内存卡资料
以下信息是LZ从其它论坛上找到的TF卡也是有讲究的,一分价钱一分货 dboy99 楼主骚(6) #1楼 2015-8-5 14:49引用Micro SD卡也叫TF卡,作为手机扩展存储空间的唯一方式用 ...
【EasyNetQ】- 控制队列名称
在为队列生成名称时,EasyNetQ的默认行为是使用消息类型名称并将其附加到订阅ID.例如PartyInvitation,命名空间中的消息类型EasyNetQ.Tests.Integration将使用 ...
js 给某个div增加class 样式（三种方式）
第一种: el.setAttribute('class','abc'); <!DOCTYPE HTML> <HTML> <HEAD> <meta c ...
larbin之哈希之谈
由于工作原因,打算对larbin的源码进行分析一番用的是2.6.3版本的larbin源码,由于这是业余,会断断续续的分析上传,已做记录笔记今天我们分析一下larbin的哈希表这个哈希表结构比较简 ...
关于如何利用原生js动态给一个空对象添加属性以及属性值
首先,回忆一下,访问对象属性一共有两种方法:点获取法和方括号获取法.而我们最常用的就是点获取法了.但是当我们遇到需要给对象动态添加属性和属性值时,点获取法好像就不太好用了,尤其是我们不知道属性名的时候 ...
并发(二)CyclicBarrier
CyclicBarrier 循环屏障,用于一组固定数目的线程互相等待.使用场景如下: 主任务有一组串行的执行节点,每个节点之间有一批任务,固定数量的线程执行这些任务,执行完成后,在节点完成集合后,再继 ...
java实现数据库连接的工具类
第一种 (带事务) package com.china.util; import java.sql.Connection; import java.sql.DriverManager; import ...
Android开发工具常用快捷键大全
Android开发中常用的开发工具有android studio和eclipse两种,下面小编整理了一些这两种开发工具中常用的快捷键,使用这些快捷键,你的android编程将事半功倍. android ...

使用MapReduce将HDFS数据导入到HBase（三）

使用MapReduce将HDFS数据导入到HBase（三）的更多相关文章

随机推荐

热门专题