Reading Hive data files and importing the data into HBase

The MapReduce job below reads Hive text data from HDFS, pulls the HBase mapping (table name, row-key columns, column family, and qualifiers) from MySQL through the JDBCUtils helper, and writes HFiles with HFileOutputFormat2 for bulk loading.
package cn.tansun.bd.hbase;

import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import cn.tansun.bd.utils.JDBCUtils;

/**
 * @author zgl
 * @version created 2016-07-05 19:57
 */
public class HiveMySQl2HBaseMR extends Configured implements Tool {

    public static String tableName;
    public static String cf = null;
    public static String strRowkey = null;
    public static String strIndex = null;
    public static String column_name = null;
    public static String strColumn = null;
    private static Configuration conf = null;

    public static void main(String[] args) {
        getDatas();
        try {
            int runs = ToolRunner.run(new HiveMySQl2HBaseMR(), args);
            System.exit(runs);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Read the HBase mapping from MySQL: column family, row-key spec,
    // and the (hive column index, qualifier name) pairs
    @SuppressWarnings("rawtypes")
    public static List<Map> getDatas() {
        String sql = "SELECT DISTINCT s.tableName, ar.rowkey, af.column_family, aq.column_hive_index, aq.column_name FROM"
                + " archive_htable s, archive_hrowkey ar, archive_hfamily af, archive_hqualifier aq WHERE"
                + " s.rowkey_id = ar.rowkey_id AND ar.family_id = af.family_id AND s.tableName = '2'";
        List<Map> selectDatas = JDBCUtils.selectDatas(sql);
        for (Map<String, String> metaData : selectDatas) {
            if (null == tableName) {
                tableName = metaData.get("tableName");
            }
            if (null == cf) {
                cf = metaData.get("column_family");
            }
            if (null == strRowkey) {
                strRowkey = metaData.get("rowkey");
            }
            String strTempIndex = metaData.get("column_hive_index");
            String strTempName = metaData.get("column_name");
            // Join index and name with a tab so the mapper can split on "\t"
            if (null == strColumn || "".equals(strColumn)) {
                strColumn = strTempIndex + "\t" + strTempName;
            } else {
                strColumn = strColumn + "," + strTempIndex + "\t" + strTempName;
            }
        }
        return selectDatas;
    }

    @SuppressWarnings("deprecation")
    public int run(String[] args) throws Exception {
        // if (args.length != 3) {
        //     System.err.println("Usage: HiveMySQl2HBaseMR <table_name> <data_input_path> <hfile_output_path>");
        //     System.exit(-1);
        // }
        conf = new Configuration();
        conf.addResource("hbase-site.xml");
        String table = "2";
        String input = "hdfs://node11:9000/datas/hivedata5";
        String output = "hdfs://node11:9000/datas/out1";
        HTable htable;
        try {
            // Delete the intermediate output directory if it already exists
            try {
                FileSystem fs = FileSystem.get(URI.create(output), conf);
                fs.delete(new Path(output), true);
                fs.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
            htable = new HTable(conf, table.getBytes());
            Job job = new Job(conf);
            job.setJobName("Generate HFile");
            job.setJarByClass(HiveMySQl2HBaseMR.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(HiveMySQlMapper.class);
            FileInputFormat.setInputPaths(job, input);
            job.getConfiguration().set("mapred.mapoutput.key.class",
                    "org.apache.hadoop.hbase.io.ImmutableBytesWritable");
            job.getConfiguration().set("mapred.mapoutput.value.class",
                    "org.apache.hadoop.hbase.KeyValue");
            FileOutputFormat.setOutputPath(job, new Path(output));
            HFileOutputFormat2.configureIncrementalLoad(job, htable);
            try {
                job.waitForCompletion(true);
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }

    public static class HiveMySQlMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            conf = new Configuration();
        }

        String tableName = HiveMySQl2HBaseMR.tableName;
        String cf = HiveMySQl2HBaseMR.cf;
        String rowKey = HiveMySQl2HBaseMR.strRowkey;
        String strColumnName = HiveMySQl2HBaseMR.column_name;
        String strColumn = HiveMySQl2HBaseMR.strColumn;
        // Joiner for composite row keys, Hive's default \001 delimiter
        String split = "\001";

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The row-key spec is a comma-separated list of column indexes;
            // resolve each index against the input line to build the row key
            String strRowKey = "";
            String[] datas = value.toString().split("\t");
            for (String strIndex : rowKey.split(",")) {
                if (null == strRowKey || "".equals(strRowKey)) {
                    strRowKey = datas[Integer.valueOf(strIndex)];
                } else {
                    // Composite key, e.g. "a\001b\001c"
                    strRowKey = strRowKey + split + datas[Integer.valueOf(strIndex)];
                }
            }
            // The map output key must hold the same bytes as the KeyValue's row
            ImmutableBytesWritable rk = new ImmutableBytesWritable(Bytes.toBytes(strRowKey));
            for (String str : strColumn.split(",")) {
                String[] columnTuple = str.split("\t");
                String columnData = datas[Integer.valueOf(columnTuple[0])];
                String columnName = columnTuple[1];
                // KeyValue(byte[] row, byte[] family, byte[] qualifier, byte[] value)
                KeyValue kv = new KeyValue(Bytes.toBytes(strRowKey),
                        cf.getBytes(), Bytes.toBytes(columnName), Bytes.toBytes(columnData));
                context.write(rk, kv);
            }
        }
    }
}
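The job above only generates HFiles under the output directory; they still have to be handed to HBase before the data becomes visible. A minimal sketch of that final step, assuming the same HBase version as the code above (the old 0.98/1.x API, where LoadIncrementalHFiles accepts the deprecated HTable handle) and the same table name and output path as in run():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class CompleteBulkLoad {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.addResource("hbase-site.xml");
        // Same table name and HFile directory as in HiveMySQl2HBaseMR.run()
        HTable htable = new HTable(conf, "2");
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Moves the generated HFiles into the regions of the target table
        loader.doBulkLoad(new Path("hdfs://node11:9000/datas/out1"), htable);
        htable.close();
    }
}

The same step can also be run from the shell with the bundled tool: hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfile_output_path> <table_name>.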
The JDBCUtils class:
package cn.tansun.bd.utils;

import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * @author zgl
 * @version created 2016-06-23 16:25
 */
public class JDBCUtils {

    public JDBCUtils() {
    }

    public static String PATH = "jdbc.properties";
    public static Properties prop;
    public static String url = null;
    public static String username = null;
    public static String password = null;
    public static Connection conn;
    public static Statement stmt;
    public static ResultSet rs;
    public static String fileName = null;

    // Load the connection settings from jdbc.properties on the classpath
    static {
        try {
            InputStream inputStream = JDBCUtils.class.getClassLoader().getResourceAsStream(PATH);
            prop = new Properties();
            prop.load(inputStream);
            url = prop.getProperty("jdbc.url");
            username = prop.getProperty("jdbc.username");
            password = prop.getProperty("jdbc.password");
            if (inputStream != null) {
                inputStream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void closeConnection(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Run a SELECT and return each row as a column-name -> value map.
     *
     * @param sql the statement to execute
     * @return one map per result row
     */
    @SuppressWarnings("rawtypes")
    public static List<Map> selectDatas(String sql) {
        List<Map> listDatas = new ArrayList<Map>();
        try {
            conn = DriverManager.getConnection(url, username, password);
            conn.setAutoCommit(false);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(sql);
            if (rs != null) {
                ResultSetMetaData metaData = rs.getMetaData();
                int count = metaData.getColumnCount();
                Map<String, Object> map = null;
                while (rs.next()) {
                    map = new HashMap<String, Object>();
                    for (int i = 1; i <= count; i++) {
                        map.put(metaData.getColumnName(i), rs.getObject(i));
                    }
                    listDatas.add(map);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return listDatas;
    }

    /**
     * Run a SELECT and return all column values flattened into one list.
     *
     * @param sql the statement to execute
     * @return every column value of every row, in result-set order
     */
    public static List<String> getStrMap(String sql) {
        List<String> strList = new ArrayList<String>();
        try {
            conn = DriverManager.getConnection(url, username, password);
            conn.setAutoCommit(false);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(sql);
            if (rs != null) {
                ResultSetMetaData metaData = rs.getMetaData();
                int count = metaData.getColumnCount();
                while (rs.next()) {
                    for (int i = 1; i <= count; i++) {
                        strList.add((String) rs.getObject(i));
                    }
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return strList;
    }

    public static String table_name = null;
    public static String rowkey = null;
    public static String column_family = null;
    public static String column_name = null;
    private static String rows = null;
    public static String sql = null;
    public static String sql2 = null;

    public static void main(String[] args) {
        sql2 = "SELECT GROUP_CONCAT(DISTINCT aq.column_hive_index, ' ', aq.column_name, ' '"
                + " ORDER BY aq.column_hive_index SEPARATOR ',') AS column_names"
                + " FROM archive_hqualifier aq"
                + " WHERE aq.table_id = 77 GROUP BY aq.column_name ORDER BY aq.column_hive_index";
        sql = "SELECT DISTINCT s.tableName, ar.rowkey, af.column_family, aq.column_name"
                + " FROM archive_htable s, archive_hrowkey ar, archive_hfamily af, archive_hqualifier aq"
                + " WHERE s.rowkey_id = ar.rowkey_id AND ar.family_id = af.family_id"
                + " AND af.qualifier_id = aq.qualifier_id";
        List<String> strList = getStrMap(sql);
        for (int i = 0; i < strList.size(); i++) {
            System.out.print(strList.get(i));
        }
    }
}
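JDBCUtils reads its connection settings from a jdbc.properties file on the classpath, using the three keys shown in the static initializer. A minimal sketch of that file, with placeholder host, database name, and credentials (the real values depend on your MySQL setup, and the MySQL Connector/J jar must be on the classpath so DriverManager can locate the driver):

jdbc.url=jdbc:mysql://localhost:3306/archive?useUnicode=true&characterEncoding=UTF-8
jdbc.username=root
jdbc.password=123456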