读取HDFS上文件数据

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.List; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.Progressable;
/**
 * Helpers for reading text data stored on HDFS.
 *
 * <p>All text is decoded as UTF-8.
 *
 * @author (author / e-mail not recorded)
 * @version created 2016-03-08 09:37:49
 */
public class ReadHDFSDatas {

    /** Hadoop configuration used by {@link #ReadFile(String)}. */
    static Configuration conf = new Configuration();

    /** Charset name used whenever file bytes are decoded into text. */
    private static final String ENCODING = "UTF-8";

    /**
     * Reads every text line of every file listed under {@code location}.
     *
     * <p>Entries whose name starts with {@code "_"} (for example {@code _SUCCESS})
     * are skipped. When a compression codec is registered for a file name, the
     * file is decompressed with it before being decoded.
     *
     * <p>Unlike the earlier revision, the content is split on line terminators
     * (not on tab characters), each stream is closed after use, and no debug
     * output is printed.
     *
     * @param location file or directory to list and read
     * @param conf     Hadoop configuration used to resolve the file system and codecs
     * @return all lines in listing order; empty when the listing is {@code null}
     * @throws Exception if listing or reading fails
     */
    public static List<String> readLines( Path location, Configuration conf )
            throws Exception {
        FileSystem fileSystem = FileSystem.get( location.toUri(), conf );
        CompressionCodecFactory factory = new CompressionCodecFactory( conf );
        List<String> results = new ArrayList<String>();

        FileStatus[] items = fileSystem.listStatus( location );
        if ( items == null ) {
            return results;
        }

        for ( FileStatus item : items ) {
            Path file = item.getPath();
            // ignoring files like _SUCCESS
            if ( file.getName().startsWith( "_" ) ) {
                continue;
            }
            CompressionCodec codec = factory.getCodec( file );
            // Both the raw stream and the optional decompressing wrapper are
            // closed when this block exits, even if decoding fails.
            try ( InputStream raw = fileSystem.open( file );
                  InputStream stream = ( codec == null ) ? raw : codec.createInputStream( raw ) ) {
                results.addAll( IOUtils.readLines( stream, ENCODING ) );
            }
        }
        return results;
    }

    /**
     * Reads a whole HDFS file into a string.
     *
     * <p>The earlier revision opened the file twice, discarded everything it
     * read and always returned an empty string; this version returns the
     * decoded file content.
     *
     * @param hdfs URI or path of the file to read
     * @return the file content decoded as UTF-8
     * @throws IOException if the file cannot be opened or read
     */
    public String ReadFile( String hdfs )
            throws IOException {
        // NOTE(review): the FileSystem returned by FileSystem.get is normally a
        // cached, shared instance, so it is deliberately not closed here.
        FileSystem fs = FileSystem.get( URI.create( hdfs ), conf );
        try ( FSDataInputStream in = fs.open( new Path( hdfs ) ) ) {
            return IOUtils.toString( in, ENCODING );
        }
    }

    /**
     * Prints every line of an HDFS file to standard output and returns the text.
     *
     * <p>The earlier revision always returned {@code null} after a successful
     * read and swallowed every exception; this version returns the content
     * (each line followed by {@code '\n'}) and propagates I/O failures as the
     * signature declares.
     *
     * @param filePath URI or path of the file to read
     * @return the lines that were printed, each terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFile( String filePath ) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get( URI.create( filePath ), conf );
        StringBuilder content = new StringBuilder();
        try ( FSDataInputStream in = fs.open( new Path( filePath ) ) ) {
            for ( String line : IOUtils.readLines( in, ENCODING ) ) {
                System.out.println( line );
                content.append( line ).append( '\n' );
            }
        }
        return content.toString();
    }

    /**
     * Reads an HDFS file and returns its lines.
     *
     * <p>This method is best-effort: a failure is reported with a stack trace
     * on standard error and an empty list is returned, because the signature
     * declares no checked exception.
     *
     * @param filePath URI or path of the file to read
     * @return the lines of the file, or an empty list if reading failed
     */
    public static List<String> getDatas( String filePath ) {
        List<String> list = new ArrayList<String>();
        try {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get( URI.create( filePath ), conf );
            try ( FSDataInputStream in = fs.open( new Path( filePath ) ) ) {
                list.addAll( IOUtils.readLines( in, ENCODING ) );
            }
        }
        catch ( Exception e ) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Loads {@code /datas/u.data} from HDFS and writes each record to the HBase
     * table {@code t1} through the put API.
     *
     * <p>Each record is expected to hold four tab-separated columns:
     * {@code userid INT, movieid INT, rating INT, weekday INT}. Records with
     * fewer columns are reported on standard error and skipped instead of
     * aborting the whole load.
     *
     * @param args unused
     */
    public static void main( String[] args ) {
        String hdfs = "/datas/u.data";
        List<String> listDatas = getDatas( hdfs );
        for ( String record : listDatas ) {
            String[] split = record.split( "\t" );
            if ( split.length < 4 ) {
                System.err.println( "skip malformed record: " + record );
                continue;
            }
            String userid = split[0];
            String movieid = split[1];
            String rating = split[2];
            String weekday = split[3];
            String makeRowKey = RegionSeverSplit.makeRowKey( userid );
            // NOTE(review): the row key depends on userid only, so several
            // records of one user target the same row and column - confirm
            // that this is intended.
            String cell = movieid + "-" + rating + "-" + weekday;
            // Write the record through the put API.
            HBaseUtils.addRows( "t1", makeRowKey, "f1", "weekday-rating",
                    cell.getBytes( java.nio.charset.StandardCharsets.UTF_8 ) );
        }
        System.out.println( "success......" );
    }
}

HBase 随机生成rowkey 前置处理

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.codec.binary.Hex;

/**
 * Builds HBase row keys that start with a hash-derived prefix.
 */
public class RegionSeverSplit {

    /** Number of leading MD5 hex digits the prefix is derived from. */
    private static final int PREFIX_HEX_DIGITS = 7;

    /**
     * Builds a row key of the form {@code <prefix>-<id>}.
     *
     * <p>The prefix is computed from the MD5 digest of {@code id} (encoded as
     * UTF-8 so the key does not depend on the platform default charset): the
     * first seven hex digits are parsed as an integer, shifted right by one
     * bit, and rendered as lower-case hex left-padded with zeros to seven
     * characters.
     *
     * @param id identifier to derive the row key from; must not be {@code null}
     * @return the prefixed row key
     * @throws NullPointerException  if {@code id} is {@code null}
     * @throws IllegalStateException if the JRE provides no MD5 implementation
     */
    public static String makeRowKey(String id) {
        if (id == null) {
            throw new NullPointerException("id must not be null");
        }

        final byte[] digest;
        try {
            MessageDigest messageDigest = MessageDigest.getInstance("MD5");
            digest = messageDigest.digest(id.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        } catch (NoSuchAlgorithmException e) {
            // Fail with the real cause instead of continuing with a null
            // digest and hitting a NullPointerException further down.
            throw new IllegalStateException("MD5 digest is not available", e);
        }

        String md5Hex = new String(Hex.encodeHex(digest));
        // Shift the leading 28 bits right by one bit.
        int shifted = Integer.parseInt(md5Hex.substring(0, PREFIX_HEX_DIGITS), 16) >> 1;
        String shiftedHex = Integer.toHexString(shifted);

        StringBuilder key = new StringBuilder(PREFIX_HEX_DIGITS + 1 + id.length());
        for (int i = shiftedHex.length(); i < PREFIX_HEX_DIGITS; i++) {
            key.append('0');
        }
        return key.append(shiftedHex).append('-').append(id).toString();
    }

    /**
     * Prints the row key generated for a sample identifier.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String rowky = makeRowKey("asdfasdf");
        System.out.println(rowky);
    }
}

HBase Util工具类,用put方式批量或者单条数据入库

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.InputSplit;
import cn.tansun.bd.hdfs.ReadHDFSDatas;

/**
 * HBase helpers that store single cells through the put API.
 *
 * @author root
 */
public class HBaseUtils {

    /** Charset used to turn row keys, families and qualifiers into bytes. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /** Size passed to the shared table pool. */
    private static final int MAX_POOLED_TABLES = 1000;

    private static Configuration conf;

    /** One pool shared by every call instead of a new pool per written row. */
    private static final HTablePool TABLE_POOL;

    static {
        conf = new Configuration();
        String filePath = "hbase-site.xml";
        Path path = new Path(filePath);
        conf.addResource(path);
        conf = HBaseConfiguration.create(conf);
        TABLE_POOL = new HTablePool(conf, MAX_POOLED_TABLES);
    }

    /**
     * Inserts one cell.
     *
     * <p>The earlier revision returned {@code true} unconditionally; this
     * version reports whether the put actually succeeded.
     *
     * @param tableName    table to write to
     * @param rowkey       row key of the cell
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return {@code true} if the put succeeded; {@code false} if
     *         {@code tableName} is {@code null} or the write failed
     */
    public static boolean addRow(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        if (tableName == null) {
            System.out.println(" please select tableName");
            return false;
        }
        HTableInterface table = TABLE_POOL.getTable(tableName);
        try {
            Put put = new Put(rowkey.getBytes(UTF_8));
            put.addColumn(columnFinaly.getBytes(UTF_8), columnName.getBytes(UTF_8), values);
            table.put(put);
            System.out.print("addRow success..." + "tableName....."
                    + tableName);
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(table);
        }
    }

    /**
     * Prints the first 100 lines of {@code /datas/u.data} read from HDFS.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String hdfs = "/datas/u.data";
        List<String> datas = ReadHDFSDatas.getDatas(hdfs);
        int limit = Math.min(datas.size(), 100);
        for (int i = 0; i < limit; i++) {
            System.out.println(datas.get(i));
        }
    }

    /**
     * Writes one cell and reports the elapsed time on standard output.
     *
     * <p>The earlier revision built 100 identical puts, queued only the first
     * one, submitted it twice (batch and put) and then returned the cleared
     * list. This version writes the cell once and returns the put it wrote.
     * Both {@code tableName} and {@code rowkey} must be non-null (the earlier
     * check used {@code ||} and let a single null through).
     *
     * @param tableName    table to write to
     * @param rowkey       row key of the cell
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return the puts that were written; empty (never {@code null}) if the
     *         arguments were rejected or the write failed
     */
    public static List<Put> addRows(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        List<Put> lists = new ArrayList<Put>();
        long start = System.currentTimeMillis();
        if (tableName != null && rowkey != null) {
            HTableInterface table = TABLE_POOL.getTable(tableName);
            try {
                Put put = new Put(rowkey.getBytes(UTF_8));
                put.addColumn(columnFinaly.getBytes(UTF_8), columnName.getBytes(UTF_8), values);
                lists.add(put);
                table.put(lists);
                // Push any client-side buffered writes before returning, as
                // the earlier revision did.
                table.flushCommits();
            } catch (IOException e) {
                e.printStackTrace();
                // Nothing was confirmed as written, so report nothing.
                lists.clear();
            } finally {
                closeQuietly(table);
            }
        } else {
            System.out.println("..tableName not null");
        }
        long end = System.currentTimeMillis();
        long times = end - start;
        System.out.println(times * 1.0 / 1000 +"..... finsh........" );
        return lists;
    }

    /**
     * Reads data by file name. Not implemented yet.
     *
     * @param fileName name of the file to read
     * @return always {@code null}
     */
    public List<String> getFileDatas(String fileName) {
        return null;
    }

    /**
     * Reads HDFS data by file name. Not implemented yet.
     *
     * @param fileName name of the HDFS file to read
     * @return always {@code null}
     */
    public static List<String> getHdfsDatas(String fileName) {
        return null;
    }

    /**
     * Computes input splits for a key range. Not implemented yet.
     *
     * @param startKey first key of the range
     * @param endKey   last key of the range
     * @return always {@code null}
     */
    public List<InputSplit> getSplits(byte[] startKey, byte[] endKey) {
        return null;
    }

    /** Closes a table handle, reporting (not propagating) a close failure. */
    private static void closeQuietly(HTableInterface table) {
        try {
            table.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

HDFS 工具类的更多相关文章

  1. flink---实时项目--day02-----1. 解析参数工具类 2. Flink工具类封装 3. 日志采集架构图 4. 测流输出 5. 将kafka中数据写入HDFS 6 KafkaProducer的使用 7 练习

    1. 解析参数工具类(ParameterTool) 该类提供了从不同数据源读取和解析程序参数的简单实用方法,其解析args时,只能支持单只参数. 用来解析main方法传入参数的工具类 public c ...

  2. hadoop的dfs工具类一个【原创】

    开始没搞定插件问题,就弄了个dfs操作类,后面搞定了插件问题,这玩意也就聊胜于无了,还是丢这里算了. 首先是一个配置,ztool.hadoop.properties hadoop.home.dir=G ...

  3. Hbase javaAPI(工具类)表的增删改查

    建立连接: package Init; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; i ...

  4. Java基础Map接口+Collections工具类

    1.Map中我们主要讲两个接口 HashMap  与   LinkedHashMap (1)其中LinkedHashMap是有序的  怎么存怎么取出来 我们讲一下Map的增删改查功能: /* * Ma ...

  5. Android—关于自定义对话框的工具类

    开发中有很多地方会用到自定义对话框,为了避免不必要的重复代码,在此总结出一个工具类. 弹出对话框的地方很多,但是都大同小异,不同无非就是提示内容或者图片不同,下面这个类是将提示内容和图片放到了自定义函 ...

  6. [转]Java常用工具类集合

    转自:http://blog.csdn.net/justdb/article/details/8653166 数据库连接工具类——仅仅获得连接对象 ConnDB.java package com.ut ...

  7. js常用工具类.

    一些js的工具类 复制代码 /** * Created by sevennight on 15-1-31. * js常用工具类 */ /** * 方法作用:[格式化时间] * 使用方法 * 示例: * ...

  8. Guava库介绍之实用工具类

    作者:Jack47 转载请保留作者和原文出处 欢迎关注我的微信公众账号程序员杰克,两边的文章会同步,也可以添加我的RSS订阅源. 本文是我写的Google开源的Java编程库Guava系列之一,主要介 ...

  9. Java程序员的日常—— Arrays工具类的使用

    这个类在日常的开发中,还是非常常用的.今天就总结一下Arrays工具类的常用方法.最常用的就是asList,sort,toStream,equals,copyOf了.另外可以深入学习下Arrays的排 ...

随机推荐

  1. linux上执行jmeter脚本

    1.linux上安装jmeter 将windows上的zip包直接放到linux上 进入bin目录,chmod 777 jmeter 修改环境变量: 1 2 3 4 # vim /etc/profil ...

  2. mpvue 微信小程序半屏弹框(half-screen-dialog)

    <template> <div> <a @click="isShow">half-screen-dialog</a> <!-- ...

  3. Tomcat-部署多个项目(不同端口)

    20190713  整理 参考文档 https://blog.csdn.net/chenchunlin526/article/details/78799772 如何在Tomcat服务中,为不同端口部署 ...

  4. foreach与正常for循环效率对比

    foreach foreach编译成字节码之后,使用的是迭代器实现的. foreach特点: 无须获取容器大小 需要创建额外的迭代器变量 遍历期间得到的是对象,没有索引位置信息,因此不能进行赋值操作. ...

  5. 02 | 日志系统:一条SQL更新语句是如何执行的? 学习记录

    <MySQL实战45讲>02 | 日志系统:一条SQL更新语句是如何执行的? 学习记录http://naotu.baidu.com/file/ad320c7a0e031c2d6db7b5a ...

  6. 【LeetCode】抽样 sampling(共4题)

    第一部分 水塘抽样 reservoir sampling 水塘抽样的原理:(应该开一篇新文章)pssss [382]Linked List Random Node (2018年11月15日,新算法) ...

  7. PyQt5界面上调用subprocess.Popen会闪命令窗口的问题

    最近在做一个界面开发,主要实现的是点击一个按钮,会执行adb安装应用程序的功能,在调试阶段一切都正常,但打包成一个exe安装程序,安装之后运行,点击按钮会闪一下adb的命令窗口 先列出subproces ...

  8. hdu 4619 Warm up 2 (二分匹配)

    题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4619 题意: 平面上有一些1×2的骨牌,每张骨牌要么水平放置,要么竖直放置,并且保证同方向放置的骨牌不 ...

  9. note4

  10. iOS 常用代码之 UICollectionView

    记一下 不用每次都从0开始写,人生苦短 ,省点时间给自己 之前必须完成相关注册: . cell . 头部和尾部 [self.hotAndHistoryCollectionV registerNib:[ ...