HDFS 工具类

读取HDFS上文件数据

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.io.StringWriter;

import java.net.URI;

import java.util.ArrayList;

import java.util.List;

import org.apache.commons.io.IOUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.compress.CompressionCodec;

import org.apache.hadoop.io.compress.CompressionCodecFactory;

import org.apache.hadoop.util.Progressable;

/**

 * @author 作者 E-mail:

 * @version 创建时间：2016年3月8日 上午9:37:49 类说明

 * 读取hdfs文件数据

 */

public class ReadHDFSDatas {

    static Configuration conf = new Configuration();

    /**

     *

     *

     * @param location

     * @param conf

     * @return

     * @throws Exception

     */

    public static List<String> readLines( Path location, Configuration conf )

        throws Exception {

        // StringBuffer sb = new StringBuffer();

        FileSystem fileSystem = FileSystem.get( location.toUri(), conf );

        CompressionCodecFactory factory = new CompressionCodecFactory( conf );

        FileStatus[] items = fileSystem.listStatus( location );

        if ( items == null )

            return new ArrayList<String>();

        List<String> results = new ArrayList<String>();

        for ( FileStatus item : items ) {

            // ignoring files like _SUCCESS

            if ( item.getPath().getName().startsWith( "_" ) ) {

                continue;

            }

            CompressionCodec codec = factory.getCodec( item.getPath() );

            InputStream stream = null;

            if ( codec != null ) {

                stream = codec.createInputStream( fileSystem.open( item.getPath() ) );

            }

            else {

                stream = fileSystem.open( item.getPath() );

            }

            StringWriter writer = new StringWriter();

            IOUtils.copy( stream, writer, "UTF-8" );

            String raw = writer.toString();

            // String[] resulting = raw.split( "\n" );

            for ( String str : raw.split( "\t" ) ) {

                results.add( str );

                System.out.println( "start..." + results + "....." );

            }

        }

        return results;

    }

    public String ReadFile( String hdfs )

        throws IOException {

        StringBuffer sb = new StringBuffer();

        FileSystem fs = FileSystem.get( URI.create( hdfs ), conf );

        FSDataInputStream hdfsInStream = fs.open( new Path( hdfs ) );

        try {

            fs = FileSystem.get( conf );

            hdfsInStream = fs.open( new Path( hdfs ) );

            byte[] b = new byte[10240];

            int numBytes = 0;

            // Windows os error

            while ( ( numBytes = hdfsInStream.read( b ) ) > 0 ) {

                numBytes = hdfsInStream.read( b );

            }

        }

        catch ( IOException e ) {

            e.printStackTrace();

        }

        hdfsInStream.close();

        fs.close();

        return sb.toString();

    }

    /**

     *

     * @param filePath

     * @return

     * @throws IOException

     */

    public static String getFile( String filePath ) throws IOException {

        String line = "";

        try {

            Configuration conf = new Configuration();

            FileSystem fs = FileSystem.get( URI.create( filePath ), conf );

            Path pathq = new Path( filePath );

            FSDataInputStream fsr = fs.open( pathq );

            while ( line != null ) {

                line = fsr.readLine();

                if ( line != null ) {

                    System.out.println( line );

                }

            }

        }

        catch ( Exception e ) {

            e.printStackTrace();

        }

        return line;

    }

    /*

     *

     */

    public static List<String> getDatas( String filePath )  {

       List<String> list = new ArrayList<String>();

        try {

            Configuration conf = new Configuration();

            FileSystem fs = FileSystem.get( URI.create( filePath ), conf );

            Path pathq = new Path( filePath );

            FSDataInputStream fsr = fs.open( pathq );

            String line ="";

            while ( line != null ) {

                line = fsr.readLine();

                if ( line != null ) {

                    list.add( line );

                }

            }

        }

        catch ( Exception e ) {

            e.printStackTrace();

        }

        return list;

    }

    public static void main( String[] args ){

        //String hdfs = "hdfs://node4:9000/hive/warehouse/u_data/u.data";

        //String  hdfs = "/datas/t1";

        String  hdfs = "/datas/u.data";

        Path path = new Path( hdfs );

        // String hdfs = "/datas";

        // String hdfs = "/hive/warehouse/u_data/u.data";

      //  getFile(hdfs);

        /**

         * userid INT,

        movieid INT,

        rating INT,

        weekday INT)

         */

        List<String> listDatas = getDatas(hdfs);

        for (int i = 0; i < listDatas.size(); i++){

                String[] split = listDatas.get(i).split("\t");

                String userid = split[0];

                String movieid = split[1];

                String rating = split[2];

                String weekday = split[3];

                String makeRowKey = RegionSeverSplit.makeRowKey(userid);　
　　　　　　　　　// 用put API实现批量入库

                //System.out.println("userid--"+ userid + ".."+ "movieid--"+ movieid + ".." +"rating--"+ rating + ".."+"weekday--"+ weekday + "....");

                HBaseUtils.addRows("t1", makeRowKey, "f1", "weekday-rating", (movieid+"-"+rating+"-"+weekday).getBytes());

        }

        System.out.println("success......");

    }

}

HBase 随机生成rowkey 前置处理

import java.security.MessageDigest;

import java.security.NoSuchAlgorithmException;

import org.apache.commons.codec.binary.Hex;

/**
 * Builds HBase row keys that carry a hash-derived prefix in front of the id.
 */
public class RegionSeverSplit {

    /** Charset used to turn the id into bytes, fixed so keys are the same on every machine. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /**
     * Builds a row key of the form {@code <prefix>-<id>}.
     *
     * <p>The prefix is the first seven hex digits of the MD5 digest of
     * {@code id} (its UTF-8 bytes), read as a number, shifted right by one bit
     * and written back as seven lower-case hex digits, left-padded with zeros.
     *
     * @param id identifier to embed in the row key; must not be {@code null}
     * @return the seven-digit hex prefix, a dash, and the unchanged {@code id}
     * @throws NullPointerException  if {@code id} is {@code null}
     * @throws IllegalStateException if the runtime offers no MD5 digest
     */
    public  static String makeRowKey(String id){
        byte[] digest;
        try {
            digest = MessageDigest.getInstance("MD5").digest(id.getBytes(UTF_8));
        } catch (NoSuchAlgorithmException e) {
            // Fail with the real cause instead of an unrelated NullPointerException later.
            throw new IllegalStateException("MD5 digest is not available", e);
        }

        // Seven hex digits are 28 bits: three whole bytes plus the high nibble of the fourth.
        int leading28Bits = ((digest[0] & 0xff) << 20)
                | ((digest[1] & 0xff) << 12)
                | ((digest[2] & 0xff) << 4)
                | ((digest[3] & 0xf0) >> 4);

        //turn right md5
        int shifted = leading28Bits >> 1;

        return String.format("%07x", shifted) + "-" + id;
    }

    /**
     * Prints the row key generated for a sample id.
     *
     * @param args unused
     */
    public static void main(String[] args){
        String rowky = makeRowKey("asdfasdf");
        System.out.println(rowky);
    }

}

HBase Util工具类，用put方式批量或者单条数据入库

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import java.util.Random;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.client.HBaseAdmin;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.client.HTableInterface;

import org.apache.hadoop.hbase.client.HTablePool;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.mapreduce.InputSplit;

import cn.tansun.bd.hdfs.ReadHDFSDatas;

/**
 * HBase helper that stores cells through the client {@code Put} API.
 *
 * @author root
 */
public class HBaseUtils {

    /** HBase client configuration, built from {@code hbase-site.xml} in the static initialiser. */
    private static Configuration conf;

    /**
     * Table pool shared by every write in this class, so a new pool is not
     * created (and abandoned) on each call.
     */
    private static HTablePool tablePool;

    static {
        conf = new Configuration();
        String filePath = "hbase-site.xml";
        Path path = new Path(filePath);
        conf.addResource(path);
        conf = HBaseConfiguration.create(conf);
        tablePool = new HTablePool(conf, 1000);
    }

    /**
     * Inserts one cell.
     *
     * @param tableName    table to write to; when {@code null} nothing is
     *                     written
     * @param rowkey       row key of the cell
     * @param columnFinaly column family of the cell
     * @param columnName   column qualifier of the cell
     * @param values       cell value
     * @return {@code true} when the cell was written, {@code false} when
     *         {@code tableName} is {@code null} or the write failed
     */
    public static boolean addRow(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        if (tableName == null) {
            System.out.println("  please select tableName");
            return false;
        }

        List<Put> puts = new ArrayList<Put>();
        puts.add(buildPut(rowkey, columnFinaly, columnName, values));

        boolean written = writePuts(tableName, puts);
        if (written) {
            System.out.print("addRow success..." + "tableName....."
                    + tableName);
        }
        return written;
    }

    /**
     * Prints the first 100 lines of {@code /datas/u.data} read from HDFS.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String hdfs = "/datas/u.data";
        List<String> getDatas = ReadHDFSDatas.getDatas(hdfs);

        int limit = Math.min(getDatas.size(), 100);
        for (int i = 0; i < limit; i++){
            System.out.println(getDatas.get(i));
        }
    }

    /**
     * Stores one cell and reports how long the call took.
     *
     * <p>The signature carries a single row key and value, so exactly one
     * {@code Put} is built and written once. The elapsed time in seconds is
     * printed to standard output in every case.
     *
     * @param tableName    table to write to
     * @param rowkey       row key of the cell
     * @param columnFinaly column family of the cell
     * @param columnName   column qualifier of the cell
     * @param values       cell value
     * @return the list of {@code Put}s that were written; empty when
     *         {@code tableName} or {@code rowkey} is {@code null} or the write
     *         failed (never {@code null})
     */
    public static List<Put> addRows(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        List<Put> lists = new ArrayList<Put>();
        long start = System.currentTimeMillis();

        // Both values are required: a null in either one cannot be written.
        if (tableName != null && rowkey != null) {
            lists.add(buildPut(rowkey, columnFinaly, columnName, values));
            if (!writePuts(tableName, lists)) {
                // Report only what actually reached HBase.
                lists.clear();
            }
        } else {
            System.out.println("..tableName  not null");
        }

        long end = System.currentTimeMillis();
        long times = end - start;
        System.out.println(times * 1.0 / 1000 +"..... finsh........"  );
        return lists;
    }

    /**
     * Reads data by file name. Not implemented.
     *
     * @param fileName name of the file to read (ignored)
     * @return always {@code null}
     */
    public List<String> getFileDatas(String fileName){
        return null;
    }

    /**
     * Reads HDFS data by file name. Not implemented.
     *
     * @param fileName name of the HDFS file to read (ignored)
     * @return always {@code null}
     */
    public static List<String> getHdfsDatas(String fileName){
        return null;
    }

    /**
     * Computes input splits for a key range. Not implemented.
     *
     * @param startKey first key of the range (ignored)
     * @param endKey   last key of the range (ignored)
     * @return always {@code null}
     */
    public List<InputSplit> getSplits(byte[] startKey, byte[] endKey) {
        return null;
    }

    /** Builds a {@code Put} holding one cell for the given row. */
    private static Put buildPut(String rowkey, String columnFinaly,
            String columnName, byte[] values) {
        Put put = new Put(rowkey.getBytes());
        put.addColumn(columnFinaly.getBytes(), columnName.getBytes(), values);
        return put;
    }

    /**
     * Writes the given puts to {@code tableName} through the shared pool and
     * always hands the table back afterwards.
     *
     * @return {@code true} when the write and the flush both succeeded
     */
    private static boolean writePuts(String tableName, List<Put> puts) {
        HTableInterface table = tablePool.getTable(tableName);
        try {
            table.put(puts);
            // Send anything still buffered in case the table has auto-flush switched off.
            table.flushCommits();
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            try {
                // NOTE(review): assumes close() on a pooled table returns it to
                // the pool (HBase 0.92 and later) - confirm for the deployed version.
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

HDFS 工具类的更多相关文章

flink---实时项目--day02-----1. 解析参数工具类 2. Flink工具类封装 3. 日志采集架构图 4. 测流输出 5. 将kafka中数据写入HDFS 6 KafkaProducer的使用 7 练习
1. 解析参数工具类(ParameterTool) 该类提供了从不同数据源读取和解析程序参数的简单实用方法,其解析args时,只能支持单只参数. 用来解析main方法传入参数的工具类 public c ...
hadoop的dfs工具类一个【原创】
开始没搞定插件问题,就弄了个dfs操作类,后面搞定了插件问题,这玩意也就聊胜于无了,还是丢这里算了. 首先是一个配置,ztool.hadoop.properties hadoop.home.dir=G ...
Hbase javaAPI（工具类）表的增删改查
建立连接: package Init; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; i ...
Java基础Map接口+Collections工具类
1.Map中我们主要讲两个接口 HashMap 与 LinkedHashMap (1)其中LinkedHashMap是有序的怎么存怎么取出来我们讲一下Map的增删改查功能: /* * Ma ...
Android—关于自定义对话框的工具类
开发中有很多地方会用到自定义对话框,为了避免不必要的重复代码,在此总结出一个工具类. 弹出对话框的地方很多,但是都大同小异,不同无非就是提示内容或者图片不同,下面这个类是将提示内容和图片放到了自定义函 ...
[转]Java常用工具类集合
转自:http://blog.csdn.net/justdb/article/details/8653166 数据库连接工具类——仅仅获得连接对象 ConnDB.java package com.ut ...
js常用工具类.
一些js的工具类复制代码 /** * Created by sevennight on 15-1-31. * js常用工具类 */ /** * 方法作用:[格式化时间] * 使用方法 * 示例: * ...
Guava库介绍之实用工具类
作者:Jack47 转载请保留作者和原文出处欢迎关注我的微信公众账号程序员杰克,两边的文章会同步,也可以添加我的RSS订阅源. 本文是我写的Google开源的Java编程库Guava系列之一,主要介 ...
Java程序员的日常—— Arrays工具类的使用
这个类在日常的开发中,还是非常常用的.今天就总结一下Arrays工具类的常用方法.最常用的就是asList,sort,toStream,equals,copyOf了.另外可以深入学习下Arrays的排 ...

随机推荐

67.Task Scheduler（任务规划）
Level: Medium 题目描述: Given a char array representing tasks CPU need to do. It contains capital letter ...
66.Subarray Sum Equals K（子数组和为K的个数）
Level: Medium 题目描述: Given an array of integers and an integer k, you need to find the total number ...
node-sass 安装失败解决方法
使用淘宝镜像源 npm config set sass_binary_site https://npm.taobao.org/mirrors/node-sass/ npm install node-s ...
CSS 针对谷歌浏览器(Chrome) safari的webkit核心浏览器CSS hack
@media screen and (-webkit-min-device-pixel-ratio:0) { ul#navUL ul a{padding:8px 2px;word-break:keep ...
This program cannot be run in DOS mode.
问题:通过ftp上传的exe执行时提示“This program cannot be run in DOS mode.” 解决方法:检查ftp传输模式,设置成binary模式上传即可参考:https ...
[SCOI2010]股票交易（单调队列优化dp）
[SCOI2010]股票交易题目描述最近lxhgww又迷上了投资股票,通过一段时间的观察和学习,他总结出了股票行情的一些规律. 通过一段时间的观察,lxhgww预测到了未来T天内某只股票的走势,第 ...
PLC 控制系统资源
之前整理的PC高级语言与PLC通讯代码下载链接:三菱:http://blog.sina.com.cn/s/blog_16d7d3ecb0102x6wj.html倍福:http://bbs.elecfa ...
牛客网NOIP赛前集训营-提高组（第七场）A-中国式家长 2
题目描述有一天,牛牛找到了一个叫<中国式家长>的游戏,游戏中需要靠"挖脑洞"来提升悟性. 挖脑洞在一个\(N\)行\(M\)列的地图上进行,一开始牛牛有\(K\)点行 ...
前端每日实战：56# 视频演示如何用纯 CSS 描述程序员的生活
效果预览按下右侧的"点击预览"按钮可以在当前页面预览,点击链接可以全屏预览. https://codepen.io/comehope/pen/YvYVvY 可交互视频此视频是可 ...
Linux进程前后台管理（&，fg, bg）
将进程置于后台 xlogo & 会把进程置于后台管理,使用ps命令查看进程 PID. 使用命令jobs [1]+ Running xlogo & 可以看到正在运行的 xlogo 进程. ...

HDFS 工具类

HDFS 工具类的更多相关文章

随机推荐

热门专题