用 Java API 读取 HDFS 文件

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.security.PrivilegedExceptionAction;
import java.text.SimpleDateFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import bean.TableStatistic;

/**
 * Controller that reads a JSON statistics file from a fixed HDFS directory and
 * returns it deserialized as a {@link TableStatistic}.
 *
 * <p>File systems and user identities are cached per user name in concurrent
 * maps; call {@link #destroy()} to close the file systems and drop both caches.
 */
@Controller
@RequestMapping("/dfview")
public class DataFrameViewController extends BaseController {

    /** Name of the HDFS file holding the JSON payload (matched case-insensitively). */
    private static final String HDFS_JSON_NAME = "jsonObj";

    /** User name the HDFS file system is opened as. */
    private static final String HDFS_USER = "bi";

    /** HDFS directory that is listed when looking for {@link #HDFS_JSON_NAME}. */
    private static final String STATISTICS_DIR = "/user/cbt/datax/temp_transfer/zzzdes";

    /** Charset used to decode the file content; the whole file is decoded in one step. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Size in bytes of the buffer used when copying a file into memory. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** Classpath resources added on top of the default Hadoop configuration. */
    private static final String[] HADOOP_CONF_FILES = { "core-site.xml",
            "hdfs-site.xml" };

    /** Per-thread {@code yyyy-MM-dd HH:mm:ss} formatter ({@link SimpleDateFormat} is not thread-safe). */
    public static final ThreadLocal<SimpleDateFormat> appDateFormat = new ThreadLocal<SimpleDateFormat>() {
        @Override
        public SimpleDateFormat initialValue() {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        }
    };

    /** User name to remote-user identity. */
    private final ConcurrentMap<String, UserGroupInformation> cache = new ConcurrentHashMap<String, UserGroupInformation>();

    /** User name to the file system opened on behalf of that user. */
    private final ConcurrentMap<String, FileSystem> fileSystemCache = new ConcurrentHashMap<String, FileSystem>();

    /** Base configuration, built once; never mutated after construction. */
    private final Configuration hadoopConf = buildHadoopConf();

    /**
     * Reads the {@code jsonObj} file from the statistics directory and parses it.
     *
     * @param tableName currently unused; the directory and file name are fixed
     * @return the parsed statistics; when no matching file exists an empty string
     *         is handed to the JSON parser, which presumably yields {@code null}
     *         (NOTE(review): confirm against the fastjson version in use)
     * @throws Exception if the file system cannot be created, or listing or
     *         reading from HDFS fails
     */
    @RequestMapping(value = "/getDFviewOfColumn", method = { RequestMethod.GET })
    @ResponseBody
    public TableStatistic getDFviewOfTable(String tableName)
            throws Exception {
        FileSystem fs = this.createFileSystem(HDFS_USER);
        FileStatus[] stats = fs.listStatus(new Path(STATISTICS_DIR));

        String json = "";
        for (FileStatus status : stats) {
            if (status.isFile()
                    && HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                json = readFileAsString(fs, status.getPath());
                break; // only the first matching file is used
            }
        }
        return JSON.parseObject(json, TableStatistic.class);
    }

    /**
     * Manual smoke test: lists the statistics directory, dumps the
     * {@code jsonObj} file and prints its {@code colUnique} entry, and prints
     * the paths of directories and symlinks.
     *
     * @param args ignored
     * @throws Exception if any HDFS operation fails
     */
    public static void main(String[] args) throws Exception {
        DataFrameViewController controller = new DataFrameViewController();
        FileSystem fs = controller.createFileSystem(HDFS_USER);
        Path homePath = new Path(STATISTICS_DIR);
        System.out.println("***********************************");

        for (FileStatus status : fs.listStatus(homePath)) {
            if (status.isFile()) {
                // Only the JSON file can be parsed; other regular files are skipped
                // instead of being parsed as an empty document.
                if (!HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                    continue;
                }
                String json = readFileAsString(fs, status.getPath());
                System.out.print(json);
                System.out
                        .println("************************************************");
                JSONObject jb = JSON.parseObject(json);
                System.out.println("********!!!!! : "  + jb.get("colUnique"));
                TableStatistic ts = JSON.parseObject(json, TableStatistic.class);
                System.out.println("********!!!!! : "  + ts.getColUnique().toString());
            } else if (status.isDirectory()) {
                System.out.println(status.getPath().toString());
            } else if (status.isSymlink()) {
                System.out.println("&&&&&&&&" + status.getPath().toString());
            }
        }
    }

    /**
     * Returns the file system for {@code user}, creating and caching it on first use.
     *
     * @param user name of the user to act as
     * @return the cached or newly created file system
     * @throws Exception if the file system cannot be obtained
     */
    public FileSystem createFileSystem(String user) throws Exception {
        FileSystem cached = fileSystemCache.get(user);
        if (cached != null) {
            return cached;
        }

        // Work on a private copy so the shared base configuration is never
        // rewritten with another caller's user name.
        final Configuration conf = new Configuration(loadHadoopConf());
        conf.set("hadoop.job.ugi", user);

        UserGroupInformation ugi = getProxyUser(user);
        FileSystem created = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            @Override
            public FileSystem run() throws Exception {
                return FileSystem.get(conf);
            }
        });

        // If another thread cached an instance first, hand that one out.
        FileSystem raced = fileSystemCache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

    /**
     * Returns the base Hadoop configuration built when this controller was created.
     */
    private Configuration loadHadoopConf() {
        return hadoopConf;
    }

    /**
     * Builds a configuration and adds each of {@link #HADOOP_CONF_FILES} that
     * is found on this class's class loader; missing resources are skipped.
     */
    private static Configuration buildHadoopConf() {
        Configuration conf = new Configuration();
        ClassLoader loader = DataFrameViewController.class.getClassLoader();
        for (String fileName : HADOOP_CONF_FILES) {
            URL resource = loader.getResource(fileName);
            if (resource != null) {
                conf.addResource(resource);
            }
        }
        return conf;
    }

    /**
     * Reads the whole file at {@code path} and decodes it as UTF-8.
     *
     * <p>The bytes are collected first and decoded once, so multi-byte
     * characters are never split across decode calls.
     *
     * @throws IOException if opening or reading the file fails
     */
    private static String readFileAsString(FileSystem fs, Path path) throws IOException {
        InputStream in = fs.open(path);
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buffer = new byte[READ_BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            return new String(out.toByteArray(), UTF_8);
        } finally {
            in.close();
        }
    }

    /**
     * Closes all file systems of every cached user and empties both caches,
     * so that no closed file system is handed out afterwards.
     */
    public void destroy() {
        for (UserGroupInformation ugi : cache.values()) {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException ignored) {
                // Best effort: keep closing the remaining users' file systems.
            }
        }
        fileSystemCache.clear();
        cache.clear();
    }

    /**
     * Returns the cached remote-user identity for {@code user}, creating it on
     * first use.
     *
     * @throws IOException declared for callers; kept from the original signature
     */
    private UserGroupInformation getProxyUser(String user) throws IOException {
        UserGroupInformation existing = cache.get(user);
        if (existing != null) {
            return existing;
        }
        UserGroupInformation created = UserGroupInformation.createRemoteUser(user);
        UserGroupInformation raced = cache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

}

用java api读取HDFS文件的更多相关文章

java Api 读取HDFS文件内容
package dao; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import java ...
使用JAVA API读取HDFS的文件数据出现乱码的解决方案
使用JAVA api读取HDFS文件乱码踩坑想写一个读取HDFS上的部分文件数据做预览的接口,根据网上的博客实现后,发现有时读取信息会出现乱码,例如读取一个csv时,字符串之间被逗号分割英文字符串 ...
JAVA API 实现hdfs文件操作
java api 实现hdfs 文件操作会出现错误提示: Permission denied: user=hp, access=WRITE, inode="/":hdfs:supe ...
Java API 读取HDFS的单文件
HDFS上的单文件: -bash-3.2$ hadoop fs -ls /user/pms/ouyangyewei/data/input/combineorder/repeat_rec_categor ...
Spark：java api读取hdfs目录下多个文件
需求: 由于一个大文件,在spark中加载性能比较差.于是把一个大文件拆分为多个小文件后上传到hdfs,然而在spark2.2下如何加载某个目录下多个文件呢? public class SparkJo ...
使用java api操作HDFS文件
实现的代码如下: import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import ...
记录一次读取hdfs文件时出现的问题java.net.ConnectException: Connection refused
公司的hadoop集群是之前的同事搭建的,我(小白一个)在spark shell中读取hdfs上的文件时,执行以下指令 >>> word=sc.textFile("hdfs ...
Spark读取HDFS文件，文件格式为GB2312，转换为UTF-8
package iie.udps.example.operator.spark; import scala.Tuple2; import org.apache.hadoop.conf.Configur ...
使用Java API操作HDFS文件系统
使用JUnit封装HDFS import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org ...

随机推荐

用VirtualBox构建MySQL测试环境笔记
网络环境: 宿主机:Win7 VirtualBox 4.1.4 + Ubuntu 11.10 server 64bit 宿主机使用网线的时候,客户机在Bridged Adapter模式下,使用Athe ...
Centos 6.4 /usr/src/kernels 目录为空解决方法
/usr/src/kernels 目录下是Linux的内核源码,如果其为空,则需要安装 kernel-headers 和 kernel-devel包
centos 7.0 mono&Jexus V5.5.3安装
首先我们需要先配置一下yum源中mono的引用说明: 第一步: vi /etc/yum.repos.d/mono.repo 第二步:在刚打开的文件中编辑如下内容 [mono]name=monobase ...
探究为何rem在chrome浏览器上计算出错
最近在一个项目中,测试同学提了一个bug,说手机上有个页面的某些字体显示偏大.就像这样我用chrome浏览器在pc上测试了一下,发现pc上也有这个问题,但是用其它浏览器打开这个页面就没有发现这个问题 ...
composer php依赖管理工具
#composer是什么 Composer 是 PHP 的一个依赖管理工具.它允许你申明项目所依赖的代码库,它会在你的项目中为你安装他们. composer出现之前我们php项目依赖管理大部分都是手动 ...
C# double float int string 与 byte数组相互转化
在做通信编程的时候,数据发送多采用串行发送方法,实际处理的时候多是以字节为单位进行处理的.在C/C++中多字节变量与Byte进行转化时候比较方便采用UNION即可废话少说看示例:typedef u ...
浏览器九宫格的简单实现 - 蒋宇捷的专栏 - 博客频道 - CSDN.NET
CSS3 来源:http://blog.csdn.net/hfahe/article/details/6125890#1536434-hi-1-22083-42d97150898b1af15ddaae ...
java客户端连接MongoDB数据库的简单使用
1.下载mongoDB的jar包,并引入到工程的CLASSPATH中下载:mongodb2.5驱动包下载如果使用maven项目,最新的依赖如下: <dependency> <gro ...
http://www.ruanyifeng.com/blog/2011/09/restful
http://www.ruanyifeng.com/blog/2011/09/restful
easyui源码翻译1.32--Calendar(日历)
前言前几天加班比较忙未能及时更新翻译的今天多发布几篇..下载该插件翻译源码日历控件显示一个月的日历,允许用户选择日期和移动到下一个或上一个月.默认情况下,一周的第一天是周日.它可以通过设置'f ...

用java api读取HDFS文件

用java api读取HDFS文件的更多相关文章

随机推荐

热门专题