通过HA方式操作HDFS

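之前操作HDFS的时候，都是在代码里固定写某一个NameNode的地址，然后去操作。这时就必须自己判断该NameNode的状态是active还是standby，比较繁琐。如果集群启用了HA，客户端只需要配置nameservice的逻辑名称和各个NameNode的RPC地址，由故障转移代理（ConfiguredFailoverProxyProvider）自动连接处于active状态的NameNode，就方便多了。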

直接上代码，看注释：

package com.ideal.template.openbigdata.util;

import java.io.IOException;

import java.net.URI;

import java.sql.ResultSet;

import java.sql.ResultSetMetaData;

import java.sql.SQLException;

import java.sql.Timestamp;

import java.text.SimpleDateFormat;

import java.util.LinkedList;

import java.util.List;

//import org.anarres.lzo.LzoAlgorithm;

//import org.anarres.lzo.LzoDecompressor;

//import org.anarres.lzo.LzoInputStream;

//import org.anarres.lzo.LzoLibrary;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.security.UserGroupInformation;

import org.apache.log4j.Logger;

/**
 * Helper for reading and writing HDFS through an HA (high-availability) nameservice,
 * optionally authenticating with Kerberos.
 *
 * <p>Two families of methods exist:
 * <ul>
 *   <li>{@link #getSize}, {@link #createHdfsFile(String, String, String, String)} and
 *       {@link #deleteHdfsFile} take an explicit file-system URI and user name and use a
 *       default {@link Configuration}.</li>
 *   <li>{@link #createHdfsFile(String, ResultSet, String, FlagUtil)} and {@link #listDir}
 *       use the hard-coded HA configuration from {@link #getConf()} and log in from the
 *       keytab via {@link #kerberosLogin(Configuration)}.</li>
 * </ul>
 */
public class HadoopUse
{
    private static final Log log = LogFactory.getLog(HadoopUse.class);

    /** Logical name of the HA nameservice; clients address this name instead of a NameNode host. */
    private static final String NAMESERVICE = "dragoncluster";

    /** Kerberos principal used by {@link #kerberosLogin(Configuration)}. */
    private static final String KERBEROS_PRINCIPAL = "openbigdata@DRAGON.COM";

    /** Keytab file holding the credentials of {@link #KERBEROS_PRINCIPAL}. */
    private static final String KERBEROS_KEYTAB = "/etc/security/keytabs/openbigdata.keytab";

    /** Charset name used for every piece of text written to HDFS. */
    private static final String ENCODING = "utf-8";

    /** Number of rows written between two checks of the termination flag. */
    private static final int TERMINATION_CHECK_INTERVAL = 1000;

    /**
     * Builds the client configuration for the HA nameservice.
     *
     * <p>The entries correspond to what would normally be provided by core-site.xml and
     * hdfs-site.xml: the default file system points at the logical nameservice, both
     * NameNodes are listed with their RPC addresses, and the failover proxy provider is
     * set so the client does not need to know which NameNode is active.
     *
     * @return a new configuration carrying the HA settings
     */
    private static Configuration getConf()
    {
        Configuration conf = new Configuration();

        conf.set("fs.defaultFS", "hdfs://" + NAMESERVICE);
        conf.set("dfs.nameservices", NAMESERVICE);
        conf.set("dfs.ha.namenodes." + NAMESERVICE, "nn1,nn2");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn1", "n01.dragon.com:8020");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn2", "n02.dragon.com:8020");
        conf.set("dfs.client.failover.proxy.provider." + NAMESERVICE,
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

        // Pin the implementation classes explicitly; the original author added this because
        // of a "class overriding" problem (presumably service files clobbered when jars are
        // merged -- TODO confirm).
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        return conf;
    }

    /**
     * Switches the given configuration to Kerberos authentication and logs in from the keytab.
     *
     * @param conf configuration to mutate and install as the security configuration
     * @throws Exception if the keytab login fails
     */
    private static void kerberosLogin(Configuration conf) throws Exception
    {
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(KERBEROS_PRINCIPAL, KERBEROS_KEYTAB);
    }

    /**
     * Closes a resource, logging (instead of propagating) a failure to close.
     * Intended for {@code finally} blocks where the primary outcome is already decided.
     *
     * @param resource resource to close; {@code null} is ignored
     */
    private static void closeQuietly(java.io.Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException ex)
        {
            log.warn("HadoopUse.closeQuietly" + ex.getMessage(), ex);
        }
    }

    /**
     * Recursively deletes {@code target} when it exists, so that it can be recreated.
     *
     * @param fs     file system to operate on
     * @param target path to remove
     * @throws IOException if the existence check or the delete fails
     */
    private static void deleteIfExists(FileSystem fs, Path target) throws IOException
    {
        if (fs.exists(target))
        {
            fs.delete(target, true);
        }
    }

    /**
     * Reads one column of the current row as text.
     *
     * @param resultSet  cursor positioned on the row to read
     * @param index      1-based column index
     * @param dateColumn whether the column was detected as a DATE column
     * @param dateFormat formatter applied to DATE columns
     * @return the textual value, or {@code null} when the column is SQL NULL
     * @throws SQLException if the column cannot be read
     */
    private static String readColumn(ResultSet resultSet, int index, boolean dateColumn,
        SimpleDateFormat dateFormat) throws SQLException
    {
        if (dateColumn)
        {
            // Read DATE columns as Timestamp so the time-of-day part is kept in the output.
            Timestamp timestamp = resultSet.getTimestamp(index);
            return timestamp == null ? null : dateFormat.format(timestamp);
        }
        Object value = resultSet.getObject(index);
        return value == null ? null : String.valueOf(value);
    }

    /**
     * Returns the total content length under {@code uri} in whole megabytes.
     *
     * <p>The length is divided by 1024 twice using integer division, so anything smaller
     * than 1 MB yields 0. Any failure (including an invalid {@code uri}) is logged and also
     * reported as 0.
     *
     * @param uri  full URI of the file or directory to measure
     * @param user user name to access the file system as
     * @return the size in MB, or 0 on failure
     */
    public static long getSize(String uri, String user)
    {
        FileSystem fs = null;
        try
        {
            URI location = URI.create(uri);
            fs = FileSystem.get(location, new Configuration(), user);
            return fs.getContentSummary(new Path(location)).getLength() / 1024 / 1024; // bytes -> MB
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.getSize" + ex.getMessage(), ex);
            return 0;
        }
        finally
        {
            // The handle was obtained for this call only; release it so it does not accumulate.
            closeQuietly(fs);
        }
    }

    /**
     * Creates (or replaces) a file on HDFS and writes {@code content} into it as UTF-8 text.
     *
     * <p>An existing path with the same name is deleted recursively first.
     *
     * @param uri      URI identifying the file system to connect to
     * @param user     user name to access the file system as
     * @param fullName path of the file to create
     * @param content  text to write
     * @return {@code true} when the file was written; {@code false} when {@code fullName}
     *         or {@code content} is null/empty or when any error occurred (the error is logged)
     */
    public static boolean createHdfsFile(String uri, String user, String fullName, String content)
    {
        if (fullName == null || fullName.length() == 0)
        {// no target path given
            return false;
        }
        if (content == null || content.length() == 0)
        {// nothing to write
            return false;
        }

        FileSystem fs = null;
        FSDataOutputStream os = null;
        try
        {
            fs = FileSystem.get(URI.create(uri), new Configuration(), user);
            Path target = new Path(fullName);
            deleteIfExists(fs, target);

            os = fs.create(target);
            os.write(content.getBytes(ENCODING));
            // Close inside the try so that a failure while flushing is reported as false.
            os.close();
            os = null;
            return true;
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.createHdfsFile" + ex.getMessage(), ex);
            return false;
        }
        finally
        {
            closeQuietly(os);
            closeQuietly(fs);
        }
    }

    /**
     * Recursively deletes a file or directory on HDFS.
     *
     * @param uri      URI identifying the file system to connect to
     * @param user     user name to access the file system as
     * @param fullName path to delete
     * @return {@code true} when the path no longer exists afterwards (including when it never
     *         existed); {@code false} when {@code fullName} is null/empty, when the path
     *         could not be removed, or when any error occurred (the error is logged)
     */
    public static boolean deleteHdfsFile(String uri, String user, String fullName)
    {
        if (fullName == null || fullName.length() == 0)
        {// no target path given
            log.error("HadoopUse.deleteHdfsFile文件名不合法");
            return false;
        }

        FileSystem fs = null;
        try
        {
            fs = FileSystem.get(URI.create(uri), new Configuration(), user);
            Path target = new Path(fullName);
            // delete() returning false is only a failure if the path is still there.
            return fs.delete(target, true) || !fs.exists(target);
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.deleteHdfsFile" + ex.getMessage(), ex);
            return false;
        }
        finally
        {
            closeQuietly(fs);
        }
    }

    /**
     * Dumps a result set into a delimited text file on the HA cluster.
     *
     * <p>Each row becomes one line terminated by {@code \r\n}; columns are joined with
     * {@code terminated}; SQL NULL is written as an empty field; columns whose type name is
     * {@code DATE} are formatted as {@code yyyy-MM-dd HH:mm:ss}. All text is UTF-8 encoded.
     * An existing path with the same name is deleted recursively first. Does nothing when
     * {@code resultSet} is {@code null}.
     *
     * <p>NOTE(review): like the original code, this closes the instance returned by
     * {@code FileSystem.get(conf)}. If other code in the same JVM shares that instance,
     * confirm that closing it here is acceptable.
     *
     * @param fullName   path of the file to create
     * @param resultSet  cursor to export; it is iterated but not closed here
     * @param terminated column separator; must not be {@code null} when the result set has
     *                   more than one column
     * @param flag       checked once every 1000 rows; when it reports termination the export
     *                   stops and the rows written so far are kept; {@code null} disables the check
     * @throws IllegalArgumentException if {@code terminated} is {@code null} for a multi-column result set
     * @throws IOException  if writing to HDFS fails
     * @throws SQLException if reading the result set fails
     * @throws Exception    if the Kerberos login fails
     */
    public void createHdfsFile(String fullName, ResultSet resultSet, String terminated, FlagUtil flag)
        throws IOException, InterruptedException, SQLException, Exception
    {
        if (resultSet == null)
        {// no cursor, nothing to export
            return;
        }

        // Inspect the metadata before touching HDFS so that bad input cannot destroy an
        // existing file and then fail half-way through.
        ResultSetMetaData rsmd = resultSet.getMetaData();
        int columnCount = rsmd.getColumnCount();
        if (columnCount > 1 && terminated == null)
        {
            throw new IllegalArgumentException(
                "terminated must not be null when the result set has more than one column");
        }

        // Column types do not change between rows; resolve them once (index 0 is unused).
        boolean[] dateColumn = new boolean[columnCount + 1];
        for (int i = 1; i <= columnCount; i++)
        {
            dateColumn[i] = "DATE".equalsIgnoreCase(rsmd.getColumnTypeName(i));
        }

        byte[] separator = terminated == null ? new byte[0] : terminated.getBytes(ENCODING);
        byte[] lineEnd = "\r\n".getBytes(ENCODING);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        Configuration conf = getConf();
        kerberosLogin(conf);

        FileSystem fs = null;
        FSDataOutputStream out = null;
        boolean aborted = false;
        try
        {
            fs = FileSystem.get(conf);
            Path target = new Path(fullName);
            deleteIfExists(fs, target);
            out = fs.create(target);

            int rowsSinceCheck = 0;
            while (resultSet.next())
            {
                rowsSinceCheck++;
                if (rowsSinceCheck >= TERMINATION_CHECK_INTERVAL)
                {// poll the flag only occasionally to keep the per-row cost low
                    if (flag != null && flag.getTeminalStatus())
                    {
                        aborted = true;
                        break;
                    }
                    rowsSinceCheck = 0;
                }

                for (int i = 1; i <= columnCount; i++)
                {
                    String item = readColumn(resultSet, i, dateColumn[i], sdf);
                    if (item != null)
                    {
                        out.write(item.getBytes(ENCODING));
                    }
                    if (i < columnCount)
                    {
                        out.write(separator);
                    }
                }
                out.write(lineEnd);
            }

            // Close inside the try so that a failure while flushing propagates to the caller.
            out.flush();
            out.close();
            out = null;
        }
        finally
        {
            closeQuietly(out);
            closeQuietly(fs);
        }

        if (aborted)
        {
            log.info("fullName:" + fullName + "写入被终止");
        }
        else
        {
            log.info("fullName:" + fullName + "写入成功");
        }
    }

    /**
     * Lists the direct children of a directory on the HA cluster.
     *
     * @param path directory to list
     * @return the full path of every entry; an empty list when the directory has no entries
     * @throws Exception if the Kerberos login or the listing fails
     */
    public static List<String> listDir(String path) throws Exception
    {
        Configuration conf = getConf();
        kerberosLogin(conf);

        // NOTE(review): FileSystem.get(conf) normally returns a shared, cached instance, so it
        // is left open here as in the original code -- confirm against the deployment.
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] entries = fs.listStatus(new Path(path));

        List<String> pathList = new LinkedList<String>();
        if (entries != null)
        {
            for (FileStatus entry : entries)
            {
                pathList.add(entry.getPath().toString());
            }
        }
        return pathList;
    }

    /**
     * Command-line entry point: prints every entry of the directory given as first argument.
     *
     * @param args {@code args[0]} is the HDFS directory to list
     * @throws Exception if the listing fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args == null || args.length == 0)
        {
            System.err.println("Usage: HadoopUse <hdfs-path>");
            System.exit(1);
        }

        for (String path : listDir(args[0]))
        {
            System.out.println(path);
        }
    }
}

注意：以上代码同时用到了HA和Kerberos认证。代码中的nameservice名称、NameNode地址、Kerberos principal以及keytab路径都需要根据实际集群环境进行修改。

通过HA方式操作HDFS的更多相关文章

使用命令行的方式操作hdfs
必须要用打全路径,没有相对路径的概念,或者cd的概念打印报告: 所有的命令显示出来: 以下的操作分别是创建创建文件夹,删除文件夹,显示文件夹,可见删除文件夹只能够使用-rmr . 从本地拷贝文件到h ...
Java API操作HA方式下的Hadoop
通过java api连接Hadoop集群时,如果集群支持HA方式,那么可以通过如下方式设置来自动切换到活动的master节点上.其中,ClusterName 是可以任意指定的,跟集群配置无关,dfs. ...
用流的方式来操作hdfs上的文件
import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import ...
使用javaAPI操作hdfs
欢迎到https://github.com/huabingood/everyDayLanguagePractise查看源码. 一.构建环境在hadoop的安装包中的share目录中有hadoop所有 ...
使用Java方式连接HDFS
IDEA中新建Maven工程,添加POM依赖, 在IDE的提示中, 点击 Import Changes 等待自动下载完成相关的依赖包. <?xml version="1.0" ...
Hadoop Java API操作HDFS文件系统（Mac）
1.下载Hadoop的压缩包 tar.gz https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/stable/ 2.关联jar包在 ...
使用Java API方式连接HDFS Client测试
IDEA中新建Maven工程,添加POM依赖, 在IDE的提示中, 点击 Import Changes 等待自动下载完成相关的依赖包. <?xml version="1.0" ...
Linux -- 之HDFS实现自动切换HA（全新HDFS）
Linux -- 之HDFS实现自动切换HA(全新HDFS) JDK规划 1.7及以上 https://blog.csdn.net/meiLin_Ya/article/details/8065094 ...
（第3篇）HDFS是什么？HDFS适合做什么？我们应该怎样操作HDFS系统？
摘要: 这篇文章会详细介绍HDFS是什么,HDFS的作用,适合和不适合的场景,我们该如何操作HDFS? HDFS文件系统 Hadoop 附带了一个名为 HDFS(Hadoop分布式文件系统)的分布 ...

随机推荐

LR工作原理
LoadRunner的总体架构图,包括各个组件VUGen, Controller和Analysis之间的关系. LoadRunner由四大组件组成:VuGen.控制器.负载发生器和分析器. 1.VuG ...
[Selenium] 操作页面元素等待时间
WebDriver 在操作页面元素等待时间时,提供2种等待方式:一个为显式等待,一个为隐式等待,其区别在于: 1)显式等待:明确地告诉 WebDriver 按照特定的条件进行等待,条件未达到就一直等待 ...
Unreachable code
Unreachable code 错误不可达代码,比如在循环的break或者return后面的代码就是不可达代码,因为在执行它们之前就已经跳出方法了,只要把这段代码移到break或者return之前 ...
PostgreSQL学习之【用户权限管理】说明
背景最近在学习PostgreSQL,看了用户权限管理文档,涉及到的知识点比较多,顺便写篇文章进行整理并不定时更新,也方便自己后续进行查阅. 说明注意:创建好用户(角色)之后需要连接的话,还需要修改 ...
nodejs 打造多人对战游戏服务器（初级入门）
使用socket.set 和 socket.get 在存取玩家信息百牛信息技术bainiu.ltd整理发布于博客园 socket.get('playerinfo', function (err, p ...
安装PostgreSQL数据库(Linux篇)
0.编译环境 Linux: CentOS 5.5 gcc: 4.1.2 1. 安装PostgreSQL 1) 解压postgresql-9.1.7.tar.bz2 #tar jxvf postgres ...
Cascaded pose regression
最近再看face alignment的相关文章,目前比较流行的算法都是基于(Cascaded pose regression,CPR)[1]的框架上做的,该算法之所以流行的原因是简单高效.CPR分为训 ...
XMPP即时通讯基础知识
XMPP参考一.定义 XMPP 是一种很类似于http协议的一种数据传输协议,它的过程就如同“解包装--〉包装”的过程,用户只需要明白它接受的类型,并理解它返回的类型,就可以很好的利用xmpp来进行 ...
A. Hulk
time limit per test 1 second memory limit per test 256 megabytes input standard input output standar ...
View Programming Guide for iOS ---- iOS 视图编程指南(二)---View and Window Architecture
View and Window Architecture 视图和窗口架构 Views and windows present your application’s user interface and ...

通过HA方式操作HDFS

通过HA方式操作HDFS的更多相关文章

随机推荐

热门专题