HDFS-查看文件属性+文件名称过滤

package com.zhen.hdfs;

import java.io.IOException;

import java.io.OutputStream;

import java.net.URI;

import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

/**

 * @author FengZhen

 * @date 2018年8月12日

 *

 */

public class FileSystemStatusAPI {

	/*
	 * File metadata: FileStatus.
	 * An important feature of any file system is the ability to browse its directory
	 * structure and to retrieve information about the files and directories it stores.
	 * The FileStatus class encapsulates the metadata of files and directories in the file
	 * system, including file length, block size, replication, modification time, owner and
	 * permission information.
	 */

	/** Path of the sample file written by {@link #setUp()} and inspected by {@link #fileStatusForFile()}. */
	private static final String TEST_FILE = "/user/hdfs/MapReduce/data/test_status";

	/** Directory below which the glob examples search. */
	private static final String DATA_DIR = "/user/hdfs/MapReduce/data";

	/** File system handle shared by the demo methods; assigned whenever one of them opens a file system. */
	private static FileSystem fileSystem;

	/**
	 * Runs the currently enabled demo. The commented-out calls are the other demos
	 * contained in this class; enable them as needed.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		//setUp();
		//fileStatusForFile();
		//tearDown();
		//globbing();
		pathFilter();
	}

	/**
	 * Opens the file system and writes the sample file {@code TEST_FILE} with the
	 * text "content". The file system is left open; call {@link #tearDown()} to close it.
	 * Failures are reported with a stack trace.
	 */
	public static void setUp() {
		try {
			FileSystem fs = openFileSystem(TEST_FILE);
			// try-with-resources closes the stream even when write() fails.
			try (OutputStream outputStream = fs.create(new Path(TEST_FILE))) {
				outputStream.write("content".getBytes("UTF-8"));
			}
		} catch (IOException | URISyntaxException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Closes the shared file system if one has been opened. A failure while closing
	 * is reported with a stack trace.
	 */
	public static void tearDown() {
		if (fileSystem == null) {
			return;
		}
		try {
			fileSystem.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints the metadata of the sample file. Example output:
	 * <pre>
	 * path=/user/hdfs/MapReduce/data/test_status
	 * isDir=false
	 * length=7
	 * modificationTime=1534080334126
	 * replication=3
	 * blockSize=134217728
	 * owner=FengZhen
	 * group=hdfs
	 * permissions=rw-r--r--
	 * </pre>
	 *
	 * @throws IllegalStateException if no file system has been opened yet
	 */
	public static void fileStatusForFile() {
		if (fileSystem == null) {
			// Fail with an explicit message instead of a bare NullPointerException.
			throw new IllegalStateException("setUp() must be called before fileStatusForFile()");
		}
		Path file = new Path(TEST_FILE);
		try {
			FileStatus fileStatus = fileSystem.getFileStatus(file);
			String path = fileStatus.getPath().toUri().getPath();
			System.out.println("path="+path);
			boolean isDir = fileStatus.isDirectory();
			System.out.println("isDir="+isDir);
			long length = fileStatus.getLen();
			System.out.println("length="+length);
			long modificationTime = fileStatus.getModificationTime();
			System.out.println("modificationTime="+modificationTime);
			int replication = fileStatus.getReplication();
			System.out.println("replication="+replication);
			long blockSize = fileStatus.getBlockSize();
			System.out.println("blockSize="+blockSize);
			String owner = fileStatus.getOwner();
			System.out.println("owner="+owner);
			String group = fileStatus.getGroup();
			System.out.println("group="+group);
			String permissions = fileStatus.getPermission().toString();
			System.out.println("permissions="+permissions);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * File patterns (globbing).
	 * <p>
	 * Processing a batch of files in a single operation is a common requirement. Using
	 * wildcards in one expression to match several files is convenient because the inputs
	 * do not have to be listed one by one; this operation is called "globbing".
	 * Hadoop offers two FileSystem methods for it:
	 * <pre>
	 * public FileStatus[] globStatus(Path pathPattern) throws IOException
	 * public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException
	 * </pre>
	 * globStatus returns the FileStatus objects of all files whose path matches the given
	 * pattern, sorted by path. The optional PathFilter can restrict the matches further.
	 * <p>
	 * This demo prints every path matching the pattern and then closes the file system.
	 * Any failure is reported with a stack trace.
	 */
	public static void globbing() {
		// The file system is closed by try-with-resources even when globStatus fails.
		try (FileSystem fs = openFileSystem(DATA_DIR)) {
			// Example directories: /2018/08/12 and /2017/08/11
			FileStatus[] matches = fs.globStatus(new Path("/user/hdfs/MapReduce/data/*/*/{11,12}"));
			// Sample patterns and their results:
			// 1. /user/hdfs/MapReduce/data/201[78] (also 201[7-8] or 201[^01234569])
			//    -> hdfs://fz/user/hdfs/MapReduce/data/2017 hdfs://fz/user/hdfs/MapReduce/data/2018
			// 2. /user/hdfs/MapReduce/data/*/*/11
			//    -> hdfs://fz/user/hdfs/MapReduce/data/2017/08/11
			// 3. /user/hdfs/MapReduce/data/*/*/{11,12}
			//    -> hdfs://fz/user/hdfs/MapReduce/data/2017/08/11 hdfs://fz/user/hdfs/MapReduce/data/2018/08/12
			printPaths(matches);
		} catch (Exception e) {
			// Demo entry point: report any failure instead of propagating it.
			e.printStackTrace();
		}
	}

	/**
	 * PathFilter.
	 * <p>
	 * Glob patterns cannot always describe exactly the set of files wanted; for example,
	 * excluding one particular file is generally not possible with a glob alone.
	 * The listStatus and globStatus methods of FileSystem accept an optional PathFilter
	 * object so that matching can be controlled programmatically.
	 * <p>
	 * This demo prints every path matching the pattern except those rejected by the
	 * filter and then closes the file system. Any failure is reported with a stack trace.
	 */
	public static void pathFilter() {
		// The file system is closed by try-with-resources even when globStatus fails.
		try (FileSystem fs = openFileSystem(DATA_DIR)) {
			// Example directories: /2018/08/12 and /2017/08/11, plus a newly added /2017/08/12
			FileStatus[] matches = fs.globStatus(new Path("/user/hdfs/MapReduce/data/201[78]/*/*"), new RegexExcludePathFilter("^.*/2017/08/11$"));
			// Alternative: glob "/user/hdfs/MapReduce/data/2017/*/*" with
			// new RegexExcludePathFilter("/user/hdfs/MapReduce/data/2017/08/11")
			printPaths(matches);
		} catch (Exception e) {
			// Demo entry point: report any failure instead of propagating it.
			e.printStackTrace();
		}
	}

	/**
	 * Obtains the file system for the given URI with a fresh Configuration, stores it in
	 * the shared {@code fileSystem} field and returns it.
	 */
	private static FileSystem openFileSystem(String uri) throws IOException, URISyntaxException {
		fileSystem = FileSystem.get(new URI(uri), new Configuration());
		return fileSystem;
	}

	/**
	 * Prints the path of every status, one per line. A null array prints nothing;
	 * globStatus is documented to return null for a non-glob path that does not exist.
	 */
	private static void printPaths(FileStatus[] statuses) {
		if (statuses == null) {
			return;
		}
		for (FileStatus status : statuses) {
			System.out.println(status.getPath().toString());
		}
	}

}

HDFS-查看文件属性+文件名称过滤的更多相关文章

HDFS的Java客户端操作代码(查看HDFS下的文件是否存在)
1.查看HDFS目录下的文件是否存在 package Hdfs; import java.io.IOException; import java.net.URI; import org.apache. ...
hadoop学习；大数据集在HDFS中存为单个文件；安装linux下eclipse出错解决；查看.class文件插件
sudo apt-get install eclipse 安装后打开eclipse,提示出错 An error has occurred. See the log file /home/pengeor ...
【HDFS API编程】查看目标文件夹下的所有文件、递归查看目标文件夹下的所有文件
使用hadoop命令:hadoop fs -ls /hdfsapi/test 我们能够查看HDFS文件系统/hdfsapi/test目录下的所有文件信息那么使用代码怎么写呢?直接先上代码:(这之后 ...
HDFS设计思路，HDFS使用，查看集群状态，HDFS，HDFS上传文件，HDFS下载文件，yarn web管理界面信息查看，运行一个mapreduce程序，mapreduce的demo
26 集群使用初步 HDFS的设计思路 l 设计思想分而治之:将大文件.大批量文件,分布式存放在大量服务器上,以便于采取分而治之的方式对海量数据进行运算分析: l 在大数据系统中作用: 为各类分布式 ...
linux stat 命令显示文件和文件系统状态（查看文件属性）查看文件inode
stat 显示文件和文件系统状态(查看文件属性) 查看文件inode详细信息 [root@MongoDB ~]# stat /etc/hosts File: ‘/etc/hosts’ Size: B ...
hadoop学习笔记（十）：hdfs在命令行的基本操作命令（包括文件的上传和下载和hdfs中的文件的查看等）
hdfs命令行 ()查看帮助 hdfs dfs -help ()查看当前目录信息 hdfs dfs -ls / ()上传文件 hdfs dfs -put /本地路径 /hdfs路径 ()剪切文件 hd ...
牛客网Java刷题知识点之File对象常用功能：获取文件名称、获取文件路径、获取文件大小、获取文件修改时间、创建与删除、判断、重命名、查看系统根目录、容量获取、获取某个目录下内容、过滤器
不多说,直接上干货! 获取文件名称.获取文件路径.获取文件大小.获取文件修改时间 FileMethodDemo.java package zhouls.bigdata.DataFeatureSelec ...
Hadoop HDFS (3) JAVA訪问HDFS之二文件分布式读写策略
先把上节未完毕的部分补全,再剖析一下HDFS读写文件的内部原理列举文件 FileSystem(org.apache.hadoop.fs.FileSystem)的listStatus()方法能够列出一 ...
Hadoop之HDFS原理及文件上传下载源码分析（上）
HDFS原理首先说明下,hadoop的各种搭建方式不再介绍,相信各位玩hadoop的同学随便都能搭出来. 楼主的环境: 操作系统:Ubuntu 15.10 hadoop版本:2.7.3 HA:否(随 ...

随机推荐

CodeIgniter框架——源码分析之Config.php
CI框架的配置信息被存储在$config数组中,我们可以添加自己的配置信息或配置文件到$config中: $this->config->load('filename'); //加载配置文件 ...
邱老师玩游戏(树形DP) UESTC - 1136
邱老师最近在玩一种战略游戏,在一个地图上,有N座城堡,每座城堡都有一定的宝物,在每次游戏中邱老师允许攻克M个城堡并获得里面的宝物. 但由于地理位置原因,有些城堡不能直接攻克,要攻克这些城堡必须先攻克其 ...
坑爹的 HTTPClient java.lang.NoSuchFieldError: INSTANCE
项目中需要用到httpclient ,maven配置如下 <dependency> <groupId>org.apache.httpcomponents</groupId ...
linux环境配置nginx导致页面不刷新
在linux环境下,配置了nginx负载均衡,由于可能在虚拟主机的配置文件nginx.conf中,对缓存机制未配置成功,导致页面不刷新,仍然显示缓存中的内容. 最后通过注释nginx.conf文件中的 ...
mysql练手
1.根据图创建下列表格没有外键的表先创建,创建顺序为teacher,class,course,student CREATE TABLE class ( cid INT NOT NULL auto_i ...
servlet3.0 的新特性之二注解代替了web.xml配置文件
servlet3.0 的新特性: 注解代替了 web.xml 文件支持了对异步的处理对上传文件的支持 1．注解代替了配置文件 1.删除了web.xml 文件 2. 在Servlet类上添加@Web ...
Insert Buffering
14.5.13.4 Insert Buffering Database applications often insert new rows in the ascending order of the ...
学习Hive和Impala必看经典解析
Hive和Impala作为数据查询工具,它们是怎样来查询数据的呢?与Impala和Hive进行交互,我们有哪些工具可以使用呢? 我们首先明确Hive和Impala分别提供了对应查询的接口: (1)命令 ...
如何配置一个路径，能够既适合Linux平台，又适合Windows平台，可以从这个路径中读取文件
如何配置一个路径,能够既适合Linux平台,又适合Windows平台,可以从这个路径中读取文件? 目的:就是希望在项目的配置文件中配上一样的路径,不管协作者使用的是什么平台,都能够读到文件. 比如:L ...
__init__和__new__
一.__init__方法是什么 __init__方法通常用在初始化一个类实例的时候, class Person(object): """Silly Person" ...

HDFS-查看文件属性+文件名称过滤

HDFS-查看文件属性+文件名称过滤的更多相关文章

随机推荐

热门专题