HDFS 对文件的增删改查

源代码：

pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Build definition for the HDFS Java API demo (HdfsApiDemo). -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.idcast</groupId>
    <artifactId>hdfs_api_demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Make resource filtering and compilation independent of the platform default charset. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <!-- Pinned: the RELEASE meta-version resolves to whatever is newest at build time,
                 which makes builds non-reproducible. The demo uses the JUnit 4 API (org.junit.Test). -->
            <version>4.13.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Builds a single self-contained ("uber") jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <!-- NOTE(review): minimizeJar drops classes that are not statically referenced.
                                 Hadoop loads FileSystem implementations by name, so verify the shaded jar
                                 still resolves hdfs:// before relying on it. -->
                            <minimizeJar>true</minimizeJar>
                            <transformers>
                                <!-- Merge META-INF/services entries from all Hadoop jars instead of letting
                                     one jar's file overwrite the others. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

java:

package cn.idcast.hdfs_api;

import com.jcraft.jsch.IO;

import org.apache.commons.io.IOUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.*;

import org.apache.kerby.util.IOUtil;

import org.apache.log4j.BasicConfigurator;

import org.junit.Test;

import java.io.FileOutputStream;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

/**
 * JUnit-driven walkthrough of the Hadoop {@link FileSystem} API against an HDFS cluster:
 * obtaining a {@code FileSystem}, listing files, creating directories and files,
 * downloading, uploading and merging small local files into one HDFS file.
 *
 * <p>Every test opens its own connection and releases it (and any stream it opened)
 * with try-with-resources, so nothing is leaked when an operation throws.
 */
public class HdfsApiDemo {

    /** NameNode address used by every test. */
    private static final String HDFS_URI = "hdfs://node1:8020";

    /** HDFS user the file operations are executed as. */
    private static final String HDFS_USER = "root";

    /**
     * Opens a connection to {@link #HDFS_URI} acting as {@link #HDFS_USER}.
     * The caller is responsible for closing the returned instance.
     */
    private static FileSystem connectAsRoot()
            throws IOException, URISyntaxException, InterruptedException {
        return FileSystem.get(new URI(HDFS_URI), new Configuration(), HDFS_USER);
    }

    /** Obtain a FileSystem, way 1: {@code FileSystem.get} with fs.defaultFS set on the configuration. */
    @Test
    public void getFileSystem1() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.get(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Obtain a FileSystem, way 2: {@code FileSystem.get} with an explicit URI. */
    @Test
    public void getFileSystem2() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.get(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem);
        }
    }

    /** Obtain a FileSystem, way 3: {@code FileSystem.newInstance} with fs.defaultFS set on the configuration. */
    @Test
    public void getFileSystem3() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Obtain a FileSystem, way 4: {@code FileSystem.newInstance} with an explicit URI. */
    @Test
    public void getFileSystem4() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.newInstance(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Recursively lists every file below {@code /} and prints its path. */
    @Test
    public void listMyFiles() throws IOException, URISyntaxException, InterruptedException {
        // 1: get the FileSystem instance
        try (FileSystem fileSystem = connectAsRoot()) {
            // 2: listFiles(path, true) walks the tree below "/" recursively
            RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
            // 3: walk the iterator
            while (files.hasNext()) {
                LocatedFileStatus status = files.next();
                System.out.println(status.getPath().toString());
            }
        }
    }

    /** Creates the directory {@code /hello/mydir/test} and prints the result of {@code mkdirs}. */
    @Test
    public void mkdirs() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = connectAsRoot()) {
            boolean created = fileSystem.mkdirs(new Path("/hello/mydir/test"));
            System.out.println(created);
        }
    }

    /**
     * Creates the (empty) file {@code /hello/mydir/test/a.txt}.
     *
     * <p>The stream returned by {@code create} is closed immediately so the file is
     * finalized instead of being left open for writing.
     */
    @Test
    public void mkdirsTest() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = connectAsRoot();
             FSDataOutputStream ignored = fileSystem.create(new Path("/hello/mydir/test/a.txt"))) {
            // Nothing to write: opening and closing the stream is what creates the empty file.
        }
    }

    /** Downloads {@code /hello/mydir/test/a.txt} to {@code D://a.txt} by copying the streams manually. */
    @Test
    public void downloadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connectAsRoot();
             FSDataInputStream inputStream = fileSystem.open(new Path("/hello/mydir/test/a.txt"));
             FileOutputStream outputStream = new FileOutputStream("D://a.txt")) {
            IOUtils.copy(inputStream, outputStream);
        }
    }

    /** Downloads the same file using the one-call helper {@code copyToLocalFile}. */
    @Test
    public void downloadFile2() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connectAsRoot()) {
            fileSystem.copyToLocalFile(new Path("/hello/mydir/test/a.txt"), new Path("D://a.txt"));
        }
    }

    /** Uploads the local file {@code D://hdfs-site.txt} into the HDFS root directory. */
    @Test
    public void uploadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connectAsRoot()) {
            fileSystem.copyFromLocalFile(new Path("D://hdfs-site.txt"), new Path("/"));
        }
    }

    /**
     * Merges the files found directly in the local directory {@code D://input} into the
     * single HDFS file {@code /big_txt.txt}. Entries that are not regular files are skipped.
     */
    @Test
    public void mergeFile() throws URISyntaxException, IOException, InterruptedException {
        // Resources are closed in reverse order: HDFS output stream, local FS, then HDFS FS.
        try (FileSystem fileSystem = connectAsRoot();                                   // 1: distributed file system
             LocalFileSystem localFileSystem = FileSystem.getLocal(new Configuration()); // 2: local file system
             FSDataOutputStream outputStream = fileSystem.create(new Path("/big_txt.txt"))) { // 3: target file
            // 4: details of everything inside the local input directory
            FileStatus[] fileStatuses = localFileSystem.listStatus(new Path("D://input"));
            // 5: append the content of each small file to the big file
            for (FileStatus fileStatus : fileStatuses) {
                if (!fileStatus.isFile()) {
                    // A directory cannot be opened as a stream; skip it rather than fail the merge.
                    continue;
                }
                try (FSDataInputStream inputStream = localFileSystem.open(fileStatus.getPath())) {
                    IOUtils.copy(inputStream, outputStream);
                }
            }
        }
    }

}

hdfs对文件的增删改查的更多相关文章

Hadoop基础-HDFS的API实现增删改查
Hadoop基础-HDFS的API实现增删改查作者:尹正杰版权声明:原创作品,谢绝转载!否则将追究法律责任. 本篇博客开发IDE使用的是Idea,如果没有安装Idea软件的可以去下载安装,如何安装 ...
MyBatis学习（二）、SQL语句映射文件(2)增删改查、参数、缓存
二.SQL语句映射文件(2)增删改查.参数.缓存 2.2 select 一个select 元素非常简单.例如:  <select id=" ...
java对xml文件做增删改查------摘录
java对xml文件做增删改查 package com.wss; import java.io.File;import java.util.ArrayList;import java.util.Lis ...
MyBatis学习之二、SQL语句映射文件(2)增删改查、参数、缓存
目录(?)[-] 二SQL语句映射文件2增删改查参数缓存 select insert updatedelete sql parameters 基本类型参数 Java实体类型参数 Map参数多参数的实 ...
【练习】Python第四次：实现对文件的增删改查
一,实现对文件的增删改查 (一),三级菜单的处理结构及退出技巧:使用TAG标记 tag=True while tag: print('leve1') choice=input("level1 ...
基于SpringMVC的文件（增删改查）上传、下载、更新、删除
一.项目背景摘要:最近一直在忙着项目的事,3个项目过去了,发现有一个共同的业务,那就是附件的处理,附件包括各种文档,当然还有图片等特殊文件,由于时间的关系,每次都是匆匆忙忙的搞定上线,称这项目的空档 ...
Python文件操作-文件的增删改查
需求:对文件进行增删改查由于时间原因,本次代码没有增加任何注释,如有疑问,请联系编辑者:闫龙其实我也是醉了,看着这些个代码,我脑袋也特么大了,没办法,大神说了,不让用新知识,只可以使用学过的,所以 ...
使用dom4j对xml文件进行增删改查
1.使用dom4j技术对dom_demo.xml进行增删改查首选要下载dom4j的jar包在官网上找不到,网上搜索了一下在这个链接:http://sourceforge.net/projects/ ...
Python 模拟SQL对文件进行增删改查
#!/usr/bin/env python # _*_ coding:UTF-8 _*_ # __auth__: Dalhhin # Python 3.5.2,Pycharm 2016.3.2 # 2 ...

随机推荐

logging日志模块、配置字典
logging日志模块知识点很多但是需要掌握的很少(会用即可) import logging # 日志有五个等级(从上往下重要程度不一样) # logging.debug('debug级别') # ...
laravel 实现详情分页
选择合适的PHP框架及前端框架布局页面(10分) 首先展示出分类列表,每个分类下只显示3条信息,无需分页 (30分) 在列表页点击文章标题进入详细页面,对应的文章点击量+1(30分) 在详细页面点击 ...
还不会用springboot写接口？快看这里，手把手操作，一发入魂~
1.springboot简介 Spring Boot 可以轻松创建可以"直接运行"的独立的.生产级的基于 Spring 的应用程序. 特征创建独立的 Spring 应用程序直接 ...
还在写SQL做SAP二开？通过RFC调用NetWeaver，让HANA数据库操作更可靠
相比于从零开始构建全套信息化系统,基于成熟的ERP等行业软件做二次开发是更多中大型企业应对个性化软件需求的首选方案.如何在二开模块中,可靠地对成品软件的数据库进行读写操作,以满足单据自动创建.元数据自 ...
使用vscode Container开发调试envoy
由于我最近在研究 envoy 这个项目,这是个cpp的项目,对于我这种cpp新人来说还是比较有压力的,感觉处处都是坑,开个引导文章记录一下. 如果要研究 envoy 项目源码,那肯定是需要代码跳转的, ...
ArcMap操作随记（15）
1.字段顺序的调整 [图层属性]→[字段]([高级排序]功能也在这里) 2.所谓热点地区: 本身是高值,邻居也是高值,才成为热点地区(这是操作中总结出的,但其实了解原理就很简单了,详细请查看空间自相关 ...
Java基础—构造方法
1.构造方法概述构造方法是一种特殊的方法,用来创建对象,当我们不定义时,系统会默认给出一个无参构造方法:一旦我们定义了任意的构造方法,系统就不会给出默认的无参构造方法格式如下: public ca ...
6月6日 python复习面向对象
1.面向对象编程 1.面向过程编程核心是"过程"二字,过程指的是解决问题的步骤,即先干什么再干什么基于该思想编写程序就好比在编写一条流水线,是一种机械式的思维方式优点:复杂的问题 ...
C 上楼梯中国石油大学新生训练赛#11
问题 C: 上楼梯时间限制: 1 Sec 内存限制: 128 MB提交状态题目描述明明上n 级台阶可用四种步幅, 当然每种步幅花费的体力也不一样, 对应关系如下明明开始有m 个体力, 求他最 ...
一种基于USB转串口的设备如何赋予权限
1. 利用open打开USB转串口的设备遇到打开异常. 若用vs调试代码,则需要usermod 对应归到watson组下,因为watson这个是vs ssh连接虚拟机的用户名,若用root则无法使用改 ...

hdfs对文件的增删改查

hdfs对文件的增删改查的更多相关文章

随机推荐

热门专题