大数据(5) - HDFS中的常用API操作
12.配置 File >> Settings... (Ctrl + Alt + S)
1.关闭reopen选项,打开Intellij IDEA 时不会默认打开上次关闭的项目
2.1 解压apache-maven-3.3.9文件夹放到D盘根目录 apache-maven-3.3.9压缩文件下载
2.1修改maven的settings.xml配置,在apache-maven-3.3.9/conf/settings.xml 已经配置好了,直接使用即可
2.2 在D盘目录下,创建一个m2文件夹
2.3 将刚才修改后的settings.xml文件移动到该m2文件夹下
2.4 修改idea中关于maven的配置(检查Settings和Settings for New Projects中的maven设置是否一样)
File >> Settings ...
File >> Other settings >> Settings for New Projects ...
File >> Project Structure ... (Ctrl + Shift + Alt + S) >> 如下两图,设置你自己java的安装路径
View >> Tool windows >> Maven Projects
ctrl + shift + enter:补全代码后方缺失的符号
shift + enter:直接换行
ctrl + alt + t:弹出可以包裹当前代码的语法列表
alt + enter:相当于eclipse的ctrl + 1,错误智能修复提示
ctrl + p:提示当前方法可以传递的参数类型以及参数个数
ctrl + d:复制当前行到下一行,类似eclipse中的ctrl + alt + 光标下
ctrl + x:剪切当前光标所在行
ctrl + y:删除当前光标所在行
ctrl + shift + 光标上或下:移动当前行代码
ctrl + alt + 光标左右:可以在查看代码的时候,切换上一次或下一次查看的视图
ctrl + o:弹出当前类中可以覆写的方法列表
ctrl + alt + v:生成一个变量接受某个方法返回的值
ctrl + shift + i : 查看方法定义代码
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.*;
- import org.apache.hadoop.io.IOUtils;
- import org.junit.Test;
- import java.io.IOException;
- import java.net.URI;
- import java.net.URISyntaxException;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- /**
-  * Sample JUnit tests that drive HDFS through the Hadoop Java API.
-  *
-  * <p>Every test opens its own {@link FileSystem} through {@link #openFileSystem()} and
-  * releases it (and any streams) with try-with-resources, so nothing leaks when an
-  * operation fails half-way.
-  *
-  * <p>The NameNode address is not hard-coded: it is read from {@code fs.defaultFS} in the
-  * {@code core-site.xml} found on the classpath (copy the cluster's configuration files
-  * into {@code src/main/resources}).
-  */
- public class HDFSClient {
-     /** HDFS user on whose behalf all operations are executed. */
-     private static final String HDFS_USER = "admin";
-     /** Local directory that contains {@code bin\winutils.exe}. */
-     private static final String HADOOP_HOME_DIR = "D://winutils";
-     /** Size of one HDFS block of the sample archive (128 MB). */
-     private static final long BLOCK_SIZE = 128L * 1024 * 1024;
-     /**
-      * Creates a connection to HDFS as {@link #HDFS_USER}.
-      *
-      * <p>Setting {@code hadoop.home.dir} works around
-      * "util.Shell: Failed to locate the winutils binary in the hadoop binary path" on Windows:
-      * <ol>
-      *   <li>download winutils.exe (http://public-repo-1.hortonworks.com/hdp-win-alpha/winutils.exe);</li>
-      *   <li>create a folder such as D:\winutils\bin;</li>
-      *   <li>copy winutils.exe into D:\winutils\bin;</li>
-      *   <li>set the HADOOP_HOME environment variable to D:\winutils;</li>
-      *   <li>set the {@code hadoop.home.dir} system property, as done here.</li>
-      * </ol>
-      *
-      * @return an open file system; the caller must close it
-      * @throws IllegalStateException if {@code fs.defaultFS} does not point at an hdfs:// URI,
-      *         which would otherwise make the tests operate on the local disk
-      * @throws IOException if the connection cannot be established
-      * @throws InterruptedException if the thread is interrupted while connecting
-      */
-     private static FileSystem openFileSystem() throws IOException, InterruptedException {
-         System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR);
-         Configuration conf = new Configuration();
-         // "hdfs://" without host:port is rejected by java.net.URI, so take the address from the configuration.
-         URI nameNode = FileSystem.getDefaultUri(conf);
-         if (!"hdfs".equals(nameNode.getScheme())) {
-             throw new IllegalStateException("fs.defaultFS is '" + nameNode
-                     + "', not an hdfs:// URI; copy the cluster's core-site.xml into src/main/resources");
-         }
-         return FileSystem.get(nameNode, conf, HDFS_USER);
-     }
-     /**
-      * Copies at most {@code limit} bytes from {@code in} to {@code out}, stopping early at end of stream.
-      * Only the bytes actually returned by each read are written.
-      *
-      * @param in source stream, positioned where copying should start
-      * @param out destination stream
-      * @param limit maximum number of bytes to copy; {@link Long#MAX_VALUE} means "until end of stream"
-      * @throws IOException if reading or writing fails
-      */
-     private static void copyUpTo(FSDataInputStream in, FileOutputStream out, long limit) throws IOException {
-         byte[] buffer = new byte[4096];
-         long remaining = limit;
-         while (remaining > 0) {
-             int wanted = (int) Math.min(buffer.length, remaining);
-             int read = in.read(buffer, 0, wanted);
-             if (read < 0) {
-                 break; // end of stream reached before the limit
-             }
-             out.write(buffer, 0, read);
-             remaining -= read;
-         }
-     }
-     /**
-      * Creates a directory in HDFS.
-      */
-     @Test
-     public void testMkdir() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             fileSystem.mkdirs(new Path("/user/admin/404"));
-         }
-     }
-     /**
-      * Downloads a file from HDFS to the local disk, keeping the source file.
-      */
-     @Test
-     public void testCopytoLocalFile() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             fileSystem.copyToLocalFile(
-                     false,
-                     new Path("/user/admin/403/hello_world2.txt"),
-                     new Path("d:\\copy_words.txt"));
-         }
-     }
-     /**
-      * Deletes a directory recursively.
-      */
-     @Test
-     public void testDelete() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             fileSystem.delete(new Path("/user/admin/403/"), true);
-         }
-     }
-     /**
-      * Uploads a local file.
-      * Through the Java API an already existing HDFS file is replaced by the upload,
-      * whereas the shell command reports that the file already exists.
-      */
-     @Test
-     public void testCopyFromLocal() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             fileSystem.copyFromLocalFile(new Path("d:" + File.separator + "copy_words.txt"), new Path("/"));
-         }
-     }
-     /**
-      * Renames a file.
-      */
-     @Test
-     public void testReanme() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             fileSystem.rename(new Path("/user/admin/403/abc.txt"), new Path("/user/admin/403/aaa.txt"));
-         }
-     }
-     /**
-      * Recursively lists all files under the root directory together with the hosts of their blocks.
-      */
-     @Test
-     public void testListFile() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/"), true);
-             while (listFiles.hasNext()) {
-                 LocatedFileStatus fileStatus = listFiles.next();
-                 System.out.println("文件名称:" + fileStatus.getPath().getName());
-                 System.out.println("文件长度:" + fileStatus.getLen());
-                 System.out.println("文件权限:" + fileStatus.getPermission());
-                 System.out.println("文件所属组" + fileStatus.getGroup());
-                 // Block information: print every host that stores a block of this file.
-                 for (BlockLocation blockLocation : fileStatus.getBlockLocations()) {
-                     for (String host : blockLocation.getHosts()) {
-                         System.out.println(host);
-                     }
-                 }
-                 System.out.println("--------------这是一个毫无用处的分割线--------------------");
-             }
-         }
-     }
-     /**
-      * Lists the direct children of the root directory, telling files from directories.
-      */
-     @Test
-     public void testListStatus() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem()) {
-             for (FileStatus status : fileSystem.listStatus(new Path("/"))) {
-                 if (status.isFile()) {
-                     System.out.println("文件:" + status.getPath().getName());
-                 } else {
-                     System.out.println("目录:" + status.getPath().getName());
-                 }
-             }
-         }
-     }
-     /**
-      * Uploads a file to HDFS by copying between streams.
-      */
-     @Test
-     public void putFileToHDFS() throws URISyntaxException, IOException, InterruptedException {
-         // Resources are closed in reverse order, so both streams are closed before the file system.
-         try (FileSystem fileSystem = openFileSystem();
-              FileInputStream inputStream = new FileInputStream(new File("d:\\copy_words.txt"));
-              FSDataOutputStream outputStream = fileSystem.create(new Path("/jinji_words.txt"))) {
-             IOUtils.copyBytes(inputStream, outputStream, fileSystem.getConf(), false);
-         }
-     }
-     /**
-      * Downloads a file from HDFS by copying between streams.
-      */
-     @Test
-     public void getFileFromHDFS() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem();
-              FSDataInputStream inputStream = fileSystem.open(new Path("/hello_world.txt"));
-              FileOutputStream outputStream = new FileOutputStream(new File("d:\\demo.txt"))) {
-             IOUtils.copyBytes(inputStream, outputStream, fileSystem.getConf(), false);
-         }
-     }
-     /**
-      * Downloads a file block by block: this test fetches only the first block (128 MB).
-      * Upload the sample first, e.g.
-      * {@code bin/hdfs dfs -put ~/softwares/installtions/hadoop-2.7.2.tar.gz /}
-      * (the Hadoop archive is a bit over 200 MB, i.e. two blocks).
-      */
-     @Test
-     public void readFileSeek1() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem();
-              FSDataInputStream inputStream = fileSystem.open(new Path("/hadoop-2.7.2.tar.gz"));
-              FileOutputStream outputStream = new FileOutputStream(new File("d:\\hadoop-2.7.2.tar.gz.part1"))) {
-             copyUpTo(inputStream, outputStream, BLOCK_SIZE);
-         }
-     }
-     /**
-      * Downloads the second block: everything from offset 128 MB to the end of the file.
-      */
-     @Test
-     public void readFileSeek2() throws URISyntaxException, IOException, InterruptedException {
-         try (FileSystem fileSystem = openFileSystem();
-              FSDataInputStream inputStream = fileSystem.open(new Path("/hadoop-2.7.2.tar.gz"));
-              FileOutputStream outputStream = new FileOutputStream(new File("d:\\hadoop-2.7.2.tar.gz.part2"))) {
-             inputStream.seek(BLOCK_SIZE);
-             // Copy to end of stream instead of a hard-coded byte count, so no trailing bytes are lost.
-             copyUpTo(inputStream, outputStream, Long.MAX_VALUE);
-         }
-     }
- }
注意:把Linux下/home/admin/modules/hadoop-2.7.2/etc/hadoop文件夹中的log4j.properties、core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml五个文件复制到Windows下项目中的:项目路径\src\main\resources 文件夹中
