Downloading HDFS files by modification time
Use case: download files from the table directories created by different users. The program runs a hadoop cat command to copy each file to the local machine, ships it to the target server via FTP, and records the modification time of each HDFS directory in MySQL. On every run, the timestamps stored in MySQL are compared against the current modification times of the HDFS paths in this batch; only paths whose modification time has changed are downloaded.
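The MySQL table that stores these timestamps is not shown in the post. Judging from the SQL statements in JdbcDirectUtils below, a minimal schema might look like this (the table and column names come from the code; types and the primary key are assumptions):

```sql
-- Hypothetical schema inferred from the SQL in JdbcDirectUtils;
-- column types and the primary key are assumptions.
CREATE TABLE download_time (
    path        VARCHAR(255) NOT NULL COMMENT 'full HDFS path of the table directory',
    modify_time BIGINT       NOT NULL COMMENT 'HDFS modification time (epoch milliseconds)',
    PRIMARY KEY (path)
);
```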
Entry point (FileDownload):
```java
package edm.spark.download.edm.spark.download;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.fs.Path;

import edm.spark.download.edm.spark.util.HdfsFileProcessor;
import edm.spark.download.edm.spark.util.JdbcDirectUtils;

public class FileDownload {

    public static void main(String[] args) throws Exception {
        String local_path = args[0]; // e.g. "/home/hdfs/ysy/"
        String hdfs_path = args[1];  // e.g. "hdfs://hdp/user/"

        HdfsFileProcessor fileProcessor = new HdfsFileProcessor();
        List<String> userLists = fileProcessor.getUserUnderFolder(hdfs_path);
        // only paths whose modification time is newer than the one recorded in MySQL are returned
        List<Path> listPath = fileProcessor.getFileUnderFolder(userLists);
        if (null != listPath && listPath.size() > 0) {
            for (Path path : listPath) {
                String pathName = path.toString();
                String[] nameList = pathName.split("/");
                String time = JdbcDirectUtils.DateTimeFormat(new Date());
                String tableName = nameList[nameList.length - 1] + "_" + time + ".txt";
                String userName = nameList[nameList.length - 3];
                Process ps = null;
                try {
                    // launch the local download script: it fetches the HDFS path and ships it via FTP
                    ps = Runtime.getRuntime().exec(
                            local_path + "download.sh " + pathName + " "
                                    + tableName + " " + userName);
                    System.out.println(local_path + "download.sh " + pathName + " " + tableName);

                    // update the timestamp recorded in MySQL for this path
                    JdbcDirectUtils jdbcForTime = new JdbcDirectUtils();
                    long dateTime = jdbcForTime.queryDate(
                            "select modify_time,path from download_time where path='"
                                    + path.toString() + "'");
                    long insertTime = fileProcessor.getModifycationTime(path);
                    if (dateTime != 0) {
                        jdbcForTime.updateDateTime(insertTime, pathName);
                    } else {
                        // first download of this path: insert the current directory timestamp
                        jdbcForTime.insertDate(insertTime, path.toString());
                    }
                    jdbcForTime.destroy();

                    // collect the script's output and wait for it to finish
                    BufferedReader br = new BufferedReader(
                            new InputStreamReader(ps.getInputStream()));
                    String line;
                    StringBuilder sb = new StringBuilder();
                    while ((line = br.readLine()) != null) {
                        sb.append(line).append("\n");
                    }
                    System.out.println(sb.toString());
                    ps.waitFor();
                } catch (Exception e) {
                    e.printStackTrace();
                } finally {
                    if (ps != null) {
                        ps.destroy();
                    }
                }
            }
        } else {
            System.out.println("no file to download");
        }
    }
}
```
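download.sh itself is not included in the post. Based on the description above (hadoop cat to a local file, then FTP to the target server) and the arguments passed by exec(), a rough sketch could look like the following; the FTP host, credentials, remote directory, and local directory are placeholders:

```bash
#!/bin/bash
# Hypothetical sketch of download.sh; argument order matches the exec() call above.
HDFS_PATH=$1     # HDFS table directory, e.g. hdfs://hdp/user/<user>/tosas/<table>
TABLE_FILE=$2    # local file name, e.g. <table>_<yyyyMMdd>.txt
USER_NAME=$3     # owner of the table directory

LOCAL_DIR=/home/hdfs/ysy      # placeholder: same directory the script lives in
FTP_HOST=ftp.example.com      # placeholder
FTP_USER=ftpuser              # placeholder
FTP_PASS=ftppass              # placeholder

# concatenate all files under the HDFS directory into one local file
hadoop fs -cat "${HDFS_PATH}/*" > "${LOCAL_DIR}/${TABLE_FILE}"

# push the file to the target server via FTP
ftp -n "${FTP_HOST}" <<EOF
user ${FTP_USER} ${FTP_PASS}
binary
cd /upload/${USER_NAME}
put ${LOCAL_DIR}/${TABLE_FILE}
bye
EOF
```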
HdfsFileProcessor:
```java
package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.AccessControlException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

public class HdfsFileProcessor {

    static final Logger logger = LoggerFactory.getLogger(HdfsFileProcessor.class);

    protected FileSystem fileSystem;
    private Configuration conf;

    public HdfsFileProcessor() {
        init();
    }

    public void init() {
        conf = new Configuration();
        conf.addResource("resources/hdfs-site.xml");
        conf.addResource("resources/core-site.xml");
        try {
            fileSystem = FileSystem.get(conf);
        } catch (IOException e) {
            logger.error("init error.......", e);
        }
    }

    public final boolean checkFile(String filePath) {
        boolean exists = false;
        try {
            Path path = new Path(filePath);
            exists = fileSystem.exists(path);
        } catch (Exception e) {
            logger.error("checkFile failed: " + filePath, e);
        }
        return exists;
    }

    /**
     * List table directories under each user's "tosas" folder whose modification time
     * is newer than the time recorded in MySQL.
     */
    public List<Path> getFileUnderFolder(List<String> names) throws IOException, SQLException {
        JdbcDirectUtils jdbcForTime = new JdbcDirectUtils();
        List<Path> paths = Lists.newArrayList();
        for (String name : names) {
            Path folderPath = new Path("hdfs://hdp/user/" + name + "/");
            if (fileSystem.exists(folderPath)) {
                try {
                    FileStatus[] fileStatus = fileSystem.listStatus(folderPath);
                    for (int i = 0; i < fileStatus.length; i++) {
                        Path path = fileStatus[i].getPath();
                        if (path.toString().contains("tosas")) {
                            FileStatus[] tableStatus = fileSystem.listStatus(path);
                            for (int j = 0; j < tableStatus.length; j++) {
                                Path tablePath = tableStatus[j].getPath();
                                long modifycationTime =
                                        fileSystem.getFileStatus(tablePath).getModificationTime();
                                long dataTime = jdbcForTime.queryDate(
                                        "select modify_time,path from download_time where path='"
                                                + tablePath.toString() + "'");
                                if (modifycationTime > dataTime) {
                                    paths.add(tablePath);
                                }
                            }
                        }
                    }
                } catch (RemoteException e) {
                    logger.error("", e);
                } catch (AccessControlException e) {
                    logger.error("", e);
                }
            }
        }
        return paths;
    }

    /**
     * Return the modification time of the given HDFS path.
     */
    public long getModifycationTime(Path path) throws IOException {
        return fileSystem.getFileStatus(path).getModificationTime();
    }

    /**
     * List the user names that own directories under the given path.
     */
    public List<String> getUserUnderFolder(String path) throws Exception {
        List<String> userList = Lists.newArrayList();
        Path userPath = new Path(path);
        if (fileSystem.exists(userPath)) {
            FileStatus[] fileStatus = fileSystem.listStatus(userPath);
            for (int i = 0; i < fileStatus.length; i++) {
                String child = fileStatus[i].getPath().toString();
                String[] pathes = child.split("/");
                // "hdfs://hdp/user/<name>" splits into ["hdfs:", "", "hdp", "user", "<name>"]
                if (pathes.length > 4) {
                    userList.add(pathes[4]);
                }
            }
        }
        return userList;
    }

    public void destroy() throws IOException {
        if (fileSystem != null) {
            fileSystem.close();
        }
        fileSystem = null;
    }
}
```
JdbcDirectUtils:
```java
package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;

import com.google.common.collect.Maps;

public class JdbcDirectUtils {

    private static Connection conn;
    private Statement stmt;
    private String file_dir = "/template/download_mysql.txt";
    private Map<String, String> jdbcConfMap = Maps.newHashMap();
    private LoadHdfsConf mysqlConf;

    public JdbcDirectUtils() {
        initDriver();
    }

    public void initDriver() {
        try {
            if (conn == null) {
                // JDBC connection settings are read from a key=value file stored on HDFS
                mysqlConf = new LoadHdfsConf();
                jdbcConfMap = mysqlConf.readHdfsFile(file_dir);
                Class.forName("com.mysql.jdbc.Driver");
                String url = "jdbc:mysql://" + jdbcConfMap.get("url") + ":"
                        + jdbcConfMap.get("port") + "/"
                        + jdbcConfMap.get("schema") + "?user="
                        + jdbcConfMap.get("user") + "&password="
                        + jdbcConfMap.get("password")
                        + "&useUnicode=true&characterEncoding="
                        + jdbcConfMap.get("characterEncoding");
                conn = DriverManager.getConnection(url);
            }
            stmt = conn.createStatement();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Update the recorded modification time of a path.
     */
    public void updateDateTime(long date, String path) throws SQLException {
        stmt.executeUpdate("update download_time set modify_time=" + date
                + " where path='" + path + "'");
    }

    /**
     * Return the modify_time recorded for a path, or 0 if no row exists yet.
     */
    public long queryDate(String sql) throws SQLException {
        ResultSet rs = stmt.executeQuery(sql);
        long dateTime = 0;
        while (rs.next()) {
            dateTime = rs.getLong("modify_time");
        }
        rs.close();
        return dateTime;
    }

    public void insertDate(Long date, String path) throws SQLException {
        stmt.executeUpdate("insert into download_time(path,modify_time) values ('"
                + path + "'," + date + ")");
    }

    /**
     * Convert a yyyyMMdd string to epoch milliseconds.
     */
    public long convert2Long(String date) {
        long time = 0;
        SimpleDateFormat sf = new SimpleDateFormat("yyyyMMdd");
        try {
            time = sf.parse(date).getTime();
        } catch (java.text.ParseException e) {
            e.printStackTrace();
        }
        return time;
    }

    public static String DateTimeFormat(Date date) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
        return sdf.format(date);
    }

    public void destroy() throws SQLException {
        if (stmt != null) {
            stmt.close();
        }
        if (conn != null) {
            conn.close();
        }
        conn = null;
    }
}
```
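The /template/download_mysql.txt file on HDFS is not shown in the post. readHdfsFile (next section) parses it as key=value lines, and initDriver looks up the keys below, so it presumably looks like this (all values are placeholders):

```text
url=192.168.0.10
port=3306
schema=edm
user=download
password=download123
characterEncoding=utf8
```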
LoadHdfsConf:
```java
package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Maps;

public class LoadHdfsConf {

    static final Logger logger = LoggerFactory.getLogger(LoadHdfsConf.class);

    protected FileSystem fileSystem;

    public final boolean checkFile(String filePath) {
        boolean exists = false;
        try {
            Path path = new Path(filePath);
            exists = fileSystem.exists(path);
        } catch (Exception e) {
            logger.error("checkFile failed: " + filePath, e);
        }
        return exists;
    }

    /**
     * Read a key=value configuration file from HDFS into a map.
     */
    public Map<String, String> readHdfsFile(String hdfsPath) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource("resources/hdfs-site.xml");
        conf.addResource("resources/core-site.xml");
        fileSystem = FileSystem.get(conf);
        Path path = new Path(hdfsPath);
        InputStream in = fileSystem.open(path);
        List<String> lines = IOUtils.readLines(in);
        in.close();
        if (null == lines || lines.isEmpty()) {
            return null;
        }
        Map<String, String> map = Maps.newHashMap();
        int rowNum = 0;
        for (String line : lines) {
            rowNum++;
            String[] content = line.split("=");
            // skip blank lines and lines without a value after "="
            if (StringUtils.isEmpty(line) || content.length < 2
                    || StringUtils.isEmpty(content[1])) {
                logger.error("skip invalid config line {}: {}", rowNum, line);
                continue;
            }
            map.put(content[0], content[1]);
        }
        return map;
    }
}
```
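Putting the pieces together, the job would presumably be packaged as a jar and run periodically, passing the local script directory and the HDFS user root as the two arguments. A hypothetical invocation (the jar name and classpath are assumptions) might be:

```bash
# Hypothetical launch command; jar name and classpath are placeholders.
java -cp download.jar:$(hadoop classpath) \
    edm.spark.download.edm.spark.download.FileDownload \
    /home/hdfs/ysy/ hdfs://hdp/user/
```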