1. split file into several files

 """
this is aa customizable version of the standard unix split command-line
utility;because it is written in python,it also works on windows and can be
easily modifyed;because it export a function,its logic can also be imported
and resued in other applications
"""
import sys,os
kilobytes =1024
megabytes = kilobytes*1000
chunksize = int(1.4* megabytes) #default roughtly a floppy def split(fromfile,todir,chunksize=chunksize):
if not os.path.exists(todir): # caller handlers errors
os.mkdir(todir) #make dir,read/write parts
else:
for fname in os.listdir(todir):
os.remove(os.path.join(todir,fname)) #delete any exiting files
partnum =0
input = open(fromfile,'rb')
while True:
chunk = input.read(chunksize)
if not chunk:break
partnum +=1
filename = os.path.join(todir,('part%04d' % partnum))
fileobj = open(filename,'wb')
fileobj.write(chunk)
fileobj.close()
input.close()
assert partnum<=9999
return partnum if __name__ =='__main__':
if len(sys.argv) == 2 and sys.argv[1]== '-help':
print('use:split.py [file to split target-dir [chunksize]]')
else:
if len(sys.argv) <3:
interactive =True
fromfile =input('File to be split?')
todir = input('directory to store part files?')
else:
interactive = False
fromfile,todir = sys.argv[1:3]
if len(sys.argv) == 4:chunksize =int(sys.argv[3])
absfrom,absto = map(os.path.abspath,[fromfile,todir])
print('splitting',absfrom,'to',absto,'by',chunksize)
try:
parts = split(fromfile,todir,chunksize)
except:
print('error during split:')
print(sys.exc_info()[0],sys.exc_info()[1])
else:
print('split finished:',parts,'parts are in ',absto)
if interactive:
input('press enter key') #pause if clicked

split to 200k

2.CopyAllFiles:

 """
Usage: 'python cpall.py dirFrom dirTo'
recursive copy of a directory tree. works like a 'cp -r dirFrom/* dirTo'
unix command,and assumes that dirFrom and dirTo are both directory.
was written to get around fatal error messages under windows drag-and-drop
copies(the first bad file ends the entire copy operation immediately).
but also allows for coding more customized copy operations in python
""" import os,sys
maxfileload =100000
blksize =1024*500 def copyfile(pathFrom,pathTo,maxfileload =maxfileload):
"""
copy one file pathFrom to pathTo,byte for byte;
use binary file mode to supress unicde decode and endline transform
"""
if os.path.getsize(pathFrom) <=maxfileload:
bytesFrom = open(pathFrom,'rb').read()
open(pathTo,'wb').write(bytesFrom)
else:
fileFrom = open(pathFrom,'rb')
fileTo = open(pathTo,'wb')
while True:
bytesFrom = fileFrom.read(blksize)
if not bytesFrom:
break
fileTo.write(bytesFrom) def copytree(dirFrom,dirTo,verbose=0):
"""
copy contents of dirFrom and below to dirTo ,return(files,dirs) counts;
may need to use bytes for dirnames if undecodable on other platforms;
may need to do more file type checking on unix:skip links,fifos,etc;
"""
fcount = dcount =0
for filename in os.listdir(dirFrom):
pathFrom = os.path.join(dirFrom,filename)
pathTo = os.path.join(dirTo,filename)
if not os.path.isdir(pathFrom):
try:
if verbose >1:
print('copying',pathFrom,'to',pathTo)
copyfile(pathFrom,pathTo)
fcount +=1
except:
print('error copying',pathFrom,'to',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
else:
if verbose:
print('copying dir',pathFrom,'to',pathTo)
try:
os.mkdir(pathTo)
below = copytree(pathFrom,pathTo)
fcount += below[0]
dcount += below[1]
dcount+=1
except:
print('error creating',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
return (fcount,dcount) def getargs():
"""
get and verify directory name arguments ,return default none on errors
"""
try:
dirFrom,dirTo = sys.argv[1:]
except:
print('usage error:cpall.py dirFrom dirTo')
else:
if not os.path.isdir(dirFrom):
print('error:dirFrom is not a dir')
elif not os.path.exists(dirTo):
os.mkdir(dirTo)
print('note:dirTo was created')
return (dirfrom,dirTo)
else:
print('warning:dirto already xists')
if hasattr(os.path,'samefile'):
same = os.path.samefile(dirFrom,dirTo)
else:
same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
if same:
print('error :dirfrom same as dirTo')
else:
return (dirFrom,dirTo) if __name__ =='__main__':
import time
distuple =getargs()
if distuple:
print('copying...')
start = time.clock()
fcount,dcount = copytree(*distuple)
print('copied',fcount,'files,',dcount,'directories')
print('in ',time.clock()-start,' seconds')

3. compare directory and list all files if not same

 """
############################################################################
usage :python dirdiff.py dir1-path dir2-path
compare two directories to find files that exist in one but not the other
this version uses the os.listdir function and list difference. note that
this script checks only filenames,not file contents --see diffall.py for an
extension that does the latter by comparing .read() results
#############################################################################
""" import os,sys def reportdiffs(unique1,unique2,dir1,dir2):
"""
generate diffs report for one dir: part of comparedirs output
"""
if not (unique1 or unique2):
print('directory lists are identical')
else:
if unique1:
print('files unique to',dir1)
for file in unique1:
print('......',file)
if unique2:
print('files unique to',dir2)
for file in unique2:
print('......',file) def difference(seq1,seq2):
"""
return all items in seq1 only
a set(seq1) - set(seq2) would work too,but sets are randomly
ordered,so any platform-depent directory order would be lost
"""
return [item for item in seq1 if item not in seq2] def comparedirs(dir1,dir2,files1=None,files2=None):
"""
compare directory contents,but not actual files;
may need bytes listdir arg for undecodable filenames on some platforms
"""
print('comparing',dir1,'to',dir2)
files1 = os.listdir(dir1) if files1 is None else files1
files2 = os.listdir(dir2) if files2 is None else files2
unique1 = difference(files1,files2)
unique2 = difference(files2,files1)
reportdiffs(unique1,unique2,dir1,dir2)
return not (unique1,unique2) def getargs():
"args for command line mode"
try:
dir1,dir2 = sys.argv[1:]
except:
print('usage:dirdiff.py dir1 dir2')
sys.exit(1)
else:
return dir1,dir2 if __name__=='__main__':
dir1,dir2 = getargs()
comparedirs(dir1,dir2)

splitFile2SmallFile的更多相关文章

随机推荐

  1. 转:AngularJS的Filter用法详解

    Filter简介 Filter是用来格式化数据用的. Filter的基本原型( '|' 类似于Linux中的管道模式): {{ expression | filter }} Filter可以被链式使用 ...

  2. .NET Framework中Object基类有哪些方法?

    ToString(),虚方法,任何子类可重写自定义 GetType(),非虚,返回类型名 Equals(),虚方法,默认情况下判定两个引用是否指向同一实例.(ReferenceEquals()功能相同 ...

  3. sqlserver2008 函数1

    SQL2008 表达式:是常量.变量.列或函数等与运算符的任意组合. 1. 字符串函数 函数 名称 参数 示例 说明 ascii(字符串表达式) select ascii('abc') 返回 97 返 ...

  4. [转]starling教程-触摸事件(Touch Events)(四)

    在前面提到过,Starling是Sparrow的姊妹篇,正因为这样,Starling里的touch事件的机制其实是为移动设备的触摸交互设计的,所以当你使用它进行使用鼠标交互的桌面应用开发时,第一眼会感 ...

  5. Learn ZYNC (6)

    最近在关注的问题是怎么样从ps端丢数据到ram, 然后用ip核进行处理后再输送到ram,ps端可以读取. 参考文献:[OpenHW参赛手记]AXI-Stream接口开发详细流程 首先按照作者的探索思路 ...

  6. 20145215实验二 Java面向对象程序设计

    一.实验内容 初步掌握单元测试和TDD 理解并掌握面向对象三要素:封装.继承.多态 初步掌握UML建模 熟悉S.O.L.I.D原则 了解设计模式 二.实验步骤 (一)单元测试 (1)三种代码 伪代码: ...

  7. 【完整靠谱版】结合公司项目,仔细总结自己使用百度编辑器实现FTP上传的完整过程

    说在前面 工作中会遇到很多需要使用富文本编辑器的地方,比如我现在发布这篇文章离不开这个神器,而且现在网上编辑器太多了.记得之前,由于工作需要自己封装过一个编辑器的公共插件,是用ckeditor改版的, ...

  8. 树莓派+移动硬盘搭建NAS服务器

    由于树莓派的USB接口不足以给移动硬盘供电,因此需要另外给移动硬盘提供电源. 显示当前已有的存储设备 # fdisk -l Disk /dev/mmcblk0: 7876 MB, 7876902912 ...

  9. mysql 使用存储过程批量插数据

    #创建测试表 DROP TABLE IF EXISTS test.test; CREATE TABLE test.test( id int(10) not null auto_increment, a ...

  10. 带你玩转JavaWeb开发之四 -如何用JS做登录注册页面校验

    今日内容 使用JQuery完成页面定时弹出广告 使用JQuery完成表格的隔行换色 使用JQuery完成复选框的全选效果 使用JQuery完成省市联动效果 使用JQuery完成下列列表左右选择 使用J ...