1. Split a file into several files

 """
this is a customizable version of the standard unix split command-line
utility; because it is written in python, it also works on windows and can be
easily modified; because it exports a function, its logic can also be imported
and reused in other applications
"""
import os
import sys

kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)    # default: roughly the size of a floppy


def split(fromfile, todir, chunksize=chunksize):
    """Split the file ``fromfile`` into numbered part files inside ``todir``.

    ``todir`` is created when it does not exist; otherwise every entry that
    ``os.listdir`` reports in it is removed with ``os.remove`` first, so parts
    left over from an earlier run cannot mix with the new ones.  The input is
    read in binary mode, ``chunksize`` bytes at a time, and each chunk is
    written to ``part0001``, ``part0002``, ... in ``todir``.

    Returns the number of part files written (0 for an empty input file).
    No errors are caught here: problems opening, reading, writing or
    removing files propagate to the caller.
    """
    if not os.path.exists(todir):
        os.mkdir(todir)                             # make dir, read/write parts
    else:
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))   # delete any existing files

    partnum = 0
    # Context managers close both files even if a read or write fails.
    with open(fromfile, 'rb') as source:
        while True:
            chunk = source.read(chunksize)
            if not chunk:                           # empty read means end of file
                break
            partnum += 1
            filename = os.path.join(todir, 'part%04d' % partnum)
            with open(filename, 'wb') as part:
                part.write(chunk)

    # Names are zero-padded to four digits; past 9999 parts the names would
    # no longer sort in part order.
    assert partnum <= 9999
    return partnum


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('use:split.py [file to split target-dir [chunksize]]')
    else:
        if len(sys.argv) < 3:
            # Too few arguments: ask for the paths interactively.
            interactive = True
            fromfile = input('File to be split?')
            todir = input('directory to store part files?')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('splitting', absfrom, 'to', absto, 'by', chunksize)

        try:
            parts = split(fromfile, todir, chunksize)
        except Exception as exc:
            # Report the failure instead of a traceback; Ctrl-C still aborts.
            print('error during split:')
            print(type(exc), exc)
        else:
            print('split finished:', parts, 'parts are in ', absto)
        if interactive:
            input('press enter key')                # pause if clicked

split to 200k

2.CopyAllFiles:

 """
Usage: 'python cpall.py dirFrom dirTo'
recursive copy of a directory tree. works like a 'cp -r dirFrom/* dirTo'
unix command,and assumes that dirFrom and dirTo are both directory.
was written to get around fatal error messages under windows drag-and-drop
copies(the first bad file ends the entire copy operation immediately).
but also allows for coding more customized copy operations in python
""" import os,sys
maxfileload =100000
blksize =1024*500 def copyfile(pathFrom,pathTo,maxfileload =maxfileload):
"""
copy one file pathFrom to pathTo,byte for byte;
use binary file mode to supress unicde decode and endline transform
"""
if os.path.getsize(pathFrom) <=maxfileload:
bytesFrom = open(pathFrom,'rb').read()
open(pathTo,'wb').write(bytesFrom)
else:
fileFrom = open(pathFrom,'rb')
fileTo = open(pathTo,'wb')
while True:
bytesFrom = fileFrom.read(blksize)
if not bytesFrom:
break
fileTo.write(bytesFrom) def copytree(dirFrom,dirTo,verbose=0):
"""
copy contents of dirFrom and below to dirTo ,return(files,dirs) counts;
may need to use bytes for dirnames if undecodable on other platforms;
may need to do more file type checking on unix:skip links,fifos,etc;
"""
fcount = dcount =0
for filename in os.listdir(dirFrom):
pathFrom = os.path.join(dirFrom,filename)
pathTo = os.path.join(dirTo,filename)
if not os.path.isdir(pathFrom):
try:
if verbose >1:
print('copying',pathFrom,'to',pathTo)
copyfile(pathFrom,pathTo)
fcount +=1
except:
print('error copying',pathFrom,'to',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
else:
if verbose:
print('copying dir',pathFrom,'to',pathTo)
try:
os.mkdir(pathTo)
below = copytree(pathFrom,pathTo)
fcount += below[0]
dcount += below[1]
dcount+=1
except:
print('error creating',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
return (fcount,dcount) def getargs():
"""
get and verify directory name arguments ,return default none on errors
"""
try:
dirFrom,dirTo = sys.argv[1:]
except:
print('usage error:cpall.py dirFrom dirTo')
else:
if not os.path.isdir(dirFrom):
print('error:dirFrom is not a dir')
elif not os.path.exists(dirTo):
os.mkdir(dirTo)
print('note:dirTo was created')
return (dirfrom,dirTo)
else:
print('warning:dirto already xists')
if hasattr(os.path,'samefile'):
same = os.path.samefile(dirFrom,dirTo)
else:
same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
if same:
print('error :dirfrom same as dirTo')
else:
return (dirFrom,dirTo) if __name__ =='__main__':
import time
distuple =getargs()
if distuple:
print('copying...')
start = time.clock()
fcount,dcount = copytree(*distuple)
print('copied',fcount,'files,',dcount,'directories')
print('in ',time.clock()-start,' seconds')

3. Compare two directories and list the files that exist in only one of them

 """
############################################################################
usage :python dirdiff.py dir1-path dir2-path
compare two directories to find files that exist in one but not the other
this version uses the os.listdir function and list difference. note that
this script checks only filenames,not file contents --see diffall.py for an
extension that does the latter by comparing .read() results
#############################################################################
""" import os,sys def reportdiffs(unique1,unique2,dir1,dir2):
"""
generate diffs report for one dir: part of comparedirs output
"""
if not (unique1 or unique2):
print('directory lists are identical')
else:
if unique1:
print('files unique to',dir1)
for file in unique1:
print('......',file)
if unique2:
print('files unique to',dir2)
for file in unique2:
print('......',file) def difference(seq1,seq2):
"""
return all items in seq1 only
a set(seq1) - set(seq2) would work too,but sets are randomly
ordered,so any platform-depent directory order would be lost
"""
return [item for item in seq1 if item not in seq2] def comparedirs(dir1,dir2,files1=None,files2=None):
"""
compare directory contents,but not actual files;
may need bytes listdir arg for undecodable filenames on some platforms
"""
print('comparing',dir1,'to',dir2)
files1 = os.listdir(dir1) if files1 is None else files1
files2 = os.listdir(dir2) if files2 is None else files2
unique1 = difference(files1,files2)
unique2 = difference(files2,files1)
reportdiffs(unique1,unique2,dir1,dir2)
return not (unique1,unique2) def getargs():
"args for command line mode"
try:
dir1,dir2 = sys.argv[1:]
except:
print('usage:dirdiff.py dir1 dir2')
sys.exit(1)
else:
return dir1,dir2 if __name__=='__main__':
dir1,dir2 = getargs()
comparedirs(dir1,dir2)

splitFile2SmallFile的更多相关文章

随机推荐

  1. PHP的排序算法跟查找算法

    排序算法: (1)冒泡排序 $arr = array(15,8,20,50,37,85,10,5,11,4); //冒泡排序 function maoPao($arr){ for($i = 0; $i ...

  2. Android -- ImageView(控制图片的大小以及旋转的角度)

    1. 

  3. CSS居中布局总结【转】

    居中布局 <div class="parent"> <div class="child">demo</div> </d ...

  4. UVa #11582 Colossal Fibonacci Numbers!

    巨大的斐波那契数 The i'th Fibonacci number f (i) is recursively defined in the following way: f (0) = 0 and  ...

  5. struts基于ognl的自动类型转换需要注意的地方

    好吧,坎坷的过程我就不说了,直接上结论: 在struts2中使用基于ognl的自动类型转换时,Action中的对象属性必须同时添加get/set方法. 例如: 客户端表单: <s:form ac ...

  6. “神马”框架之LigerUI

    我曾经参与一个产品的研发,前端框架用的就是LigerUI,之前我也没有听过这个框架. 因为是项目期初设计就定下来用LigerUI,根据系统的功能前端页面有跟多丰富的表现,所以需要改进前端效果,这次发现 ...

  7. 基于fab自动化部署

    fab是一个python库,强大好使,可以做很多帮助你减轻工作量的事情,比如在多台服务器上部署web项目,这里就讲讲使用它简单的方法来执行部署的过程. 关于fab的安装的基本使用,网上一搜一大把,内容 ...

  8. DP HDU1421

    搬寝室 Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Others)Total Submiss ...

  9. iOS AutoLayout自动布局&Masonry介绍与使用实践

    Masonry介绍与使用实践:快速上手Autolayout http://www.cnblogs.com/xiaofeixiang/p/5127825.html http://www.cocoachi ...

  10. Python 打包工具cx_freeze 问题记录及解决办法

    在节前的最后一天,解决了打包过程中遇到的所有问题,可以成功运行了!真是个好彩头,希望新的一年一切顺利! 以下是在使用cx_freeze过程中遇到的问题及解决办法(Win7) 问题描述:运行exe,启动 ...