LZ77.py
import math
from bitarray import bitarray class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400 def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm. The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match) If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big') # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise while i < len(data):
#print i match = self.findLongestMatch(data, i) if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength)) if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength), i += bestMatchLength else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i]) if verbose:
print "<0, %s>" % data[i], i += 1 # fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill() # write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise # an output file path was not provided, return the compressed data
return output_buffer def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = [] # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise while len(data) >= 9: flag = data.pop(0) if not flag:
byte = data[0:8].tobytes() output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes()) del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf) for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer) if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1) best_match_distance = -1
best_match_length = -1 # Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer): start_index = max(0, current_position - self.window_size)
substring = data[current_position:j] for i in range(start_index, current_position): repetitions = len(substring) / (current_position - i) last = len(substring) % (current_position - i) matched_string = data[i:current_position] * repetitions + data[i:i+last] if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring) if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None
LZ77.py的更多相关文章
- python调用py中rar的路径问题。
1.python调用py,在py中的os.getcwd()获取的不是py的路径,可以通过os.path.split(os.path.realpath(__file__))[0]来获取py的路径. 2. ...
- Python导入其他文件中的.py文件 即模块
import sys sys.path.append("路径") import .py文件
- LZ77压缩算法编码原理详解(结合图片和简单代码)
前言 LZ77算法是无损压缩算法,由以色列人Abraham Lempel发表于1977年.LZ77是典型的基于字典的压缩算法,现在很多压缩技术都是基于LZ77.鉴于其在数据压缩领域的地位,本文将结合图 ...
- import renumber.py in pymol
cp renumber.py /usr/local/lib/python2.7/dist-packages/pymol import renumber or run /path/to/renumber ...
- python gettitle.py
#!/usr/bin/env python # coding=utf-8 import threading import requests import Queue import sys import ...
- 解决 odoo.py: error: option --addons-path: The addons-path 'local-addons/' does not seem to a be a valid Addons Directory!
情况说明 odoo源文件路径-/odoo-dev/odoo/: 我的模块插件路径 ~/odoo-dev/local-addons/my-module 在my-module中创建了__init__.py ...
- caffe机器学习自带图片分类器classify.py实现输出预测结果的概率及caffe的web_demo例子运行实例
caffe机器学习环境搭建及python接口编译参见我的上一篇博客:机器学习caffe环境搭建--redhat7.1和caffe的python接口编译 1.运行caffe图片分类器python接口 还 ...
- 【转】Windows下使用libsvm中的grid.py和easy.py进行参数调优
libsvm中有进行参数调优的工具grid.py和easy.py可以使用,这些工具可以帮助我们选择更好的参数,减少自己参数选优带来的烦扰. 所需工具:libsvm.gnuplot 本机环境:Windo ...
- MySqlNDB使用自带的ndb_setup.py安装集群
在用Mysql做集群时,使用Mysql的NDB版本更易于集群的扩展,稳定和数据的实时性. 我们可以使用Mysql自带的工具进行集群安装与管理:ndb_setup.py.位于Mysql的安装目录bin下 ...
随机推荐
- android 视频录制 混淆打包 之native层 异常的解决
原文地址:http://www.cnblogs.com/linguanh/ (滑至文章末,直接看解决方法) 问题起因: 前5天,因为项目里面有个类似 仿微信 视频录制的功能, 先是上网找了个 开 ...
- 1、.NET平台概述
本学习主要参考Andrew Troelsen的C#与.NET4高级程序设计,这小节主要述说以下几个东西: 宏观上讨论一下.net相关的主题:程序集.CIL(Common Interme ...
- Java:IO流与文件基础
Java:IO流与文件基础 说明: 本章内容将会持续更新,大家可以关注一下并给我提供建议,谢谢啦. 走进流 什么是流 流:从源到目的地的字节的有序序列. 在Java中,可以从其中读取一个字节序列的对象 ...
- webpack配置别名alias出现的错误匹配
@(webpack) webpack是一款功能强大的前端构建工具,不仅仅是针对js,它也可通过各种loader来构建相关的less,html,image等各种资源,将webpack配合流程制定工具gu ...
- [收藏]IntelliJ Idea快捷键
Alt+回车 导入包,自动修正 Ctrl+N 查找类 Ctrl+Shift+N 查找文件 Ctrl+Alt+L 格式化代码 Ctrl+Alt+O 优化导入的类和包 Alt+Insert 生成代码(如g ...
- ASP.NET Core 中文文档 第三章 原理(17)为你的服务器选择合适版本的.NET框架
原文:Choosing the Right .NET For You on the Server 作者:Daniel Roth 翻译:王健 校对:谢炀(Kiler).何镇汐.许登洋(Seay).孟帅洋 ...
- NSIS 打包脚本基础
简介 NSIS(Nullsoft Scriptable Install System)是一个开源的 Windows 系统下安装程序制作程序.它提供了安装.卸载.系统设置.文件解压缩等功能.这如其名字所 ...
- Elasticsearch —— bulk批量导入数据
在使用Elasticsearch的时候,一定会遇到这种场景--希望批量的导入数据,而不是一条一条的手动导入.那么此时,就一定会需要bulk命令! 更多内容参考我整理的Elk教程 bulk批量导入 批量 ...
- CLR via C# 摘要二:IL速记
最简单的IL程序 .assembly test {} .method void Func() { .entrypoint ldstr "hello world" call void ...
- Angular2 小贴士 Name
Angular2 正式版已经发布了一个月了,我也是通过各种方式在进行验证是否可以满足我们的需求,今天我就发现了一个问题.现在我们来一起说明一下,这个可能不算是bug,而应该需要我们记住就可以了. 我们 ...