哈夫曼树压缩txt失败原因

哈夫曼树压缩txt失败原因,第1张

哈夫曼山谨树压缩txt失败是传输过程有问绝唯册题。传统压缩方式中,字符的编码是以等字长的方式进行压缩,相比于哈夫曼并宏编码的可变字长而言压缩率会降低,传输过程有问题,可以重新传输或下载。

这是本人写的动态哈夫曼压缩乱空算法实现,压缩与解压缩时,

根据文件内容自动生成哈夫曼树,并动态调整节点的权重

和树的形状。900MHZ的PIII赛扬每秒钟可以压缩的好几MB

的数据,只是压缩率不高,文本文件的压缩后容量一般可

以减少25%,比RAR差远了。

源文件共三个缺简,你在VC6.0中新建一个空的命令行项目,

将它们加进去,编译完就可以用了。

===========hfm.cpp===================

#include <stdio.h>

#include <string.h>

#include <io.h>

#include <sys/stat.h>

#include <fcntl.h>

#include "Huffman.h"

int wh

int rh

bool Write(unsigned char *s,int len){

_write(wh,s,len)

return true

}

bool OpenFile(char* source,char* target){

int w_flag=_O_WRONLY | _O_CREAT | _O_EXCL | _O_BINARY

int r_flag=_O_RDONLY | _O_BINARY

rh=_open(source,r_flag,_S_IREAD | _S_IWRITE)

wh=_open(target,w_flag,_S_IREAD | _S_IWRITE)

if(rh==-1 || wh==-1){

if(rh!=-1){

_close(rh)

printf("\n打开文件:'%s'失败!",target)

}

if(wh!=-1){

_close(wh)

printf("\n打开文件:'%s'失伏陪裤败!",source)

}

return false

}else{

return true

}

}

void PrintUsage(){

printf("\n以动态哈夫曼算法压缩或解压缩文件。\n\n")

printf("\thfm -?\t\t\t\t显示帮助信息\n")

printf("\thfm -e -i source -o target\t压缩文件\n")

printf("\thfm -d -i source -o target\t解压缩文件\n\n")

}

void main(int argc,char *args[]){

int mode,i,j,K=0

char src[4096]

char target[4096]

unsigned char buffer[BUFFER_SIZE]

Huffman *h

mode=0

for(i=1i<argci++){

if(args[i][0]=='-' || args[i][0]=='/'){

switch(args[i][1]){

case '?':

mode=0//帮助

break

case 'e':

case 'E':

mode=1//压缩

break

case 'd':

case 'D':

mode=2//解压缩

break

case 'o':

case 'O':

if(i+1>=argc){

mode=0

}else{//输出文件

j=0

while(args[i+1][j]!='\0' &&j<4096){

target[j++]=args[i+1][j]

}

if(j==4096){

mode=0

}else{

target[j]='\0'

K |= 1

}

}

break

case 'i':

case 'I':

if(i+1>=argc){

mode=0

}else{//输入文件

j=0

while(args[i+1][j]!='\0' &&j<4096){

src[j++]=args[i+1][j]

}

if(j==4096){

mode=0

}else{

src[j]='\0'

K |=2

}

}

break

}

}

}

if(K!=3)mode=0

switch(mode){

case 0:

PrintUsage()

return

case 1://压缩

if(!OpenFile(src,target))return

h=new Huffman(&Write,true)

i=BUFFER_SIZE

while(i==BUFFER_SIZE){

i=_read(rh,buffer,BUFFER_SIZE)

h->Encode(buffer,i)

}

delete h

_close(rh)

_close(wh)

printf("压缩完毕!")

break

case 2://解压缩

if(!OpenFile(src,target))return

h=new Huffman(&Write,false)

i=BUFFER_SIZE

while(i==BUFFER_SIZE){

i=_read(rh,buffer,BUFFER_SIZE)

h->Decode(buffer,i)

}

delete h

_close(rh)

_close(wh)

printf("解压缩完毕!")

break

}

}

=======end of hfm.cpp=======================

=======Huffman.cpp=============================

// Huffman.cpp: implementation of the Huffman class.

//

//////////////////////////////////////////////////////////////////////

#include "Huffman.h"

//////////////////////////////////////////////////////////////////////

// Construction/Destruction

//////////////////////////////////////////////////////////////////////

Huffman::Huffman(Output *output,bool mode)

{

Hbtree *tmp

int i

this->mode=mode

//设置输出函数,当缓冲区满时,将调用该函数输出

this->output=output

//初始化列表

for(i=0i<LIST_LENGTHi++)this->list[i]=NULL

//初始化哈夫曼树

this->root=this->NewNode(NOT_CHAR,LEFT,NULL)

this->current=this->root

tmp=this->NewNode(CODE_ESCAPE,RIGHT,root)

tmp->count=1

tmp=this->NewNode(CODE_FINISH,LEFT,root)

tmp->count=0

root->count=root->child[LEFT]->count+root->child[RIGHT]->count

//设置缓冲区指针

this->char_top=BOTTOM_BIT

this->bit_top=TOP_BIT

this->buffer[0]=0

//重构哈夫曼树的最大计数值

this->max_count=MAX_COUNT

this->shrink_factor=SHRINK_FACTOR

this->finished=false

}

Huffman::~Huffman()

{

if(this->mode==true){//如果是编码

//输出结束码

this->OutputEncode(CODE_FINISH)

this->char_top++

}

//强制清空缓冲区

this->Flush()

//释放空间

this->ReleaseNode(this->root)

}

Hbtree * Huffman::NewNode(int value, int index, Hbtree *parent)

{

Hbtree *tmp=new Hbtree

tmp->parent=parent

tmp->child[0]=NULL

tmp->child[1]=NULL

tmp->count=(1 <<SHRINK_FACTOR)

tmp->index=(index==0) ? 0 : 1

tmp->value=value

if(value!=NOT_CHAR)this->list[tmp->value]=tmp

if(parent!=NULL)parent->child[tmp->index]=tmp

return tmp

}

void Huffman::ReleaseNode(Hbtree *node)

{

if(node!=NULL){

this->ReleaseNode(node->child[LEFT])

this->ReleaseNode(node->child[RIGHT])

delete node

}

}

//输出一位编码

int Huffman::OutputBit(int bit)

{

unsigned char candidates[]={1,2,4,8,16,32,64,128}

if(bit!=0)

this->buffer[this->char_top] |= candidates[this->bit_top]

this->bit_top--

if(this->bit_top <BOTTOM_BIT){

this->bit_top=TOP_BIT

this->char_top++

if(this->char_top >= BUFFER_SIZE){//输出缓冲区

this->output(this->buffer,BUFFER_SIZE)

this->char_top=0

}

this->buffer[this->char_top]=0

}

return 0

}

//输出缓冲区

int Huffman::Flush()

{

this->output(this->buffer,this->char_top)

this->char_top=0

return 0

}

int Huffman::Encode(unsigned char c)

{

int value=c,

candidates[]={128,64,32,16,8,4,2,1},

i

if(this->list[value]==NULL){//字符不存在于哈夫曼树中

//输出转义码

this->OutputEncode(CODE_ESCAPE)

//输出字符

for(i=0i<8i++)this->OutputBit(value &candidates[i])

this->InsertNewNode(value)

}else{

//输出字符编码

this->OutputEncode(value)

//重新调整哈夫曼树

this->BalanceNode(this->list[value]->parent)

}

//重组哈夫曼树

if(this->root->count>=this->max_count)

this->RearrangeTree()

return 0

}

void Huffman::BalanceNode(Hbtree *node)

{

Hbtree *parent,*child,*brother

int i,j

parent=node->parent

if(parent==NULL)return//根节点无需调整

if(node->value==NOT_CHAR){//非叶子节点

child=node->child[LEFT]->count >node->child[RIGHT]->count ?

node->child[LEFT] : node->child[RIGHT]

if(child->count >parent->count - node->count){

//失衡

i=!(node->index)

j=child->index

node->count=parent->count - child->count

brother=parent->child[i]

node->child[j]=brother

brother->index=j

brother->parent=node

parent->child[i]=child

child->index=i

child->parent=parent

}

}

this->BalanceNode(parent)

}

//输出一个字符的编码

int Huffman::OutputEncode(int value)

{

int stack[CODE_FINISH+2],top=0

Hbtree *tmp=this->list[value]

//输出编码

if(value<=MAX_VALUE){//字符

while(tmp!=NULL){

stack[top++]=tmp->index

tmp->count++

tmp=tmp->parent

}

}else{//控制码

while(tmp!=NULL){

stack[top++]=tmp->index

tmp=tmp->parent

}

}

top--

while(top>0){

this->OutputBit(stack[--top])

}

return 0

}

void Huffman::PrintNode(Hbtree *node,int level)

{

int i

if(node){

for(i=0i<level*3i++)printf(" ")

printf("%p P:%p L:%p R:%p C:%d",node,node->parent,node->child[0],node->child[1],node->count)

if(node->value!=NOT_CHAR)printf(" V:%d",node->value)

printf("\n")

this->PrintNode(node->child[LEFT],level+1)

this->PrintNode(node->child[RIGHT],level+1)

}

}

int Huffman::Encode(unsigned char *s, int len)

{

int i

for(i=0i<leni++)this->Encode(s[i])

return 0

}

void Huffman::PrintTree()

{

this->PrintNode(this->root,0)

}

int Huffman::RecountNode(Hbtree *node)

{

if(node->value!=NOT_CHAR)return node->count

node->count=

this->RecountNode(node->child[LEFT]) +

this->RecountNode(node->child[RIGHT])

return node->count

}

void Huffman::RearrangeTree()

{

int i,j,k

Hbtree *tmp,*tmp2

//所有非控制码的计数值右移shrink_factor位,并删除计数值为零的节点

for(k=0k<=MAX_VALUEk++){

if(this->list[k]!=NULL){

tmp=this->list[k]

tmp->count >>= this->shrink_factor

if(tmp->count ==0){

this->list[k]=NULL

tmp2=tmp->parent

i=tmp2->index

j=!(tmp->index)

if(tmp2->parent!=NULL){

tmp2->parent->child[i]=tmp2->child[j]

tmp2->child[j]->parent=tmp2->parent

tmp2->child[j]->index=i

}else{

this->root=tmp2->child[j]

this->current=this->root

this->root->parent=NULL

}

delete tmp

delete tmp2

}

}

}

//重新计数

this->RecountNode(this->root)

//重新调整平衡

for(i=0i<=MAX_VALUEi++){

if(this->list[i]!=NULL)

this->BalanceNode(this->list[i]->parent)

}

}

void Huffman::InsertNewNode(int value)

{

int i

Hbtree *tmp,*tmp2

//将字符加入哈夫曼树

tmp2=this->list[CODE_FINISH]

tmp=this->NewNode(NOT_CHAR, tmp2->index, tmp2->parent)

tmp->child[LEFT]=tmp2

tmp2->index=LEFT

tmp2->parent=tmp

tmp2=this->NewNode(value,RIGHT,tmp)

tmp->count=tmp->child[LEFT]->count+tmp->child[RIGHT]->count

i=tmp2->count

while((tmp=tmp->parent)!=NULL)tmp->count+=i

//从底向上调整哈夫曼树

this->BalanceNode(tmp2->parent)

}

int Huffman::Decode(unsigned char c)

{

this->Decode(c,7)

return 0

}

int Huffman::Decode(unsigned char *s,int len)

{

int i

for(i=0i<leni++)this->Decode(s[i])

return 0

}

int Huffman::Decode(unsigned char c, int start)

{

int value=c,

candidates[]={1,2,4,8,16,32,64,128},

i,j

Hbtree *tmp

if(this->finished)return 0

i=start

if(this->current==NULL){//转义状态下

while(this->remain >= 0 &&i>=0){

if((candidates[i] &value) !=0){

this->literal |= candidates[this->remain]

}

this->remain--

i--

}

if(this->remain <0){//字符输出完毕

//输出字符

this->OutputChar(this->literal)

//将字符插入哈夫曼树

this->InsertNewNode(literal)

//重组哈夫曼树

if(this->root->count>=this->max_count)

this->RearrangeTree()

//设置环境

this->current=this->root

}

}else{

j=((value &candidates[i])!=0)?1:0

tmp=this->current->child[j]

i--

while(tmp->value==NOT_CHAR &&i>=0){

j=((value &candidates[i])!=0)?1:0

tmp=tmp->child[j]

i--

}

if(tmp->value==NOT_CHAR){//中间节点

this->current=tmp

}else{

if(tmp->value<=MAX_VALUE){//编码内容

j=tmp->value

this->OutputChar((unsigned char)j)

//修改计数器

tmp=this->list[j]

while(tmp!=NULL){

tmp->count++

tmp=tmp->parent

}

//调整平衡度

this->BalanceNode(this->list[j]->parent)

//重组哈夫曼树

if(this->root->count>=this->max_count)

this->RearrangeTree()

//设置环境

this->current=this->root

}else{

if(tmp->value==CODE_ESCAPE){//转义码

this->current=NULL

this->remain=7

this->literal=0

}else{//结束码

this->finished=true

return 0

}

}

}

}

if(i>=0)this->Decode(c,i)

return 0

}

int Huffman::OutputChar(unsigned char c)

{

this->buffer[this->char_top++]=c

if(this->char_top>=BUFFER_SIZE){//输出缓冲区

this->output(this->buffer,BUFFER_SIZE)

this->char_top=0

}

return 0

}

========end of Huffman.cpp==================

========Huffman.h============================

// Huffman.h: interface for the Huffman class.

//

//////////////////////////////////////////////////////////////////////

#if !defined(NULL)

#include <stdio.h>

#endif

#if !defined(AFX_HUFFMAN_H__B1F1A5A6_FB57_49B2_BB67_6D1764CC04AB__INCLUDED_)

#define AFX_HUFFMAN_H__B1F1A5A6_FB57_49B2_BB67_6D1764CC04AB__INCLUDED_

#if _MSC_VER >1000

#pragma once

#endif // _MSC_VER >1000

#define MAX_COUNT 65536 //最大计数值,大于此值时

#define MAX_VALUE 255 //编码的最大值

#define CODE_ESCAPE MAX_VALUE+1 //转义码

#define CODE_FINISH MAX_VALUE+2 //结束码

#define LIST_LENGTH MAX_VALUE+3 //编码列表长度

#define SHRINK_FACTOR 2 //减小的比例,通过右移位实现

#define LEFT 0 //左孩子索引

#define RIGHT 1 //右孩子索引

#define NOT_CHAR -1//非字符

#define TOP_BIT 7//字符最高位

#define BOTTOM_BIT 0 //字符最低位

#define BUFFER_SIZE 81920 //缓冲区大小

//输出函数

typedef bool (Output)(unsigned char *s,int len)

//哈夫曼树的节点定义

typedef struct Hnode{

int count//计数器

int index//父节点的孩子索引(0--左孩子,1--右孩子)

Hnode* child[2]

Hnode* parent

int value

}Hbtree

class Huffman

{

private:

//输出一个解码的字符

int OutputChar(unsigned char c)

//从指定位置开始解码

int Decode(unsigned char c,int start)

//插入一个新节点

void InsertNewNode(int value)

//重新调整哈夫曼树构型

void RearrangeTree()

//对各节点重新计数

int RecountNode(Hbtree *node)

//打印哈夫曼树节点

void PrintNode(Hbtree *node,int level)

//输出一个值的编码

int OutputEncode(int value)

//调节哈夫曼树节点使之平衡

void BalanceNode(Hbtree *node)

//输出一位编码

int OutputBit(int bit)

//释放哈夫曼树节点

void ReleaseNode(Hbtree *node)

//新建一个节点

Hbtree *NewNode(int value,int index, Hbtree *parent)

//输出函数地址

Output *output

//哈夫曼树根地址

Hbtree *root

//哈夫曼编码单元列表

Hbtree *list[LIST_LENGTH]

//输出缓冲区

unsigned char buffer[BUFFER_SIZE]

//缓冲区顶

int char_top,bit_top

//收缩哈夫曼树参数

int max_count,shrink_factor

//工作模式,true--编码,false--解码

bool mode

//解码的当前节点

Hbtree *current

int remain//当前字符剩余的位数

unsigned char literal//按位输出的字符

bool finished

public:

//解码指定长度的字符串

int Decode(unsigned char *s,int len)

//解码一个字符

int Decode(unsigned char c)

//打印哈夫曼树

void PrintTree()

//编码指定长度的字符串

int Encode(unsigned char *s,int len)

//编码一个字符

int Encode(unsigned char c)

//清空缓冲区

int Flush()

//output指输出函数,mode指工作模式,true--编码,false--解码

Huffman(Output *output,bool mode)

//析构函数

virtual ~Huffman()

}

#endif // !defined(AFX_HUFFMAN_H__B1F1A5A6_FB57_49B2_BB67_6D1764CC04AB__INCLUDED_)

================end of Huffman.h==================

祝你好运!

假设用于通信的电文由字符集{a,b,c,d,e,f,g,h}中的字母构成,这8个字母在电文中出现的概率分别为{0.07,0.19,0.02,0.06,0.32,0.03,0.21,0.10}。

哈夫曼编码 根据上面可得编码表:  a:1001  b:01  c:10111  d:1010  e:11  f:10110  g:00  h:1000

用三位二进行数进行的等长编码平均长度为3,而根据哈夫曼树编码的平均码长为:4*0.07+2*0.19+5*0.02+4*0.06+2*0.32+5*0.03+2*0.21+4*0.10=2.61  2.61/3=0.87=87%其平均码长是等长码的87%,所以平均压缩率为13%。

因为定长编码已经用相同的位数这个条件保证了任一个字符的编码都不会成为其它编码的前缀,所以这种情况只会出现在变长编码当中,要想避免这种情况,

就必须用一个条件来制约定长编码,这个条件就是要想成为压缩编码,变长编码就必须是前缀编码,所谓的前缀编码就是任何一个字符的编码都不能是另一个字符编码的前缀。

扩展资料:

实际应配灶用中,除采用定时清洗以消除误差扩散和采用缓冲存储以解决速率匹配以外,主要问题是解决小符号集合的统计匹配,誉游

例如黑(1)、白(0)传真信源的统计匹配,采用0和1不同长度游程组成扩大的符号集合信源。游程,指相同码元的长度(如二进码中连续的一串0或一串1的长度或个数)。按照CCITT标准,需要统计2×1728种游程(长度),

这样,实现时的存储量太大。事实上长游程的概率很小,故CCITT还规定:若l表示游程长度,则l=64q+r。其中q称主码,r为基码。编码时,不小于64的游程长度由主码和基码组成。培虚扮而当l为64的整数倍时,只用主码的代码,已不存在基码的代码。

参考资料来源:百度百科-哈夫曼编码


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/tougao/8196518.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-04-14
下一篇 2023-04-14

发表评论

登录后才能评论

评论列表(0条)

保存