哈夫曼编码_C_内存溢出

前言
读取text文件
计算各字符的权重
编码
- 建立编码表
- 全文编码
译码
主函数例程

前言

本文主要介绍对text文件中的大量英文字母进行哈夫曼编码，并根据编码表对编码进行译码。
编码原理参考哈夫曼编码

读取text文件

思路

（1）先读取一次，获得文件中数据的长度
（2）根据获得的长度申请用于存储数据的字符数组
（3）再次读取文件，将数据存入数组中

/*********************************************************\
*function: 读取text文件
*input: none
*output: int &num:文件中字符串长度
*return: char* str:文件中的字符串
\*********************************************************/
char *ReadFile(int &num)
{
    int n = 0;
    char x;
    ifstream inFile;
    inFile.open("test.txt"); //打开文件

    if(!inFile) //不能打开文件（不存在或被占用）
    {
        cout << "Unable to open file !" << endl;
    }
    while (inFile >> x) //计算字符串长度
    {
        num ++;
    }
    inFile.close(); //关闭文件

    cout << "字符串长度：" << num << endl;
    char *str = new char[num]; //为str申请长度为num的内存
    inFile.open("test.txt");
    while (inFile >> str[n++]); //将文件中的数据存入str中
    inFile.close();

    return str;
}

计算各字符的权重

这里只统计英文字母，如果想要将标点符号等也一并编码，可以自行添加。
思路：
（1）遍历所有字符，判断是否是A-Z和a-z的字符
（2）权重数组weight，其0-25位对应大写字母A-Z，26-51位对应小写字母a-z
（3）每遍历到一个字母，则weight对应的位置+1

/*********************************************************\
*function: 计算各字符的权重(出现的频率)
*input: char *str: 字符串
*output: int Weight[]: A-Z和a-z的权重，0-25位对应A-Z，26-51对应a-z
*return: void
\*********************************************************/
void CalWeight(char *Str, int Weight[])
{
    int Pos = 0;
    while (*Str != ')'//遍历结束则退出 if
    {
        ( *'A'Str >= && * <=Str 'Z' )=
        {
            Pos * -Str 'A' ;[
            Weight]Pos++ ;//对应的位置权重+1 }
        if

        ( *'a'Str >= && * <=Str 'z' )=
        {
            Pos * -Str 'a' + 26 ;// 小写字母对应的位置 [
            Weight]Pos++ ;}
        <<
        cout * ;Str// 输出字符串 ++
        Str ;}

    }
#

编码建立编码表

思路：
（1）首先应该建立哈夫曼树，将52个字符作为叶结点，进行合并。
（2）再根据所得的哈夫曼树进行哈夫曼编码，获得每个字符对应的编码，即编码表。
详细请参考哈夫曼编码
这里将哈夫曼编码程序单独打包在一个.cpp和.h文件中，一个文件中代码尽量不要太多。

头文件：

ifndef__CRHUFFMAN_H #
define__CRHUFFMAN_H #

include# 
includeusing 
namespace ; stdclass

Node //定义哈夫曼树的结点 public
{
  :int
    = lchild - 1,= rchild - 1;int
    = parent - 1;int
    = weight 0 ;}
;class

huffman public
{
://公开函数声明 void
    CreateHuffman (*Node ,pint * ,weightint ) numLeafs;//创建哈夫曼树 void
    HuffmanCode (*Node ,pint , numLeafs* string )codes;//哈夫曼编码 private


://私有函数和变量声明 void
    InitWeight (*Node ,pint * ,weightint ) numLeafs;//初始化各结点的权重 void
    SearchMin (const* Node ,pint , numint & ,min1int & )min2;//搜索权重最小的根结点 *
    Node ;p//结点（叶结点+合并结点） int
    * ;weight//各结点权重 int
    ; num//结点数 int
    ; min1//权重最小结点 int
    ; min2//权重第二小结点 int
    ; numLeafs//叶结点数*
    string ;codes//编码表 }
;#

endif// __CRHUFFMAN_H #

.cpp文件：

include"crhuffman.h" /*********************************************************\
*function: 初始化各结点的权重
*input: Node *p: 结点（叶结点+合并结点）；int numLeafs：叶结点数
*output: int *Weight: A-Z和a-z的权重，0-25位对应A-Z，26-51对应a-z
*return: void
\*********************************************************/

void
:: huffmanInitWeight(*Node ,pint * ,weightint ) numLeafsfor
{
    ( int= i 0 ;< i ; numLeafs++ i)[
    {
        p]i.=weight [ weight]i;}
    }
/*********************************************************\
*function: 搜索权重最小的叶结点
*input: Node *p: 结点（叶结点+合并结点）；int num：结点数
*output: int &min1: 最小的叶结点；int &min2: 第二小的叶结点
*return: void
\*********************************************************/

void
:: huffmanSearchMin(const* Node ,pint , numint & ,min1int & )min2for
{
    ( int= i 0 ;< i ; num++ i)//找到第一个根结点 if
    {
        ( [p]i.==parent - 1)=
        {
            min1 ; ibreak
            ;}
        }
    for

    ( int= i 0 ;< i ; num++ i)//遍历全部根结点，找到权重最小的根结点 if
    {
        ( ([p]i.==parent - 1)&& ( [p]i.<weight [ p]min1.)weight)=
        {
            min1 ; i}
        }
    for

    ( int= i 0 ;< i ; num++ i)//找到第二个根结点 if
    {
        ( ([p]i.==parent - 1)&& ( !=i ) min1)=
        {
            min2 ; ibreak
            ;}
        }
    for

    ( int= i 0 ;< i ; num++ i)//遍历全部根结点，找到权重第二小的根结点 if
    {
        ( ([p]i.==parent - 1)&& ( [p]i.<weight [ p]min2.)weight&& ( !=i ) min1)=
        {
            min2 ; i}
        }
    }
/*************************************************************************************\
*function: 创建哈夫曼树
*input: Node *p: 结点（叶结点+合并结点）；int *weight：各结点的权重；int numLeafs：叶结点数
*output: none
*return: void
\*************************************************************************************/

void
:: huffmanCreateHuffman(*Node ,pint * ,weightint ) numLeafsint
{
    , PosMin1; PosMin2InitWeight
    (,p, weight) numLeafs;//初始化叶结点权重 for

    ( int= i 0 ;< i - numLeafs 1 ;++ i)SearchMin
    {
        (,p+ numLeafs , i, PosMin1) PosMin2;//搜索最小的两个根结点 [

        p+numLeafs ] i.=weight [ p]PosMin1.+weight [ p]PosMin2.;weight[
        p+numLeafs ] i.=lchild ; PosMin1[
        p+numLeafs ] i.=rchild ; PosMin2[
        p]PosMin1.=parent + numLeafs ; i[
        p]PosMin2.=parent + numLeafs ; i}
    }
/*********************************************************\
*function: 哈夫曼编码
*input: Node *p: 结点（叶结点+合并结点）；int numLeafs：叶结点数
*output: string *codes: 编码表
*return: void
\*********************************************************/

void
:: huffmanHuffmanCode(*Node ,pint , numLeafs* string )codesint
{
    ; parentfor

    ( int= i 0 ;< i ; numLeafs++ i)int
    {
        = j ; iwhile
        ( [p]j.!=parent - 1)=
        {
            parent [ p]j.;parentif

            ( [p]parent.==lchild ) j[
            {
                codes]i.insert([codes]i.begin(),'0');}
            else
            [
            {
                codes]i.insert([codes]i.begin(),'1');}
            =

            j ; parent}

        }
    }

strcode

全文编码

思路：
（1）就是根据编码表对整个text文章进行编码
（2）因为需要创建一个字符数组用于存储编码，因此先计算所需的长度
（3）遍历所有字符，将每个字符对应的编码拼接到/*********************************************************\ *function: 计算全文编码存储在字符串中，所需的长度 *input: int *weight：各字符的权重；string *codes：编码表 *output: none *return: int Len: 长度 \*********************************************************/中。

int
GetCodeLen (int* ,weight* string )codesint
{
    = Len 0 ;for
    ( int= i 0 ;< i 52 ;++ i)//总共52个英文字符 //sum(每个字符的数量*对应的编码长度)
    {
        +=
        Len [ weight]i* [ codes]i.length();}
    return
    ; Len}
/*********************************************************\
*function: 全文编码
*input: char *str：读入的字符串；int *weight：各字符的权重；
*       string *codes：编码表；int num: 全文字符数
*output: none
*return: char *strcode: 编码后的字符串
\*********************************************************/

char
* Encode(char* ,strint * ,weight* string ,codesint ) numint
{
    = Len GetCodeLen (,weight) codes;//计算编码后所占用的长度 <<
    cout "编码长度：" << << Len ; endl//打印编码长度 char

    * =strcode new char []Len;//为strcode申请长度为Len的字符数组 [
    strcode0]= ';' //清空字符串for(

    int =0 i ; <; i ++ num) i//遍历所有字符if (
    {
        [ ]str'A'i&& >= [ ] str<=i'Z' ) char*
        {
           = (m char *) [(codesint)([]str-i'A' ) ].data();//当前字符对应的编码（string转换为char[])strcat (

           ,)strcode; m//将当前对应的编码拼接在strcode尾部} else
        if
        ( [ ]str'a'i&& >= [ ] str<=i'z' ) char*
        {
            = (m char *) [(codesint)([]str-i'a'+26)].data();strcat(
 
            ,)strcode; m}else
        }
        }
        {
        return
    ;
    
    } strcodestring
string

写到这里的时候，发现可以使用append字符串来存储编码更好，因此C++中operator +=在插入字符串货字符时，会自动为其分配合适的内存。
利用库中的strcode函数或者*来将字符串拼接在EncodeRepl尾部。


string (char*, intstr* , *weight, string intcodes) * num=
{
    string newstrcode string ( );for(
    int =0 i ; <; i ++ num) iif(
    {
        [ ]str'A'i&& >= [ ] str<=i'Z' ) //(*strcode).append(codes[(int)(str[i] - 'A')]);(
        {
            *
            )+=strcode[ ( codesint)([]str-i'A' ) ];}else

        if
        ( [ ]str'a'i&& >= [ ] str<=i'z' ) //(*strcode).append(codes[(int)(str[i]-'a'+26)]);(
        {
            *
            )+=strcode[ ( codesint)([]str-i'a'+26)];}else
        }
        }
        {

        return

    ;
    } strcode注
string

char *str：如果使用这个函数，其返回的是string str类型，因此对于译码函数的输入参数，需要将string改为push_back。

译码

思路：
（1）遍历全文编码段，将编码表依次与编码段进行匹配，如果匹配成功，则在继续匹配下一位；
（2）如果匹配不成功，则回溯到当前字符编码段的开头位置，继续与下一个编码表匹配。
（3）这里使用的是链表结构存储译码的字符串，链表代码参考线性表总结。简单点可以使用operator +=字符串存储，利用#或者ifndef将单个字符拼接在字符串的结尾即可。

.h文件如下：

__DECODE_H# define
__DECODE_H# include
#include 
"LinkList.h"using namespace
; class stddecode


public :
{
*Readhuffman
    LinkList (char*, *str) string ;codes//译码函数private :

char*
    ; //编码字符串str* ;
    string //编码表codes} ;
#endif

// __DECODE_H# include

.cpp文件如下：

"decode.h"/*********************************************************\
*function: 全文译码
*input: char *strcode：编码后的字符串；string *codes：编码表；
*output: none
*return: LinkList *stList：译码后的链表
\*********************************************************/ *

::
LinkList Readhuffmandecode(char*, *strcode) string intcodes=
{
    0 i ; //编码当前匹配的位置int =
    0 k ; //编码表的某一字母编码的匹配位置int =
    0 j ; //当前匹配的编码表的某一字母int =
    0 CurPos ; //当前正在匹配中的编码段的开头位置char ;
    //当前匹配成功的字符 Alp; //Link类对象创建
    Link DeLink* =

    LinkList newstList LinkList ( );//创建链表while (

    [ ]strcode!=i')' //遍历编码字符串 if( [
    {
        ] ==strcode[i] [ codes]j)//匹配成功k++; ++
        {
            i;if
            k([
            ] .k >= codeslengthj())if(<
            {
                26 )j = (char
                {
                    Alp ) (+'A')j ; }else=
                (
                char
                {
                    Alp ) (-26+j 'a' ) ; }.CreatList
                (
                
                DeLink,);Alp//将译码成功的字符添加到链表尾部 stList=0 ;

                k //从编码表的头开始 =; //下一个编码匹配的第一位
                CurPos = i0 ;
                j } }else
            //匹配不成功

        =
        ; //返回当前编码段的开头
        {
            i ++ CurPos;//编码表的下一位
            jif(  52
            ) <<j >= "\n decode error !"<<
            {
                cout ; break ; endl}
                =0
            ;

            k } }return
        ;

    }

    # stListinclude
#

主函数例程

include# 
include# 
include# 
include"calweight.h" 
#include "crhuffman.h"
#include "decode.h"
usingnamespace ;
int main std(

) int=0
{
    ; num char *=
    NULL ;str = ReadFile(
    str ) ;//读取文件数据numint[ 52

    ] weight=0} ; {;.CalWeight
    CalW Cal(
    Cal,);str//计算权重 weight<<"\n------------------------------------------------------------------------------\n" <<

    cout "权重：" << ;
    << setw endl(
    cout 8 );for(int
    = 0; i < 52; i ++ )//打印各个字符及其对应的权重 iif( <
    {
        26 )i << (char
        {
          cout ) (+'A')i<<[] << weightsetwi( 8 );}else<<
        (
        char
        {
            cout ) (-26+i'a') <<[] << weightsetwi( 8 );}if(
        !
        ( (+1)i%10)&&)<< ; i}
        {
            cout } endl/******************************编码********************************/
        [
    2
    
    *
    Node N52-1] ; //创建哈夫曼树的结点（52个叶结点+51个合并结点）[52 ]
    string codes;//52个编码表;. CreateHuffman
    
    huffman hu(
    hu,,52N) weight; //创建哈夫曼树.HuffmanCode (
    hu,52,N) ;//哈夫曼编码 codes<<"\n哈夫曼编码表：" <<

    cout ; for ( endlint
    = 0; i < 52; i ++ )//打印编码表 iif( <
    {
        26 )i << (char
        {
            cout ) (+'A')i<<setw( 20 )<<[] << codes;i} else endl<<
        (
        char
        {
            cout ) (-26+i'a') <<setw( 20 )<<[] << codes;i} } endlchar
        *
    =

    Encode (strcode , ,,str) weight; codes//全文编码 num<<"编码: \n" <<
    cout << ; //打印编码 strcode << endl"\n----------------------------------------------------------------------" <<
    cout ; /***************************译码***********************************/ ; endl*
    
    =
    decode DecNULL
    LinkList ;DecList //定义链表 ;<< "译码："
    Link L<<
    cout ; = . endlReadhuffman
    DecList ( Dec,);strcode//译码并将结构保存在链表中 codes.ScanList (
    L);//打印链表数据DecList/* 释放申请的内存，防止内存泄露 */delete  [
    
    ]
    ; delete[ strcode]
    ; delete; strreturn
    0 DecList;

    }

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/740986.html

哈夫曼编码

发表评论

评论列表（0条）