PTA 05-树9 Huffman Codes (30分)

题目地址

https://pta.patest.cn/pta/test/16/exam/4/question/671

5-9 Huffman Codes (30分)

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer $N$ ($2 \le N \le 63$), then followed by a line that contains all the $N$ distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer $M$ ($\le 1000$), then followed by $M$ student submissions. Each student submission consists of $N$ lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7

A 1 B 1 C 1 D 3 E 3 F 6 G 6

4

A 00000

B 00001

C 0001

D 001

E 01

F 10

G 11

A 01010

B 01011

C 0100

D 011

E 10

F 11

G 00

A 000

B 001

C 010

D 011

E 100

F 101

G 110

A 00000

B 00001

C 0001

D 001

E 00

F 10

G 11

Sample Output:

Yes

Yes

No

No

/*

检查霍夫曼编码

	读取字母表

	构建自己的霍夫曼树

		利用最小堆管理节点

			堆的插入——放在最后，然后上滤

			删除——弹出顶后，将最后的元素放到堆顶，然后下滤

	计算最小编码开销

	读取学生构建的编码表

		如果是0，往左查，如果1，往右查。查不到就申请节点

			如果路上遇到带word的节点，直接把flag设成ERROR

			如果没有遇到flag，看看最后落下的位置有没有子树，如果有，那么这个节点是别人的父节点，不能插入word，报ERROR

			如果一切正常，在该位置设置word

		如不出错，查表找出当前字符的frequency，乘上码长得到开销 ，累加到总开销上 

评测结果

时间	结果	得分	题目	编译器	用时（ms）	内存（MB）	用户

2017-07-01 01:34	正在评测	0	5-9	gcc	无	无

测试点结果

测试点	结果	得分/满分	用时（ms）	内存（MB）

测试点1	答案正确	16/16	2	1

测试点2	答案正确	7/7	1	1

测试点3	答案正确	3/3	1	1

测试点4	答案错误	0/1	27	1   <————实在不知道什么情况

测试点5	答案正确	1/1	18	1

测试点6	答案正确	1/1	1	1

测试点7	答案正确	1/1	1	1

*/

#include<stdio.h>

#include<stdlib.h>

/*
 * Debug trace hook, compiled out.
 *
 * The previous definition (`#define DBG` followed by a line comment) only
 * worked because comments are removed before macros are processed: DBG
 * expanded to nothing and every `DBG("fmt", a, b);` call site degenerated
 * into the comma expression `("fmt", a, b);`, whose operands were still
 * evaluated. This variadic form discards its arguments entirely.
 *
 * To turn tracing on, replace the expansion with printf(__VA_ARGS__).
 */
#define DBG(...) ((void)0)

/* Value of HuffNode.word for a node that does not carry a symbol. */
#define NOTLEAF '*'

/* Status value used by CheckCodes to mark a rejected submission. */
#define ERROR 8

/*
 * Node of a binary code tree. The same layout is used for the entries of
 * codeTable, for the Huffman tree built from the heap, and for the trie
 * that CheckCodes grows from a submission.
 */
struct HuffNode {
	char word;               /* symbol held by the node, or NOTLEAF */
	int freq;                /* frequency (sum of both children for merged nodes) */
	struct HuffNode *left;   /* child followed on a '0' bit in CheckCodes */
	struct HuffNode *right;  /* child followed on a '1' bit in CheckCodes */
};

/* Handle type used throughout the file: a pointer to a HuffNode. */
typedef struct HuffNode *HuffTree;

/* Symbol/frequency pairs in input order; filled by main, searched by GetFreq. */
struct HuffNode codeTable[100];

/*
 * Storage for the min-heap of tree nodes ordered by freq. The heap is
 * 1-based: live entries occupy gHeap[1] .. gHeap[gHeapLen].
 */
struct HuffNode *gHeap[100];

/* Number of nodes currently stored in gHeap. */
int gHeapLen = 0;

/*
 * Insert node T into the global min-heap (gHeap / gHeapLen), keyed on freq.
 *
 * A hole is opened at the new last slot and moved towards the root while
 * T is smaller than the hole's parent; T is then stored in the hole.
 *
 * Fix: the comparison must use T->freq. The earlier code compared
 * gHeap[i]->freq, which after the first shift is the parent that still
 * sits in the hole, so the loop always stopped after one level and a
 * small key could be left below a larger ancestor.
 *
 * The caller is responsible for not exceeding the capacity of gHeap.
 */
void InsertIntoHeap(HuffTree T)
{
	int hole;

	gHeapLen++;
	hole = gHeapLen;
	while (hole > 1 && T->freq < gHeap[hole / 2]->freq) {
		gHeap[hole] = gHeap[hole / 2];   /* pull the parent down into the hole */
		hole /= 2;
	}
	gHeap[hole] = T;
}

/*
 * Debug helper: pass the first n entries of codeTable and every live heap
 * entry to the DBG trace macro. Whether anything is actually printed
 * depends on how DBG is defined.
 */
void DBG_showstatus(int n)
{
	int idx = 0;

	while (idx < n) {
		DBG("showstatus_codeTable:+%c : %d\n",codeTable[idx].word,codeTable[idx].freq);
		idx++;
	}

	DBG("+gHeapLen:%d",gHeapLen);

	/* The heap is 1-based. */
	idx = 1;
	while (idx <= gHeapLen) {
		DBG("showstatus_Heap:++%c : %d\n",gHeap[idx]->word,gHeap[idx]->freq);
		idx++;
	}
}

HuffTree PopHeap() //弹出堆顶元素并整堆

{

	if(gHeapLen<1) return NULL;//当前存量小于1说明有问题 

	int i,parent,child;

	HuffTree heapTop,temp;

	heapTop=gHeap[1];

	gHeap[1]=gHeap[gHeapLen];

	gHeapLen--;

	parent=1;

	temp=gHeap[parent];

	while(parent*2<=gHeapLen)

	{

		child=2*parent;

		if(child*2!=gHeapLen)

		{

			if(gHeap[child]->freq  > gHeap[child+1]->freq)

				child++;

		}

		if (temp->freq > gHeap[child]->freq)

		{

			gHeap[parent]=gHeap[child];

			parent=child;

		}

		else break;

	}

	gHeap[parent]=temp;

	return heapTop;

}

/*
 * Free every node of the tree rooted at A: left subtree first, then the
 * right subtree, then A itself. A NULL tree is accepted and ignored.
 */
void DestroyHuffTree(HuffTree A)
{
	if (A != NULL) {
		HuffTree leftChild = A->left;
		HuffTree rightChild = A->right;

		DestroyHuffTree(leftChild);
		DestroyHuffTree(rightChild);
		free(A);
	}
}

HuffTree CreateHuffTreeNode() //申请新的节点。此函数刚开始忘了给申到的节点赋初值，导致不少错误

{

	HuffTree T=malloc(sizeof(struct HuffNode));

	T->word=NOTLEAF;

	T->left=NULL;

	T->right=NULL;

	T->freq=0;

	return T;

}

HuffTree BuildHuffTree() //把堆里的数据处理成一颗编码树

{

	HuffTree T,A,B;

	while(gHeapLen>=2)

	{

		A=PopHeap();

		B=PopHeap();

		T=CreateHuffTreeNode();

		T->word=NOTLEAF;

		T->freq=A->freq+B->freq;

		T->left=A;

		T->right=B;

		InsertIntoHeap(T);

		DBG("In BuildHuffTree %d T->word\n",T->word);

	}

	return T;

}

/*
 * Look up the frequency recorded for character c.
 *
 * c: character to search for.
 * n: number of valid entries at the start of codeTable.
 *
 * Returns the freq of the first entry whose word equals c, or 0 when no
 * entry matches. (Previously control fell off the end of the function in
 * that case and the caller used an indeterminate value.)
 */
int GetFreq(char c,int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (codeTable[i].word == c)
			return codeTable[i].freq;
	}

	return 0;
}

/*
 * Weighted path length of the tree rooted at T, where T itself sits at
 * depth `deepth`.
 *
 * A NULL tree contributes 0. A node whose word is not NOTLEAF contributes
 * freq * deepth. Any other node contributes the sum of its two subtrees,
 * each evaluated one level deeper.
 */
int Calcwpl(HuffTree T,int deepth)
{
	int leftCost, rightCost;

	if (T == NULL) {
		DBG("In Calcwpl return a null\n");
		return 0;
	}

	if (T->word == NOTLEAF) {
		DBG("In Calcwpl return a NOTLEAF\n");
		leftCost = Calcwpl(T->left, deepth + 1);
		rightCost = Calcwpl(T->right, deepth + 1);
		return leftCost + rightCost;
	}

	DBG("In Calcwpl T->word = %c,return %d*depth %d=%d\n",T->word,T->freq,deepth,T->freq*deepth);
	return T->freq * deepth;
}

/*
 * Add one code word to the trie rooted at `root` and label its final node
 * with `symbol`. A '0' follows/creates the left child, a '1' the right.
 *
 * Returns the code length on success, or -1 when the code is rejected:
 *   - it contains a character other than '0' or '1', or is empty;
 *   - the path runs into a node that already carries a symbol (an earlier
 *     code is a prefix of, or equal to, this one);
 *   - the final node already has children (this code is a prefix of an
 *     earlier one).
 */
static int InsertCodeIntoTrie(HuffTree root, const char *code, char symbol)
{
	HuffTree node = root;
	int length = 0;

	for (; code[length] != '\0'; length++) {
		HuffTree *slot;

		if (code[length] == '0')
			slot = &node->left;
		else if (code[length] == '1')
			slot = &node->right;
		else
			return -1;

		if (*slot == NULL)
			*slot = CreateHuffTreeNode();   /* new nodes start as NOTLEAF */
		else if ((*slot)->word != NOTLEAF)
			return -1;

		node = *slot;
	}

	if (length == 0 || node->left != NULL || node->right != NULL)
		return -1;

	node->word = symbol;
	return length;
}

/*
 * Read one submission of `len` lines ("<char> <code>") from stdin and
 * print "Yes" if the codes form a prefix code whose total cost
 * (sum of code length * frequency) equals `wpl`, otherwise "No".
 *
 * All `len` lines are consumed even after the submission is known to be
 * wrong, so that the next submission starts at the right place. If input
 * ends early the submission is reported as "No".
 *
 * Token reads are width-limited to the buffer sizes, and the scanf result
 * is checked, so malformed input cannot overrun the buffers or cause
 * stale data to be reused.
 */
void CheckCodes(int len,int wpl)
{
	char symbol[100], code[1000];
	int i, codeLen;
	int valid = 1, totalcost = 0;
	HuffTree root = CreateHuffTreeNode();

	for (i = 0; i < len; i++) {
		if (scanf("%99s%999s", symbol, code) != 2) {
			valid = 0;
			break;
		}
		getchar();   /* consume the separator that follows the code */

		if (!valid)
			continue;   /* already rejected: just drain the remaining lines */

		codeLen = InsertCodeIntoTrie(root, code, symbol[0]);
		if (codeLen < 0) {
			valid = 0;
			continue;
		}
		totalcost += codeLen * GetFreq(symbol[0], len);
	}

	printf("%s\n", (valid && totalcost == wpl) ? "Yes" : "No");

	DestroyHuffTree(root);
}

int main()

{

	int i,j,tmpi,N,M,wpl;

	char tmpc;

	HuffTree T;

	scanf("%d\n",&N);

	for(i=0;i<N;i++)

	{

		tmpc=getchar();

		scanf("%d ",&tmpi);

		codeTable[i].word=tmpc; //填编码表

		codeTable[i].freq=tmpi;

		T=CreateHuffTreeNode(); //创建节点然后往堆里插

		T->freq=tmpi;

		T->word=tmpc;

		InsertIntoHeap(T);

	}

									DBG_showstatus(N);

	T=BuildHuffTree();

									DBG_showstatus(N);

	wpl=Calcwpl(T,0);

									DBG("WPL=%d\n",wpl);

	scanf("%d\n",&M);

	for(j=0;j<M;j++)

		CheckCodes(N,wpl);

									DBG_showstatus(N);

	return 0;

}