[C++] 二叉树计算文件单词数

使用二叉树计算文件单词数：

您的程序必须接受命令行上不确定数量的参数，第一个参数指定输出文件，其余指定输入文件。例如，参数可能如下所示：

输出文件输入文件1 输入文件 2 …

有了这些参数，程序将依次打开和读取每个输入文件，建立一个二叉树（在构建时, 应将树保持为完整的二叉树），并在其进行过程中进行计数。

读取并关闭所有文件后，它必须创建输出文件并写出树中的单词（您应该使用6种方法遍历树：顺序、预序和后序，包括递归和非递归），每行一个单词，以及这个词的出现。

您的程序应忽略单词的大小写，以便 "This" 和 "this" 被视为相同。然而，实际上拼写不同的单词，如 "car" 和 "cars" 被认为是不同的单词。

为了允许很长的输入文件，每个单词出现次数的字段宽度应至少为六位十进制数字。您还应该汇总并打印不同单词的数量。

上述要求都是英文必应翻译的，意思差不多到了，主要就是用二叉树统计单词数量。那么为什么选用二叉树呢？我个人的看法是因为二叉树便于搜索，如果是线性表的话，进行搜索难免会出现从头检索到尾的情况，即使做了许多优化也不一定能有二叉树的效率。

前置技能

构造和遍历二叉树

这些可以在上一篇前中后序遍历二叉树中找到相应的介绍。此外，本次的程序还需要实现递归遍历。递归遍历与非递归相比简单很多，但与所有的递归算法相同，开销相对较大在数据量很大的情况下会造成栈溢出。（本文之后将在同文集转载一篇博文来描述这件事情https://www.cnblogs.com/bakari/p/5349383.html）

文件的打开、读取和写入

在C++中可以方便地调用#include <fstream>进行文件的读写，它定义了三种数据类型：

ofstream 表示输出文件流，用于创建文件并向文件写入信息。
ifstream 表示输入文件流，用于从文件读取信息。
fstream 表示文件流，且同时具有 ofstream 和 ifstream 两种功能。

我们使用open()函数打开文件，使用close()函数关闭文件；读取和写入则与cin、cout相同，使用<<和>>。

    #include<fstream>

    //读取指定位置文件

    string inf;

    ifstream infile;

    infile.open(inf.data());

    char c;

    infile >> c;

    infile.close();

end of file：文件的末尾会有一个叫做eof的标记，我们可以使用infile.eof() 方便地判断是否到达文件末尾。

需求描述

原理上我们需要进行文件的写入，但是我写这个代码时间比较短，又懒得debug，这个功能就被我遗弃了。

读取文件

读取文件，并一个一个字符地进行单词判断（此时应注意忽略大小写）。

    string s = " ";

    char c;

    while (!infile.eof()){

        infile >> c;

        //将字符链接到字符串s后

        if((c >= 'a' && c <= 'z') || ( c >= '0' && c <= '9')){

            if(s == " ")	s = c;

            else			s += c;

        }

        //忽略大小写

        else if(c >= 'A' && c <= 'Z'){

            if(s == " ")	s = c;

            else			s += (c - 32);

        }

        //当读到非字母或数字时，将非空的字符串s插入到二叉树中

        else if(s != " "){

            BinaryTreeNode * temp = new BinaryTreeNode(s);

            n = t.sTreeNode(temp);

            s = " ";

        }

    }

构建二叉树

以第一个单词构建二叉树，比较并插入其它单词的二叉树节点。这要求我们的二叉树节点类包括以下内容：在这里直接友元二叉树类很明显是我偷懒啦

class BinaryTreeNode{

private:

	int count;                      //计数君

	string word;                    //单词

	BinaryTreeNode * leftChild;     //左孩子

	BinaryTreeNode * rightChild;    //右孩子

public:

	BinaryTreeNode();

	BinaryTreeNode(string & str);   //重载构造函数

	friend class BinaryTree;

};

插入时修改了层次遍历的代码，遍历二叉树时直接用 string 标准库的重载函数进行比较，如果相同则计数并删除。

if(pointer->word == node->word){

    pointer->count++;

    delete node;

    return true;

}

格式化输入输出

鉴于代码直接从上面二叉树拉过来的，直接修改visit()函数，以及在遍历时做好计数即可。

在老师给的文档里特别声明了，因为节点类中包括 string 类型成员变量，我们需要在析构的时候特别注意……对不起我忘了注意了！但因为没有 error 所以最后也没注意

具体实现

main.cpp

#include"binarytree.h"

#include <iostream>

#include <fstream>

#include <string>

using namespace std;

int main(){

	string inf;

	BinaryTree t;

	while(cin >> inf){

		ifstream infile;

		infile.open(inf.data());

		string s = " ";

		char c;

		bool n;

		while (!infile.eof()){

			infile >> c;

			if((c >= 'a' && c <= 'z') || ( c >= '0' && c <= '9')){

				if(s == " ")	s = c;

				else			s += c;

			}

			else if(c >= 'A' && c <= 'Z'){

				if(s == " ")	s = c;

				else			s += (c - 32);

			}

			else if(s != " "){

				BinaryTreeNode * temp = new BinaryTreeNode(s);

				n = t.sTreeNode(temp);

				s = " ";

			}

		}

		infile.close();

	}

	cout<<"preorder"<<endl;

	t.preOrder();

	cout<<"inorder"<<endl;

	t.inOrder();

	cout<<"postorder"<<endl;

	t.postOrder();

	cout<<"finish"<<endl;

	t.dpreOrder(t.root);

	t.dinOrder(t.root);

	t.dpostOrder(t.root);

	return 0;

}

binarytree.h

#ifndef BINARYTREE_H

#define BINARYTREE_H

#include <string>

using namespace std;

class BinaryTreeNode;

class BinaryTree;

class BinaryTreeNode{

private:

	int count;                      //计数君

	string word;                    //单词

	BinaryTreeNode * leftChild;     //左孩子

	BinaryTreeNode * rightChild;    //右孩子

public:

	BinaryTreeNode();

	BinaryTreeNode(string & str);

	friend class BinaryTree;

};

class BinaryTree{

public:

	BinaryTreeNode * root;      //根节点

	BinaryTree();               //默认构造

	BinaryTree(BinaryTreeNode * r);

	~BinaryTree();              //析构

	bool sTreeNode(BinaryTreeNode * node);     //前序遍历并插入节点

	void visit(BinaryTreeNode * pointer);      //访问

	void dpreOrder(BinaryTreeNode * node);     //先序遍历

	void dinOrder(BinaryTreeNode * node);      //中序遍历

	void dpostOrder(BinaryTreeNode * node);    //后序遍历

	void preOrder();            //先序遍历

	void inOrder();             //中序遍历

	void postOrder();           //后序遍历

	void levelOrder();

};

#endif //BINARYTREE_H

binarytree.cpp

#include"binarytree.h"

#include<iostream>

#include<queue>

#include<stack>

using namespace std;

BinaryTreeNode::BinaryTreeNode(){

	count = 0;

	word = " ";

	leftChild = nullptr;

	rightChild = nullptr;

}

BinaryTreeNode::BinaryTreeNode(string & str){

	count = 1;

	word = str;

	leftChild = nullptr;

	rightChild = nullptr;

}

BinaryTree::BinaryTree(){

	root = nullptr;

}

BinaryTree::BinaryTree(BinaryTreeNode * r){

	root = r;

}

BinaryTree::~BinaryTree(){

	root = nullptr;

}

void BinaryTree::visit(BinaryTreeNode * pointer){

	cout<<pointer -> count<<" | "<< pointer -> word <<endl;

}

void BinaryTree::dpreOrder(BinaryTreeNode * node){

	if (node == nullptr)

		return;

	visit(node);

	dpreOrder(node->leftChild);  //递归遍历左子树

	dpreOrder(node->rightChild); //递归遍历右子树

}

void BinaryTree::dinOrder(BinaryTreeNode * node){

	if (node == nullptr)

		return;

	dpreOrder(node->leftChild);  //递归遍历左子树

	visit(node);

	dpreOrder(node->rightChild); //递归遍历右子树

}

void BinaryTree::dpostOrder(BinaryTreeNode* node){

	if (node == nullptr)

		return;

	dpreOrder(node->leftChild);  //递归遍历左子树

	dpreOrder(node->rightChild); //递归遍历右子树

	visit(node);

}

void BinaryTree::preOrder(){

	int word = 0,num = 0;

	stack<BinaryTreeNode * > nodeStack;

	BinaryTreeNode * pointer = root;

	while(!nodeStack.empty()||pointer != nullptr){

		if(pointer != nullptr){

			visit(pointer);

			word += pointer->count;

			num++;

			if(pointer -> rightChild !=  nullptr)

				nodeStack.push(pointer -> rightChild);

			pointer = pointer -> leftChild;

		}

		else{

			pointer = nodeStack.top();

			nodeStack.pop();

		}

	}

	std::cout<<word<<" | "<<num<<endl;

}

void BinaryTree::inOrder(){

	int word = 0,num = 0;

	using std::stack;

	stack<BinaryTreeNode * > nodeStack;

	BinaryTreeNode * pointer = root;

	while(!nodeStack.empty()||pointer){

		if(pointer){

			nodeStack.push(pointer);

			pointer = pointer -> leftChild;

		}

		else{

			pointer = nodeStack.top();

			visit(pointer);

			word += pointer->count;

			num++;

			pointer = pointer -> rightChild;

			nodeStack.pop();

		}

	}

	std::cout<<word<<" | "<<num<<endl;

}

void BinaryTree::postOrder(){

	int word = 0,num = 0;

	using std::stack;

	stack<BinaryTreeNode * > nodeStack;

	BinaryTreeNode * pointer = root;

	BinaryTreeNode * pre = root;

	while(pointer != nullptr){

		while(pointer -> leftChild != nullptr){

			nodeStack.push(pointer);

			pointer = pointer -> leftChild;

		}

		while(pointer != nullptr && (pointer -> rightChild == nullptr

             || pre == pointer -> rightChild)){

			visit(pointer);

			word += pointer->count;

			num++;

			pre = pointer;

			if (nodeStack.empty()){

				pointer = nullptr;

			}

			else{

				pointer = nodeStack.top();

				nodeStack.pop();

			}

		}

		if (pointer != nullptr){

			nodeStack.push(pointer);

			pointer = pointer -> rightChild;

		}

	}

	std::cout<<word<<" | "<<num<<endl;

}

bool BinaryTree::sTreeNode(BinaryTreeNode * node){

	if(root == nullptr){

		root = node;

		return true;

	}

	using std::queue;

	queue<BinaryTreeNode *>nodeQueue;

	BinaryTreeNode * pointer = root;

	if(pointer != nullptr){

		nodeQueue.push(pointer);

	}

	while(!nodeQueue.empty()){

		pointer = nodeQueue.front();

		if(pointer->word == node->word){

			pointer->count++;

			delete node;

			return true;

		}

		nodeQueue.pop();

		if (pointer->leftChild){

			nodeQueue.push(pointer->leftChild);

		}

		else{

			pointer -> leftChild = node;

			return true;

		}

		if (pointer->rightChild){

			nodeQueue.push(pointer->rightChild);

		}

		else{

			pointer -> rightChild = node;

			return true;

		}

	}

	delete node;

	return false;

}

void BinaryTree::levelOrder(){

	int word = 0,num = 0;

	using std::queue;

	queue<BinaryTreeNode*>nodeQueue;

	BinaryTreeNode * pointer = root;

	if(pointer)

		nodeQueue.push(pointer);

	while(!nodeQueue.empty()){

		pointer = nodeQueue.front();

		nodeQueue.pop();

		word += pointer->count;

		num++;

		if(pointer->leftChild)	nodeQueue.push(pointer->leftChild);

		if(pointer->rightChild)	nodeQueue.push(pointer->rightChild);

	}

	std::cout<<word<<" | "<<num<<endl;

}

都说软院学生是最乖的，因为我们天天在想：我哪错了？

前置技能
- 构造和遍历二叉树
- 文件的打开、读取和写入
需求描述
具体实现