POJ 2050 Searching the Web

题意简述：做一个极其简单的搜索系统，对以下四种输入进行分析与搜索：

　　　　1. 只有一个单词：如 term，只需找到含有这个单词的document，然后把这个document的含有这个单词term的那些行输出。

　　　　2.term1 AND term2，找到同时含有term1 和 term2 的document，然后把这个document的含有这个单词term1 或 term2 的那些行输出。

　　　　3.term1 OR term2，找到含有term1 或 term2 的document，然后把这个document的含有这个单词term1 或 term2 的那些行输出。

　　　　4. NOT term：找到所有不含有 term 的 document，并把这些 document 的全部行（整篇内容）输出。

思路简述：

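　　　　用 map<string, Src> dict 记录每个单词出现在哪些文档的哪些行，其中 Src 是由“文档编号、行号”二元组（Figure）构成的 set；再用 map<string, docSrc> docMap 做映射，记录每个单词出现在哪些文档中。这样，如果查询中有两个单词，可以先分别求出各个单词所在的文档集合，遇到“AND”就进行 set_intersection（交集）运算，遇到“OR”就进行 set_union（并集）运算，最后在结果文档中输出含有 term1 或 term2 的那些行。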

/*

	Poj 2050

	Emerald

	10 May 2015

*/

#include <iostream>

#include <cstring>

#include <cstdio>

#include <cctype>

#include <sstream>

#include <string>

#include <vector>

#include <map>

#include <set>

#include <algorithm>

using namespace std;

// Position of one occurrence of a word: line `lineOrder` of document `docOrder`.
class Figure{

public:

	int docOrder, lineOrder;

	// Default position (0, 0); both members are always initialised so that a
	// default-constructed Figure can be compared or copied safely.
	Figure() : docOrder( 0 ), lineOrder( 0 ) {}

	// d : document number, l : line number inside that document
	Figure( int d, int l ) : docOrder( d ), lineOrder( l ) {}

};

// Strict weak ordering required by set/map: compare by document first, then by
// line, so iterating a set<Figure> visits positions document by document, top to bottom.
bool operator < ( const Figure &f1, const Figure &f2 ) {

	if( f1.docOrder != f2.docOrder ) {

		return f1.docOrder < f2.docOrder;

	}

	return f1.lineOrder < f2.lineOrder;

}

// Summary of one document: its number and how many lines it contains.
class Doc{

public :

	int docOrder, lineLimit;

	// Empty document number 0; both members are always initialised so that a
	// default-constructed Doc never exposes indeterminate values.
	Doc() : docOrder( 0 ), lineLimit( 0 ) {}

	// d : document number, l : number of lines in that document
	Doc( int d, int l ) : docOrder( d ), lineLimit( l ) {}

};

// Note: `set < Figure > src;` would define one concrete object; the typedef below names the type instead.

typedef set < Figure > Src; // set of (document, line) positions

typedef set < int > docSrc; // set of document numbers

map < string, docSrc > docMap; // word -> numbers of the documents that contain it

map < string, Src > dict; // word -> every (document, line) position where it occurs

map < Figure, string > wordLine; // (document, line) position -> original, unmodified text of that line

vector < Doc > docs; // one entry per document read, holding its number and line count

const string DOC_END = "**********"; // input line that terminates a document

// Expands to the begin/end iterator pair of container x.
// The argument is parenthesised so that any expression, not just a plain name, can be passed.
#define ALL(x) (x).begin(),(x).end()

// Expands to an insert iterator positioned at the beginning of container x.
#define INS(x) inserter((x),(x).begin())

void Standard( string &line ); // in place: lower-case every letter and turn every other character into a space

void WordRecord( string &line, int docOrder, int lineOrder ); // split the line into words and record each one in dict and docMap

void PrintSrc( Src &src ); // print the stored text of every position in src, or the "nothing found" message when src is empty

void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ); // add to src the positions of w1 and w2 that lie in the documents listed in dsrc

void NotWord( string& word, Src &src ); // add to src every line of every document that does not contain word

int main() {

	int allDocs, queries;

	// input docs

	scanf( "%d", &allDocs );

	cin.get();

	for( int i=0; i<allDocs; i++ ) {

		string line;

		int lineCounter = 0;

		while( getline( cin, line ) && line != DOC_END ) { // read until end

			wordLine[ Figure( i, lineCounter ) ] = line;

			Standard( line );

			WordRecord( line, i, lineCounter ++ );

		}

		docs.push_back( Doc( i, lineCounter ) );

	}

	// input queries

	string command;

	scanf( "%d", &queries );

	cin.get();

	while( queries -- ) {

		getline( cin, command );

		Standard( command );

		Src src;

		if( command.find_last_of( ' ' ) == string::npos ) { // no whitespace

			Standard( command );

			src = dict[ command ];

		} else if( command.find_last_of( ' ' ) != command.find_first_of( ' ' ) ) { // if there're two different whitespaces

			stringstream ss( command );                                          // xxx AND/OR xxx

			string w1, w2, connected;

			ss >> w1 >> connected >> w2;

			docSrc dSrc1 = docMap[ w1 ];

			docSrc dSrc2 = docMap[ w2 ];

			docSrc dsrc;

			if( connected == "and" ) {

				set_intersection( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // intersection

			} else {

				set_union( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // union

			}

			WordsToFigure( src, dsrc, w1, w2 );

		} else { // only one whitespace -> Not xxx

			stringstream ss( command );

			string w1;

			ss >> w1 >> w1;

			NotWord( w1, src );

		}

		PrintSrc( src );

	}

	return 0;

}

// Normalises `line` in place: every alphabetic character becomes its lower-case
// form and every other character (digits, punctuation, blanks, ...) becomes a
// single space, so that the result can be split into words on whitespace.
// The length of the string never changes.
void Standard( string &line ) {

	for( string::size_type i=0; i<line.length(); i++ ) {

		// <cctype> functions require a value representable as unsigned char;
		// passing a negative plain char (any byte >= 0x80) is undefined behaviour.
		const unsigned char ch = static_cast< unsigned char >( line[i] );

		if( isalpha( ch ) ) {

			line[i] = static_cast< char >( tolower( ch ) ); // such as 'A' to 'a'

		} else {

			line[i] = ' ';

		}

	}

}

void WordRecord( string &line, int docOrder, int lineOrder ) {

	stringstream ss( line );

	string word;

	while( ss >> word ) {

		if( dict.count( word ) ) { // whether the word has been found in the total input

			if( !dict[word].count( Figure( docOrder, lineOrder ) ) ) { // whether the word has been found in this line

				dict[word].insert( Figure( docOrder, lineOrder ) ) ;

			}

		} else {

			Src src;

			src.insert( Figure( docOrder, lineOrder ) );

			dict[ word ] = src;

		}

		if( docMap.count( word ) ) { // whether the word has been found in this document

			docMap[word].insert( docOrder );

		} else {

			docSrc ds;

			docMap[word] = ds;

			docMap[word].insert( docOrder );

		}

	}

}

// Prints the stored text of every position in `src`, in set order.
// Lines belonging to different documents are separated by a row of dashes and
// the whole answer is closed by a row of equals signs; an empty `src` prints
// the "nothing found" message instead.
void PrintSrc( Src &src ) {

	if( src.empty() ) {

		printf("Sorry, I found nothing.\n");

		printf( "==========\n" );

		return ;

	}

	int shownDoc = src.begin()->docOrder; // document of the most recently printed line

	for( Src :: iterator cur = src.begin(); cur != src.end(); ++cur ) {

		if( cur->docOrder != shownDoc ) { // crossed into the next document

			printf( "----------\n" );

			shownDoc = cur->docOrder;

		}

		printf( "%s\n", wordLine[ *cur ].c_str() );

	}

	printf( "==========\n" );

}

// Adds to `src` every recorded position of `w1` and of `w2` whose document
// number is listed in `dsrc`.
// Each word's position set is walked exactly once and the document is looked
// up in `dsrc`, instead of re-scanning both position sets for every document.
void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ) {

	const string* wanted[2] = { &w1, &w2 };

	for( int k=0; k<2; k++ ) {

		// find() rather than operator[] so that unknown words do not create empty entries in dict
		map < string, Src > :: const_iterator entry = dict.find( *wanted[k] );

		if( entry == dict.end() ) {

			continue; // this word never occurs in any document

		}

		const Src &positions = entry->second;

		for( Src :: const_iterator pos = positions.begin(); pos != positions.end(); ++pos ) {

			if( dsrc.count( pos->docOrder ) ) {

				src.insert( *pos ); // the word appears in this line of a selected document

			}

		}

	}

}

void NotWord( string& word, Src &src ) { // not this word

	docSrc dsrc = docMap[ word ];

	vector< Doc > :: iterator it;

	for( it = docs.begin(); it != docs.end(); it ++ ) {

		if( !dsrc.count( it->docOrder ) ) {

			for( int i=0; i< it->lineLimit; i ++ ) {

				src.insert( Figure( it->docOrder, i ) );

			}

		}

	}

}

POJ 2050 Searching the Web的更多相关文章

[刷题]算法竞赛入门经典(第2版) 5-10/UVa1597 - Searching the Web
题意:不难理解,照搬题意的解法. 代码:(Accepted,0.190s) //UVa1597 - Searching the Web //#define _XIENAOBAN_ #include&l ...
Searching the Web论文阅读
Searching the Web (Arvind Arasu etc.) 1. 概述 2000年,23%网页每天更新,.com域内网页40%每天更新.网页生存半衰期是10天.描述方法可用Pois ...
uva 1597 Searching the Web
The word "search engine" may not be strange to you. Generally speaking, a search engine se ...
Searching the Web UVA - 1597
The word "search engine" may not be strange to you. Generally speaking, a search engine ...
【习题 5-10 UVA-1597】Searching the Web
[链接] 我是链接,点我呀:) [题意] 在这里输入题意 [题解] 用map < string,vector < int > >mmap[100];来记录每一个数据段某个字符串 ...
poj很好很有层次感（转）
OJ上的一些水题(可用来练手和增加自信) (POJ 3299,POJ 2159,POJ 2739,POJ 1083,POJ 2262,POJ 1503,POJ 3006,POJ 2255,POJ 30 ...
POJ题目分类推荐（很好很有层次感）
著名题单,最初来源不详.直接来源:http://blog.csdn.net/a1dark/article/details/11714009 OJ上的一些水题(可用来练手和增加自信) (POJ 3299 ...
Multiple actions were found that match the request in Web Api
https://stackoverflow.com/questions/14534167/multiple-actions-were-found-that-match-the-request-in-w ...
zz A list of open source C++ libraries
A list of open source C++ libraries < cpp‎ | links http://en.cppreference.com/w/cpp/links/libs Th ...

随机推荐

教你使用python获得字符串的md5值
最近需要使用python获取字符串的md5值. 今天把代码贴出来和大家分享一下. 01 #!/usr/bin/env python 02 # -*- coding: cp936 -*- 03 impo ...
vs2010更改默认环境设置
今天刚刚装vs2010手欠点击了新建团队项目,在百度上各种查找说让我去 visual studio tools的命令提示中进行 devenv命令行修改 ResetString但是没找到我设置文件的路径 ...
C++ const 限定符
C++ const 限定符作用:把一个对象转换成一个常量用法:const type name = value; 性质:1. 定义时必须初始化,定义后不能被修改.2. 类中的const成员变量必须通 ...
Oracle SQL篇（四）group by 分组与分组的加强 rollup
分组操作group by 和分组的强化(rollup) 分组操作和分组函数的使用,对于编写SQL语句的人来说,是最基本的概念. 我们来看下面的例子: 在这里我们使用员工表EMP scott@D ...
如何将windows版的vim界面语言（默认为中文）设置成英文
用安装包安装windows版本的vim(下载地址:http://www.vim.org/download.php),vim会自动根据windows的语言设置vim的界面语言.如何将其改为英文呢? 在v ...
C# 方法的可选参数、命名参数
原文 http://www.cnblogs.com/lonelyxmas/admin/EditPosts.aspx?opt=1 C#方法的可选参数是.net 4.0最新提出的新的功能,对应简单的重载可 ...
简识UML语言（转）
在学习过程中用到了一些框图,用于绘画框图的语言数不胜数,本篇将就学习中用的一款绘画框图的语言做个简单的介绍. 直奔主题,采用一种可视化的面向对象的建模语言---UML,UML使用一些标准的圆形元素直观 ...
Jass 技能模型定义(转)
Jass是什么? 先阐释一下什么是jass吧,百度:JASS(正确地说是JASS 2)是魔兽3的程序语言,用于控制游戏和地图的进行,也是魔兽游戏和地图的基础. 地图编辑器中摆放的单位(Un ...
[Python]Unicode转ascii码的一个好方法
写这篇文章的是一位外国人,他遇到了什么问题呢?比如有一个 Unicode 字符串他需要转为 ascii码: >>> title = u"Klüft skräms inför ...
python利用utf-8编码判断中文英文字符(转)
下面这个小工具包含了判断unicode是否是汉字.数字.英文或者其他字符,全角符号转半角符号,unicode字符串归一化等工作. #!/usr/bin/env python # -*- coding: ...

POJ 2050 Searching the Web

POJ 2050 Searching the Web的更多相关文章

随机推荐

热门专题