poj_1743 后缀数组

题目大意

给定一串数字，长度为N。定义数字中的某个连续的子串为一个"theme"，只要子串满足:
(1)长度 >= 5
(2)和该子串相同或者该子串的“变种串”在整串数字中出现次数大于1
(3)假设整串中有k个该子串及其“变种串”，那么其中至少有两个不相重叠
求满足要求的 "theme" 串的最长长度。

题目分析

(1)首先考虑将“变种”串和原子串相互比较的问题，对字符串中所有索引大于等于1的字符都用该字符减去前一个字符，这样得到串的差串之后，原theme和其“变种”就一样了，此时只需要求差串中的最长相同子串，且这些子串之间不重叠。

求最长相同子串，可以考虑使用后缀数组和height数组。显然，height越大，则两个子串的公共前缀越长，越有可能是最长相同子串。但是，题目对"theme"串的要求(3)至少两个不重叠，因此需要考虑height[i]在尽可能大的同时，保证SA[i]和SA[i-1]之间的差值要大于height[i]以保证不重叠。

(2)然后，试图求解是否存在长度为M的"theme"串。
容易看出，后缀Suffix(j)和Suffix(k)的最长公共前缀的长度为 height[rank[j]+1], height[rank[j]+2]...height[rank[k]]的最小值。i从1到N遍历，通过height[i]>=M将i分开，即将后缀分成若干组，每组中的后缀的公共前缀长度均大于等于M，且可以肯定组A中的某后缀t1和组B中的某后缀t2的公共前缀长度小于M。若存在这样的组，则可以确定找到了公共前缀大于等于M的子串，下一步需要确定这些子串不重叠。只需要在组内寻找 SA[i] 之间最大的差值，看是否大于子串的长度，若大于则可以确定不重叠。

(3)最后，求解"theme"串长度M的最大值，用二分法对"theme"串的可能长度进行二分求解，长度范围为0到N。每次二分得到中值M，先判断能否找到长度为M的"theme"串，若不能，则减小M，否则增加M。直到找到长度M最大的"theme"串。

实现(c++)

#define _CRT_SECURE_NO_WARNINGS

#include<stdio.h>

#include<string.h>

/*
 * Larger of a and b.
 * Both arguments and the whole expansion are parenthesised so the macro
 * composes correctly inside larger expressions (e.g. 2 * MAX(x, y)).
 * Each argument may still be evaluated twice, so avoid side effects.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Capacity (in elements) of every working array declared below. */
#define MAX_ARRAY_SIZE 20005

/*
 * Size of the key range used by the first counting-sort pass in
 * GetSuffixArray: only buckets [0, LETTERS) are prefix-summed there, so
 * symbol values in gStr must stay below this bound.
 */
#define LETTERS 10000

/* Number of symbols of gStr that the suffix-array routines process. */
int gStrLen;

/*
 * The symbol sequence being indexed. main() stores the shifted adjacent
 * differences of the input here and sets the last processed element to 0.
 */
int gStr[MAX_ARRAY_SIZE];

/* Bucket counters for the counting sorts in GetSuffixArray. */
int gCount[MAX_ARRAY_SIZE];

/* gSuffixArray[r] is the start index of the suffix placed at sorted position r. */
int gSuffixArray[MAX_ARRAY_SIZE];

/*
 * Rank buffer. GetSuffixArray uses it as one of two buffers that are swapped
 * every doubling round; GetHeight then rewrites entries with each suffix's
 * index in gSuffixArray.
 */
int gRank[MAX_ARRAY_SIZE];

/*
 * Suffix start indices ordered by their second key during a doubling round;
 * also the other half of the rank/second-key buffer swap in GetSuffixArray.
 */
int gOrderBySecondKey[MAX_ARRAY_SIZE];

/* gFirstKeyArray[i] is the first-key rank of the suffix gOrderBySecondKey[i]. */
int gFirstKeyArray[MAX_ARRAY_SIZE];

/*
 * gHeight[r] is the length of the common prefix of the suffixes at sorted
 * positions r and r - 1 (filled by GetHeight; gHeight[0] is 0).
 */
int gHeight[MAX_ARRAY_SIZE];

/*
 * Tells whether positions a and b carry the same (first key, second key)
 * pair in arr, where the second key sits `step` elements further on.
 * The second key is only read when the first keys already match.
 */
bool Compare(int* arr, int a, int b, int step){
	if (arr[a] != arr[b]){
		return false;
	}
	return arr[a + step] == arr[b + step];
}

/*
 * Loads a NUL-terminated string into gStr as integer symbols and appends a
 * 0 sentinel, leaving gStrLen equal to the symbol count including the
 * sentinel.
 *
 * Each character c is stored as c - 'a' + 1, so 'a' maps to 1; the mapping
 * is only meaningful for characters at or above 'a'.
 * Input longer than the buffer allows is truncated so that the sentinel
 * always fits inside gStr.
 */
void GetStr(const char* str){
	/* Room for the symbols plus the one sentinel slot. */
	const size_t capacity = sizeof(gStr) / sizeof(gStr[0]);

	memset(gStr, 0, sizeof(gStr));

	size_t len = strlen(str);
	if (len > capacity - 1){
		len = capacity - 1;
	}
	gStrLen = (int)len;

	for (int i = 0; i < gStrLen; i++){
		gStr[i] = str[i] - 'a' + 1;
	}

	/* Sentinel: strictly smaller than every mapped symbol. */
	gStr[gStrLen++] = 0;
}

void GetSuffixArray(){

	int n = gStrLen;

	memset(gCount, 0, sizeof(gCount));

	for (int i = 0; i < n; i++){

		gRank[i] = gStr[i];

		gCount[gRank[i]] ++;

	}

	for (int i = 1; i < LETTERS; i++){

		gCount[i] += gCount[i - 1];

	}

	for (int i = n - 1; i >= 0; i--){

		gSuffixArray[--gCount[gRank[i]]] = i;

	}

	int step = 1;

	int* rank = gRank, *order_by_second_key = gOrderBySecondKey;

	int m = LETTERS;

	while (step < n){

		int p = 0;

		for (int i = n - step; i < n; i++){

			order_by_second_key[p++] = i;

		}

		for (int i = 0; i < n; i++){

			if (gSuffixArray[i] >= step){

				order_by_second_key[p++] = gSuffixArray[i] - step;

			}

		}

		for (int i = 0; i < n; i++){

			gFirstKeyArray[i] = rank[order_by_second_key[i]];

		}

		for (int i = 0; i < m; i++){

			gCount[i] = 0;

		}

		for (int i = 0; i < n; i++){

			gCount[gFirstKeyArray[i]] ++;

		}

		for (int i = 1; i < m; i++){

			gCount[i] += gCount[i - 1];

		}

		for (int i = n - 1; i >= 0; i--){

			gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];

		}

		int* tmp = rank;

		rank = order_by_second_key;

		order_by_second_key = tmp;

		rank[gSuffixArray[0]] = 0;

		p = 0;

		for (int i = 1; i < n; i++){

			if (Compare(order_by_second_key, gSuffixArray[i], gSuffixArray[i - 1], step)){

				rank[gSuffixArray[i]] = p;

			}

			else{

				rank[gSuffixArray[i]] = ++p;

			}

		}

		m = p + 1;

		step *= 2;

	}

}

/*
 * Fills gHeight from gStr and gSuffixArray: gHeight[r] becomes the length of
 * the common prefix of the suffixes at sorted positions r and r - 1, and
 * gHeight[0] is 0. gRank is rewritten as the inverse of gSuffixArray.
 *
 * Suffixes are visited in text order so that the previous match length,
 * minus one, can be reused as the starting point for the next comparison.
 */
void GetHeight(){
	int n = gStrLen;

	/* Record every suffix's sorted position, including position 0. */
	for (int i = 0; i < n; i++){
		gRank[gSuffixArray[i]] = i;
	}

	int k = 0, j;

	gHeight[0] = 0;

	for (int i = 0; i < n; i++){
		/* The first suffix in sorted order has no predecessor to compare with. */
		if (gRank[i] == 0){
			k = 0;
			continue;
		}

		j = gSuffixArray[gRank[i] - 1];

		/* Dropping the leading symbol shortens the known match by at most one. */
		if (k){
			k--;
		}

		while (i + k < n && j + k < n && gStr[i + k] == gStr[j + k]){
			k++;
		}

		gHeight[gRank[i]] = k;
	}
}

/*
 * Reports whether some run of consecutive suffix-array entries whose
 * gHeight values are all at least k - 1 contains two start positions that
 * lie at least k apart.
 *
 * Each run is scanned once while tracking the smallest and largest start
 * position seen in it; the entry that breaks a run seeds the next one.
 */
bool Find(int k){
	const int need = k - 1;
	int idx = 1;

	while (idx < gStrLen){
		int lowest = gSuffixArray[idx - 1];
		int highest = lowest;

		for (; idx < gStrLen && gHeight[idx] >= need; ++idx){
			const int pos = gSuffixArray[idx];
			if (pos < lowest){
				lowest = pos;
			}
			if (pos > highest){
				highest = pos;
			}
		}

		if (highest - lowest >= k){
			return true;
		}

		// Step past the entry that ended this run.
		++idx;
	}

	return false;
}

/* Debug helper: prints the first n entries of gStr on one line after a header. */
void printstr(int n){
	printf("string = \n");

	int idx = 0;
	while (idx < n){
		printf("%d ", gStr[idx]);
		++idx;
	}

	printf("\n");
}

/* Debug helper: prints the first n entries of gSuffixArray on one line after a header. */
void printsuffix(int n){
	printf("suffix = \n");

	int idx = 0;
	while (idx < n){
		printf("%d ", gSuffixArray[idx]);
		++idx;
	}

	printf("\n");
}

/* Debug helper: prints the first n entries of gHeight on one line after a header. */
void printheigt(int n){
	printf("height = \n");

	int idx = 0;
	while (idx < n){
		printf("%d ", gHeight[idx]);
		++idx;
	}

	printf("\n");
}

/*
 * Reads test cases until a length of 0 or the end of input. Each case is a
 * count n followed by n integers. For every case it prints the longest
 * length L >= 5 such that two non-overlapping stretches of L values are
 * equal up to a constant offset, or 0 when no such stretch exists.
 *
 * Method: replace the sequence by its adjacent differences (so offset
 * copies become identical), shift them to be >= 1, terminate with a 0
 * sentinel, build the suffix array and height array, then bisect on L
 * using Find().
 *
 * Returns 0 normally, 1 when a case is truncated or too long for the buffers.
 */
int main(){
	const int capacity = (int)(sizeof(gStr) / sizeof(gStr[0]));
	int n;

	/* Stop on the terminating 0, on a non-positive length, or on end of input. */
	while (scanf("%d", &n) == 1 && n > 0){
		if (n > capacity){
			return 1;
		}

		for (int i = 0; i < n; i++){
			if (scanf("%d", &gStr[i]) != 1){
				return 1;
			}
		}

		/* In-place adjacent differences: gStr[i] <- gStr[i + 1] - gStr[i]. */
		int smallest = 0;
		for (int i = 0; i + 1 < n; i++){
			gStr[i] = gStr[i + 1] - gStr[i];
			if (i == 0 || gStr[i] < smallest){
				smallest = gStr[i];
			}
		}

		/* Shift so the smallest difference becomes 1, keeping 0 for the sentinel. */
		for (int i = 0; i + 1 < n; i++){
			gStr[i] -= smallest - 1;
		}
		gStr[n - 1] = 0;

		gStrLen = n;
		GetSuffixArray();
		GetHeight();

		/* Find(L) is monotone in L, so bisect for the largest L that succeeds. */
		int lo = 0, hi = n, best = 0;
		while (lo < hi){
			int mid = lo + (hi - lo) / 2;
			if (Find(mid)){
				best = mid;
				lo = mid + 1;
			}
			else{
				hi = mid;
			}
		}

		/* Stretches shorter than 5 do not count. */
		printf("%d\n", best >= 5 ? best : 0);
	}

	return 0;
}