SPOJ Distinct Substrings（后缀数组求不同子串个数，好题）

DISUBSTR - Distinct Substrings

no tags

Given a string, we need to find the total number of its distinct substrings.

Input

The first line contains T, the number of test cases (T <= 20).
Each test case consists of one string whose length is at most 1000.

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

Submit solution!

题目链接：SPOJ DISUBSTR

一开始想用字典树，结果静态建树的Trie超时了（懒得写动态指针版……）。正解是用后缀数组做的：每一个后缀的贡献原本为其长度，所以原本的总贡献为$\frac{len \times (len + 1)}{2}$，但由于一些串重复，我们要把重复的部分减掉。再想一想，这些重复的串都是后缀的前缀，也就是$Suffix(x)$和$Suffix(y)$的公共前缀$LCP(x,y)$。但是x与y如何确定才能准确、不遗漏也不重复地算出这些重复的串呢？把所有后缀按字典序排序，height数组记录的正是排序后相邻两个后缀的最长公共前缀长度：排名为i的后缀恰好有height[i]个前缀已经在排名更靠前的后缀中出现过，因此把所有height值减掉就好了。不过似乎有人用指针写的Trie过了，果然指针除了爆内存的风险，速度确实快啊。

想了一下，用后缀数组只要$O(N\log_{2}N)$，而字典树至少要$O(N^{2})$，果然不是一个档次……

代码：

#include <stdio.h>

#include <iostream>

#include <algorithm>

#include <cstdlib>

#include <cstring>

#include <bitset>

#include <string>

#include <stack>

#include <cmath>

#include <queue>

#include <set>

#include <map>

using namespace std;

// Large "infinity" sentinel. Every byte is 0x3f, so with a 32-bit int
// CLR(arr, 0x3f) fills an int array with exactly this value.
#define INF 0x3f3f3f3f

// Index helpers for a binary tree stored in an array: left child, right
// child, and the midpoint of a range. Arguments are parenthesised so that
// expressions with low-precedence operators (|, &, ?:) expand correctly.
#define LC(x) ((x) << 1)
#define RC(x) (((x) << 1) + 1)
#define MID(x, y) (((x) + (y)) >> 1)

// Redirect stdin / stdout to the named file.
#define fin(name) freopen(name, "r", stdin)
#define fout(name) freopen(name, "w", stdout)

// memset a real array (not a pointer: sizeof must see the array type).
#define CLR(arr, val) memset((arr), (val), sizeof(arr))

// Expands to two statements ending in ';' - do not use it as the unbraced
// body of an if/for.
#define FAST_IO ios::sync_with_stdio(false);cin.tie(0);

typedef pair<int, int> pii;
typedef long long LL;

const double PI = acos(-1.0);

// Capacity of the global work arrays.
const int N = 1010;

// wa, wb: scratch arrays that DA() works on through its x / y pointers.
// cnt: bucket counters for the counting sorts in DA().
// sa: suffix order written by DA() and read by getght().
int wa[N], wb[N], cnt[N], sa[N];
// ran: inverse of sa, filled by getght(); height: per-rank values computed by getght().
int ran[N], height[N];
// s: the string read by main() and indexed by DA() and getght().
char s[N];

/**
 * Tells whether positions a and b carry the same pair of keys
 * (r[pos], r[pos + d]).
 *
 * The second key is only read when the first keys already match.
 *
 * @param r key array
 * @param a first position
 * @param b second position
 * @param d offset of the second key
 * @return 1 when both keys match, 0 otherwise
 */
inline int cmp(int r[], int a, int b, int d)
{
    if (r[a] != r[b])
        return 0;
    return r[a + d] == r[b + d] ? 1 : 0;
}

void DA(int n, int m)

{

    int i;

    int *x = wa, *y = wb;

    for (i = 0; i < m; ++i)

        cnt[i] = 0;

    for (i = 0; i < n; ++i)

        ++cnt[x[i] = s[i]];

    for (i = 1; i < m; ++i)

        cnt[i] += cnt[i - 1];

    for (i = n - 1; i >= 0; --i)

        sa[--cnt[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1)

    {

        int p = 0;

        for (i = n - k; i < n; ++i)

            y[p++] = i;

        for (i = 0; i < n; ++i)

            if (sa[i] >= k)

                y[p++] = sa[i] - k;

        for (i = 0; i < m; ++i)

            cnt[i] = 0;

        for (i = 0; i < n; ++i)

            ++cnt[x[y[i]]];

        for (i = 1; i < m; ++i)

            cnt[i] += cnt[i - 1];

        for (i = n - 1; i >= 0; --i)

            sa[--cnt[x[y[i]]]] = y[i];

        swap(x, y);

        x[sa[0]] = 0;

        p = 1;

        for (i = 1; i < n; ++i)

            x[sa[i]] = cmp(y, sa[i - 1], sa[i], k) ? p - 1 : p++;

        m = p;

        if (p >= n)

            break;

    }

}

/**
 * Fills ran[] and height[] from the suffix order in sa.
 *
 * ran[sa[r]] = r for r = 1..n, and height[r] is the number of leading
 * characters that the suffixes at sa[r] and sa[r - 1] have in common.
 *
 * @param n string length without the terminator; sa is read at indices
 *          0..n, so it must hold n + 1 entries (as produced by DA(n + 1, ...))
 *
 * The matching loop has no explicit bound check; it relies on a mismatch
 * occurring, which the unique '\0' terminator of s presumably guarantees.
 */
void getght(int n)
{
    for (int r = 1; r <= n; ++r)
        ran[sa[r]] = r;

    int common = 0;
    for (int pos = 0; pos < n; ++pos)
    {
        // Carry over the previous match length, less one character.
        common = common ? common - 1 : 0;

        const int neighbour = sa[ran[pos] - 1];
        for (; s[pos + common] == s[neighbour + common]; ++common)
        {
        }
        height[ran[pos]] = common;
    }
}

/**
 * Reads the number of test cases and then one string per case, and prints
 * the number of distinct substrings of each string on its own line.
 *
 * The count is len * (len + 1) / 2 (all substrings) minus the sum of
 * height[1..len] (those already seen as a prefix of the preceding suffix in
 * sorted order).
 */
int main(void)
{
    int T;

    // Without a valid test count there is nothing to do; do not loop on an
    // uninitialised T.
    if (scanf("%d", &T) != 1)
        return 0;

    // Build "%<capacity-1>s" so an over-long token can never overflow s.
    char fmt[16];
    snprintf(fmt, sizeof(fmt), "%%%ds", static_cast<int>(sizeof(s)) - 1);

    while (T--)
    {
        // Stop on premature end of input rather than reprocessing stale data.
        if (scanf(fmt, s) != 1)
            break;

        int len = strlen(s);

        // Sort len + 1 positions so the terminating '\0' acts as a sentinel;
        // the alphabet bound is the largest character plus one.
        DA(len + 1, *max_element(s, s + len) + 1);
        getght(len);

        int ans = (len + 1) * len >> 1;
        for (int i = 1; i <= len; ++i)
            ans -= height[i];

        printf("%d\n", ans);
    }

    return 0;
}