字符串匹配？kmp ： hash

给定一个模式串S，以及一个模板串P，所有字符串中只包含大小写英文字母以及阿拉伯数字。

模板串P在模式串S中多次作为子串出现。

求出模板串P在模式串S中所有出现的位置的起始下标。

输入格式

第一行输入整数N，表示字符串P的长度。

第二行输入字符串P。

第三行输入整数M，表示字符串S的长度。

第四行输入字符串M。

输出格式

共一行，输出所有出现位置的起始下标（下标从0开始计数），整数之间用空格隔开。

一个字符串匹配的模板，求字符串A在字符串B中各次出现的位置， kmp分为两步：

1.对字符串A进行自我匹配， next[i]表示"A中以i结尾的非结尾前缀子串"与"A的前缀"能够匹配的最大长度，即：

next[i] = max{j}, j < i 且 A[i - j + 1 ~ i] == A[1 ~ j]

当j不存在时， next[i] = 0;

2.对字符串A与B进行匹配， f[i]表示"B中以i结尾的非结尾前缀子串"与"A的前缀"能够匹配的最大长度，即：

f[i] = max{j}, j < i 且 B[i - j + 1 ~ i] == A[1 ~ j]

引理：j是next[i]的一个候选项，即j < i 且 A[i - j +1 ~ i] == A[1 ~ j], 则小于j的最大next[i]的候选项是next[j];

根据优化后计算的next数组，当f[i] = n(A的长度)时， i - n 则是A在B中出现的位置；

#include <bits/stdc++.h>

using namespace std;

typedef long long ll;

const int INF = 0x3f3f3f3f;

const int MAXN = 5e5 + 100;

const int MAXM = 3e3 + 10;

const double eps = 1e-5;

template < typename T > inline void read(T &x) {

    x = 0; T ff = 1, ch = getchar();

    while(!isdigit(ch)) {

        if(ch == '-') ff = -1;

        ch = getchar();

    }

    while(isdigit(ch)) {

        x = (x << 1) + (x << 3) + (ch ^ 48);

        ch = getchar();

    }

    x *= ff;

}

template < typename T > inline void write(T x) {

    if(x < 0) putchar('-'), x = -x;

    if(x > 9) write(x / 10);

    putchar(x % 10 + '0');

}

int n, m, next[MAXN], f[MAXN];

char P[MAXN], M[MAXN];

int main() {

    read(n);

    scanf("%s", P + 1);

    read(m);

    scanf("%s", M + 1);

    for(int i = 2, j = 0; i <= n; ++i) {

        while(j && P[j + 1] != P[i]) j = next[j];

        if(P[j + 1] == P[i]) ++j;

        next[i] = j;

    }

    for(int i = 1, j = 0; i <= m; ++i) {

        while(j && (j == n || P[j + 1] != M[i])) j = next[j];

        if(P[j + 1] == M[i]) ++j;

        f[i] = j;

        if(f[i] == n) {

            write(i - n);

            putchar(' ');

        }

    }

    return 0;

}

其实用hash来做更加显然，在O（N）的时间内预处理字符串的hash值，枚举B中每个位置i(n <= i <= m) ,检查A的hash值和B的子串B[i - n + 1 ~ i] 是否相同即可

#include <bits/stdc++.h>

using namespace std;

typedef long long ll;

const int INF = 0x3f3f3f3f;

const int MAXN = 5e5 + 100;

const int MAXM = 3e3 + 10;

const double eps = 1e-5;

template < typename T > inline void read(T &x) {

    x = 0; T ff = 1, ch = getchar();

    while(!isdigit(ch)) {

        if(ch == '-') ff = -1;

        ch = getchar();

    }

    while(isdigit(ch)) {

        x = (x << 1) + (x << 3) + (ch ^ 48);

        ch = getchar();

    }

    x *= ff;

}

template < typename T > inline void write(T x) {

    if(x < 0) putchar('-'), x = -x;

    if(x > 9) write(x / 10);

    putchar(x % 10 + '0');

}

int n, m;

unsigned long long x, q[MAXN], a[MAXN];

char p[MAXN], s[MAXN];

int main() {

    read(n);

    scanf("%s", p + 1);

    read(m);

    scanf("%s", s + 1);

    for(int i = 1; i <= n; ++i) {

        x = x * 131 + (p[i] - 'a' + 1);

    }

    q[0] = 1;

    for(int i = 1; i <= m; ++i) {

        q[i] = q[i - 1] * 131;

        a[i] = a[i - 1] * 131 + (s[i] - 'a' + 1);

    }

    for(int i = n; i <= m; ++i) {

        if(a[i] - a[i - n] * q[n] == x) {

            write(i - n);

            putchar(' ');

        }

    }

    return 0;

}