Maximum repetition substring 后缀数组
Time Limit: 1000MS | Memory Limit: 65536K
Total Submissions: 7578 | Accepted: 2281
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a '#'.
For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
Sample Input
- ccabababc
- daabbccaa
- #
Sample Output
- Case 1: ababab
- Case 2: aa

- #include <iostream>
- #include <cstdio>
- #include <cstring>
- #include <cmath>
- #include <algorithm>
- #include <string>
- #include <vector>
- #include <stack>
- #include <queue>
- #include <set>
- #include <map>
- #include <list>
- #include <iomanip>
- #include <cstdlib>
- using namespace std;
// Shared constants and buffers for the suffix-array solution.
const int INF=0x5fffffff;
// NOTE(review): the exponent is an assumption; the constant is not used by
// the visible code.
const double EXP=1e-8;
// Capacity of every per-position buffer. The problem statement caps the input
// at 100,000 characters and the solver appends one terminator symbol, so the
// value must be at least 100,002 (text + terminator + C-string NUL).
// NOTE(review): exact value is an assumption - confirm against the author's copy.
const int MS=100005;
// Sparse table over height[]: dp[i][j] covers the 2^j entries starting at i.
// 2^17 > MS, so 17 levels are enough; 20 leaves headroom.
int dp[MS][20]; // RMQ
// General-purpose integer work arrays, one slot per text position.
int t1[MS],t2[MS],c[MS],v[MS];
// rank[i]  : index of suffix i inside sa[]
// sa[k]    : start position of the k-th smallest suffix
// height[k]: longest common prefix of suffixes sa[k-1] and sa[k]
int rank[MS],sa[MS],height[MS];
// Raw input line (str) and a spare character buffer.
char str[MS],str1[MS];
// Input re-encoded as small integers, followed by a terminator symbol.
int s[MS];
// Returns 1 when positions a and b carry the same key pair, i.e. r[a]==r[b]
// and r[a+k]==r[b+k]; returns 0 otherwise.
// The second comparison is only evaluated when the first one succeeds, so
// r[a+k] / r[b+k] are never touched for positions whose first keys differ.
// The caller must guarantee that a+k and b+k are valid indices whenever
// r[a]==r[b].
int cmp(const int *r,int a,int b,int k)
{
    return r[a]==r[b] && r[a+k]==r[b+k];
}
// Builds the suffix array of r[0..n-1] with prefix doubling and counting sort.
//
//   r  : input symbols; every r[i] must lie in [0, m)
//   sa : output, receives the n suffix start positions in ascending
//        lexicographic order
//   n  : number of symbols (include the terminator if the caller uses one)
//   m  : alphabet size, i.e. one more than the largest symbol
//
// All scratch storage is local, so the function does not depend on the size
// of any global buffer. A suffix that runs out of symbols is treated as
// having the key -1 for its missing half, which sorts a proper prefix before
// the longer suffix and keeps every read inside [0, n).
void get_sa(int *r,int *sa,int n,int m)
{
    if(n<=0)
        return;

    std::vector<int> x(n),y(n),keys(n);
    std::vector<int> bucket(std::max(n,m),0);

    // Round 0: counting sort of the suffixes by their first symbol.
    for(int i=0;i<n;i++)
        bucket[x[i]=r[i]]++;
    for(int i=1;i<m;i++)
        bucket[i]+=bucket[i-1];
    for(int i=n-1;i>=0;i--)
        sa[--bucket[x[i]]]=i;

    // Each round doubles the compared prefix length from j to 2*j.
    // p counts the distinct ranks produced; once p==n all suffixes differ.
    for(int j=1,p=1;p<n;j*=2,m=p)
    {
        // y lists the suffixes ordered by their second half: the last j
        // suffixes have an empty second half and therefore come first.
        p=0;
        for(int i=n-j;i<n;i++)
            y[p++]=i;
        for(int i=0;i<n;i++)
            if(sa[i]>=j)
                y[p++]=sa[i]-j;

        // Stable counting sort of that list by the rank of the first half.
        for(int i=0;i<n;i++)
            keys[i]=x[y[i]];
        std::fill(bucket.begin(),bucket.begin()+m,0);
        for(int i=0;i<n;i++)
            bucket[keys[i]]++;
        for(int i=1;i<m;i++)
            bucket[i]+=bucket[i-1];
        for(int i=n-1;i>=0;i--)
            sa[--bucket[keys[i]]]=y[i];

        // y now holds the previous ranks; x receives the refined ranks.
        x.swap(y);
        x[sa[0]]=0;
        p=1;
        for(int i=1;i<n;i++)
        {
            const int a=sa[i-1];
            const int b=sa[i];
            const int a_tail=(a+j<n)?y[a+j]:-1;
            const int b_tail=(b+j<n)?y[b+j]:-1;
            const bool same=(y[a]==y[b])&&(a_tail==b_tail);
            x[b]=same?p-1:p++;
        }
    }
}
- void get_height(int *r,int n)
- {
- int i,j,k=;
- for(i=;i<=n;i++)
- rank[sa[i]]=i;
- //height[i]>=height[i-1]-1;
- for(i=;i<n;i++)
- {
- if(k)
- k--;
- else
- k=;
- j=sa[rank[i]-];
- while(r[i+k]==r[j+k])
- k++;
- height[rank[i]]=k;
- }
- }
- void rmq_init(int n)
- {
- for(int i=;i<=n;i++) dp[i][]=height[i];
- for(int j=;(<<j)<=n;j++)
- for(int i=;i+(<<j)-<=n;i++)
- dp[i][j]=min(dp[i][j-],dp[i+(<<(j-))][j-]);
- }
- int rmq(int ll,int rr)
- {
- int k=;
- ll=rank[ll];
- rr=rank[rr];
- if(ll>rr)
- {
- int tmp=ll;
- ll=rr;
- rr=tmp;
- }
- ll++;
- while((<<(k+))<=rr-ll+) k++;
- return min(dp[ll][k],dp[rr-(<<k)+][k]);
- }
- int main()
- {
- int text=;
- while(scanf("%s",str)>)
- {
- if(str[]=='#')
- break;
- int len=strlen(str);
- for(int i=;i<len;i++)
- s[i]=str[i]-'a'+;
- s[len]=;
- get_sa(s,sa,len+,);
- get_height(s,len);
- rmq_init(len);
- int ans=,pos=,lenn;
- for(int i=;i<=len/;i++)
- {
- for(int j=;j<len-i;j+=i)
- {
- if(str[j]!=str[j+i])
- continue;
- int k=rmq(j,j+i);
- int tol=k/i+;
- //printf("%d\n",tol);
- int r=i-k%i;
- int p=j;
- int cnt=;
- for(int m=j-;m>j-i&&str[m]==str[m+i]&&m>=;m--)
- {
- cnt++;
- if(cnt==r)
- {
- tol++;
- p=m;
- }
- else if(rank[p]>rank[m])
- {
- p=m;
- }
- }
- if(ans<tol)
- {
- ans=tol;
- pos=p;
- lenn=tol*i;
- }
- else if(ans==tol&&rank[pos]>rank[p])
- {
- pos=p;
- lenn=tol*i;
- }
- }
- }
- printf("Case %d: ",++text);
- // printf("%d %d %d\n",ans,pos,lenn);
- if(ans<) //这里,如果字符总长度小于2,那么就在原串中找出一个最小的字符就好
- {
- char ch='z';
- for(int i=;i<len;i++)
- if(str[i]<ch)
- ch=str[i];
- printf("%c\n",ch);
- continue;
- }
- for(int i=pos;i<pos+lenn;i++)
- printf("%c",str[i]);
- printf("\n");
- }
- return ;
- }
