Repeated Substrings

Time Limit: 3000MS Memory Limit: Unknown 64bit IO Format: %lld & %llu

Description

String analysis often arises in applications from biology and chemistry, such as the study of DNA and protein molecules. One interesting problem is to find how many substrings are repeated (at least twice) in a long string. In this problem, you will write a program to find the total number of repeated substrings in a string of at most 100 000 alphabetic characters. Any unique substring that occurs more than once is counted. As an example, if the string is “aabaab”, there are 5 repeated substrings: “a”, “aa”, “aab”, “ab”, “b”. If the string is “aaaaa”, the repeated substrings are “a”, “aa”, “aaa”, “aaaa”. Note that repeated occurrences of a substring may overlap (e.g. “aaaa” in the second case).

Input

The input consists of at most 10 cases. The first line contains a positive integer, specifying the number of
cases to follow. Each of the following lines contains a nonempty string of up to 100 000 alphabetic characters.

Output

For each line of input, output one line containing the number of unique substrings that are repeated. You
may assume that the correct answer fits in a signed 32-bit integer.

Sample Input

  1. 3
  2. aabaab
  3. aaaaa
  4. AaAaA

Sample Output

  1. 5
  2. 4
  3. 5

HINT

 

Source

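解题:后缀数组 lcp 的应用。将所有后缀排序后,lcp[i] 表示排名第 i 的后缀与排名第 i-1 的后缀的最长公共前缀长度;该后缀长度为 1..lcp[i] 的前缀都至少出现了两次,其中长度不超过 lcp[i-1] 的前缀已经在前一个后缀处统计过。因此如果 lcp[i] > lcp[i-1],那么累加 lcp[i] - lcp[i-1],总和即为答案。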

  1. #include <bits/stdc++.h>
  2. using namespace std;
  // NOTE(review): the published listing had lost every integer literal and
  // carried list-item numbers in front of each line. The constants below are
  // restored from the classic prefix-doubling suffix-array template and the
  // problem limits (strings of up to 100 000 letters) -- confirm against the
  // original post if it is available.

  // Capacity of every work array: the longest input plus its sentinel, with
  // a little slack.
  const int maxn = 100010;

  // rk  : rank buffer used by da(); calcp() later stores the final ranks here.
  // wb  : second rank / ordering buffer used by da().
  // wv  : first-key ranks listed in second-key order (radix-sort input).
  // wd  : counting-sort buckets.
  // lcp : lcp[k] = common-prefix length of the suffixes ranked k and k-1.
  int rk[maxn],wb[maxn],wv[maxn],wd[maxn],lcp[maxn];

  // Returns true when the suffixes starting at i and j carry the same
  // (first key, second key) pair, i.e. equal ranks at offset 0 and offset k.
  bool cmp(int *r,int i,int j,int k) {
    return r[i] == r[j] && r[i+k] == r[j+k];
  }

  // Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling
  // with counting sort.
  //   r  : symbols, every value in [0, m)
  //   sa : output, sa[k] = start index of the suffix ranked k
  //   n  : number of symbols, including any trailing sentinel
  //   m  : alphabet size (exclusive upper bound on the symbol values)
  // NOTE(review): as in the usual template, r[n-1] is assumed to be a unique
  // smallest sentinel so that cmp() never compares past the end -- confirm
  // for any new caller.
  void da(int *r,int *sa,int n,int m) {
    int i,k,p,*x = rk,*y = wb;

    // Counting sort of the suffixes by their first symbol.
    for(i = 0; i < m; ++i) wd[i] = 0;
    for(i = 0; i < n; ++i) wd[x[i] = r[i]]++;
    for(i = 1; i < m; ++i) wd[i] += wd[i-1];
    for(i = n-1; i >= 0; --i) sa[--wd[x[i]]] = i;

    // Each round doubles the compared prefix length k; p is the number of
    // distinct ranks, so the loop ends once every suffix has its own rank.
    for(p = k = 1; p < n; k <<= 1,m = p) {
      // Order by second key: suffixes whose second half runs off the end
      // come first, the rest follow in the order given by the previous sa.
      for(p = 0,i = n-k; i < n; ++i) y[p++] = i;
      for(i = 0; i < n; ++i) if(sa[i] >= k) y[p++] = sa[i] - k;
      for(i = 0; i < n; ++i) wv[i] = x[y[i]];

      // Stable counting sort by first key.
      for(i = 0; i < m; ++i) wd[i] = 0;
      for(i = 0; i < n; ++i) wd[wv[i]]++;
      for(i = 1; i < m; ++i) wd[i] += wd[i-1];
      for(i = n-1; i >= 0; --i) sa[--wd[wv[i]]] = y[i];

      // Re-rank: y now holds the old ranks, x receives the new ones.
      std::swap(x,y);
      x[sa[0]] = 0;
      for(p = i = 1; i < n; ++i)
        x[sa[i]] = cmp(y,sa[i-1],sa[i],k) ? p-1 : p++;
    }
  }

  // Fills rk[] and lcp[] for the text r[0..n-1] from its suffix array.
  // The loop reads sa[1..n], so sa must cover n+1 suffixes (the text plus a
  // trailing sentinel). lcp[rk[i]] receives the length of the common prefix
  // of suffix i and the suffix ranked immediately before it.
  void calcp(int *r,int *sa,int n) {
    for(int i = 1; i <= n; ++i) rk[sa[i]] = i;
    int h = 0;
    for(int i = 0; i < n; ++i) {
      // Moving from suffix i-1 to suffix i shortens the match by at most one.
      if(h > 0) h--;
      for(int j = sa[rk[i]-1]; i+h < n && j+h < n; h++)
        if(r[i+h] != r[j+h]) break;
      lcp[rk[i]] = h;
    }
  }
  41. int r[maxn],sa[maxn];
  42. char str[maxn];
  43. int main() {
  44. int hn,x,y,cs,ret;
  45. scanf("%d",&cs);
  46. while(cs--) {
  47. scanf("%s",str);
  48. int len = strlen(str);
  49. for(int i = ; str[i]; ++i)
  50. r[i] = str[i];
  51. ret = r[len] = ;
  52. da(r,sa,len+,);
  53. calcp(r,sa,len);
  54. for(int i = ; i <= len; ++i)
  55. if(lcp[i] > lcp[i-]) ret += lcp[i] - lcp[i-];
  56. printf("%d\n",ret);
  57. }
  58. return ;
  59. }

后缀自动机解法:统计每个状态的出现次数,对出现次数大于 1 的状态 i 累加 len[i] - len[f[i]](f[i] 为后缀链接指向的状态)。

  1. #include <bits/stdc++.h>
  2. using namespace std;
  // NOTE(review): the published listing had lost every integer literal and
  // carried list-item numbers in front of each line. The constants below are
  // restored from the standard suffix-automaton construction and the problem
  // limits (strings of up to 100 000 letters) -- confirm against the
  // original post if it is available.

  // A suffix automaton over a string of length L has fewer than 2*L states,
  // so twice the maximum input length (plus slack) bounds the state count.
  const int maxn = 200010;

  // cnt : occurrence counter per state (1 for states created by add()).
  // c   : counting-sort buckets indexed by state length.
  // sa  : state indices ordered by increasing length.
  int cnt[maxn],c[maxn],sa[maxn];

  // One automaton state.
  struct node{
    // son[ch] : transition on character code ch, -1 when absent. Sized 128
    //           because add() is indexed directly by the character code.
    // f       : suffix link, -1 for the root.
    // len     : length of the longest string in this state.
    int son[128],f,len;
    // Resets the state to "no transitions, no suffix link, length 0".
    void init(){
      std::memset(son,-1,sizeof son);
      f = -1;
      len = 0;
    }
  };

  // Suffix automaton built online, one character at a time.
  struct SAM{
    node e[maxn];
    int tot,last;  // tot: states in use; last: state of the whole prefix so far

    // Allocates a fresh state with the given length and returns its index.
    int newnode(int len = 0){
      e[tot].init();
      e[tot].len = len;
      return tot++;
    }

    // Empties the automaton and creates the root as state 0.
    void init(){
      tot = last = 0;
      newnode();
    }

    // Appends character code c (must lie in [0, 128)) to the automaton.
    void add(int c){
      int p = last,np = newnode(e[p].len + 1);
      // Add the missing transition on c along the suffix-link chain.
      while(p != -1 && e[p].son[c] == -1){
        e[p].son[c] = np;
        p = e[p].f;
      }
      if(p == -1) e[np].f = 0;
      else{
        int q = e[p].son[c];
        if(e[p].len + 1 == e[q].len) e[np].f = q;
        else{
          // q also holds longer strings: split off a clone nq that keeps
          // q's transitions and link but has length len(p)+1.
          int nq = newnode();
          e[nq] = e[q];
          e[nq].len = e[p].len + 1;
          e[q].f = e[np].f = nq;
          while(p != -1 && e[p].son[c] == q){
            e[p].son[c] = nq;
            p = e[p].f;
          }
        }
      }
      last = np;
      // Only states created for a real prefix start with one occurrence;
      // clones keep whatever cnt already holds for their index.
      cnt[np] = 1;
    }
  }sam;
  50. char str[maxn];
  51. int main(){
  52. int kase;
  53. scanf("%d",&kase);
  54. while(kase--){
  55. scanf("%s",str);
  56. sam.init();
  57. memset(cnt,,sizeof cnt);
  58. int len = strlen(str);
  59. for(int i = ; str[i]; ++i)
  60. sam.add(str[i]);
  61. node *e = sam.e;
  62. memset(c,,sizeof c);
  63. for(int i = ; i < sam.tot; ++i) c[e[i].len]++;
  64. for(int i = ; i <= len; ++i) c[i] += c[i-];
  65. for(int i = sam.tot-; i >= ; --i) sa[--c[e[i].len]] = i;
  66. for(int i = sam.tot-; i > ; --i){
  67. int v = sa[i];
  68. cnt[e[v].f] += cnt[v];
  69. }
  70. int ret = ;
  71. for(int i = ; i < sam.tot; ++i){
  72. if(cnt[i] <= ) continue;
  73. ret += e[i].len - e[e[i].f].len;
  74. }
  75. printf("%d\n",ret);
  76. }
  77. return ;
  78. }

UVALive 6869 Repeated Substrings的更多相关文章

  1. UVALive - 6869 Repeated Substrings 后缀数组

    题目链接: http://acm.hust.edu.cn/vjudge/problem/113725 Repeated Substrings Time Limit: 3000MS 样例 sample ...

  2. CSU-1632 Repeated Substrings (后缀数组)

    Description String analysis often arises in applications from biology and chemistry, such as the stu ...

  3. UVALive 6869(后缀数组)

    传送门:Repeated Substrings 题意:给定一个字符串,求至少重复一次的不同子串个数. 分析:模拟写出子符串后缀并排好序可以发现,每次出现新的重复子串个数都是由现在的height值减去前 ...

  4. Repeated Substrings(UVAlive 6869)

    题意:求出现过两次以上的不同子串有多少种. /* 用后缀数组求出height[]数组,然后扫一遍, 发现height[i]-height[i-1]>=0,就ans+=height[i]-heig ...

  5. UVALive 4671 K-neighbor substrings 巧用FFT

    UVALive4671   K-neighbor substrings   给定一个两个字符串A和B B为模式串.问A中有多少不同子串与B的距离小于k 所谓距离就是不同位的个数. 由于字符串只包含a和 ...

  6. UVALive - 4671 K-neighbor substrings (FFT+哈希)

    题意:海明距离的定义:两个相同长度的字符串中不同的字符数.现给出母串A和模式串B,求A中有多少与B海明距离<=k的不同子串 分析:将字符a视作1,b视作0.则A与B中都是a的位置乘积是1.现将B ...

  7. CSU-1632 Repeated Substrings[后缀数组求重复出现的子串数目]

    评测地址:https://cn.vjudge.net/problem/CSU-1632 Description 求字符串中所有出现至少2次的子串个数 Input 第一行为一整数T(T<=10)表 ...

  8. LeetCode 1100. Find K-Length Substrings With No Repeated Characters

    原题链接在这里:https://leetcode.com/problems/find-k-length-substrings-with-no-repeated-characters/ 题目: Give ...

  9. [LeetCode] Repeated DNA Sequences 求重复的DNA序列

    All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACG ...

随机推荐

  1. CorePlot学习六---点击scatterPlot中的symbol点时弹出对应的凝视

    因为项目须要用到用户点击 symbol时,弹出对应的具体信息,发现国内解说的比較少,经过一番搜索验证最终解决,先看效果图: 详细须要改动的代码例如以下: 首先要引用托付方法:CPTScatterPlo ...

  2. RISC设计原则及基本技术

    CISC的特点: 指令系统庞大,指令功能复杂,指令寻址方式多,指令格式多 绝大多数指令须要多个机器周期完毕 各种指令都能够訪问存储器 採用微程序控制 有专用寄存器,少量 难以用优化编译技术生成高效的目 ...

  3. tomcat和nginx相互结合的优化调整

    在工作中遇到这样的情况 Tomcat为后台 nginx为反向代理 需要往后台导入数据,由于处理时间过长,导致访问时出现504和500  通过修改tomcat中maxParameterCount=&qu ...

  4. Most common words

    To find the most common words, we can apply the DSU pattern; most_common takes a histogram and retur ...

  5. 安卓开发--WebView

    package com.zhangxi.test01; import android.app.Activity;import android.app.ProgressDialog;import and ...

  6. java操作文件创建、删除

    java操作文件创建.删除: package test; import java.io.File; import java.io.IOException; import org.slf4j.Logge ...

  7. javascript常用收集一下

    事件源对象 event.srcElement.tagName event.srcElement.type 捕获释放 event.srcElement.setCapture(); event.srcEl ...

  8. UVa 216 Getting in Line【枚举排列】

    题意:给出n个点的坐标,(2<=n<=8),现在要使得这n个点连通,问最小的距离的和 因为n很小,所以可以直接枚举这n个数的排列,算每一个排列的距离的和, 保留下距离和最小的那个排列就可以 ...

  9. php--防止DDos攻击代码

    <?php //查询禁止IP $ip =$_SERVER['REMOTE_ADDR']; $fileht=".htaccess2"; if(!file_exists($fil ...

  10. 几个提高效率的PHOTOSHOP秘密快捷键

    1.拖动选择 使用矩形选框工具,在画布上拖动(不要松开鼠标),这时按住空格键,然后移动鼠标,你会发现选区也跟着移动了. 2.左右流量文档 按住Cmd(Ctrl)键,上下滚动鼠标,你会发现文档的滚动条在 ...