poj 3691 DNA repair(AC自动机+dp)
DNA repair
Description Biologists finally invent techniques of repairing DNA that contains segments causing kinds of inherited diseases. For the sake of simplicity, a DNA is represented as a string containing characters 'A', 'G', 'C' and 'T'. The repairing techniques are simply to change some characters to eliminate all segments causing diseases. You are to help the biologists to repair a DNA by changing the least number of characters. Input
The input consists of multiple test cases. Each test case starts with a line containing one integer N (1 ≤ N ≤ 50), which is the number of DNA segments causing inherited diseases.
The following N lines give N non-empty strings of length not greater than 20 containing only characters in "AGCT", which are the DNA segments causing inherited disease. The last line of the test case is a non-empty string of length not greater than 1000 containing only characters in "AGCT", which is the DNA to be repaired. The last test case is followed by a line containing a single zero. Output
For each test case, print a line containing the test case number( beginning with 1) followed by the
number of characters which need to be changed. If it's impossible to repair the given DNA, print -1. Sample Input
Sample Output
Source |
给定N个模式串(1 ≤ N ≤ 50),每个最大长度为20,以及一个主串(最大长度为1000),涉及的字符只有4个 {'A','T','G','C'},求最少改动几个字符,使主串不包含任何一个模式串。
- #include <cstdio>
- #include <cstring>
- #include <algorithm>
- #include <queue>
- using namespace std;
const int INF=0x3f3f3f3f;   // "unreachable" cost; same value as a 4-byte int whose bytes are all 0x3f
const int maxn=10010;       // capacity of the automaton node arrays and of the text buffer
const int bsz=26;           // alphabet width: one transition slot per letter 'A'..'Z'
typedef long long ll;
char txt[maxn],cc[]="AGCT"; // txt: shared input buffer; cc: the four DNA letters
bool vis[maxn];             // vis[id] is raised by Trie::Find when pattern `id` occurs
int ans;                    // answer of the current test case (written by solve(), printed by main())

// Aho-Corasick automaton stored in fixed-size arrays (at most maxn nodes, node 0 is the root).
//
// Usage: init(), Insert() every pattern, build(), then either walk ch[][] directly
// (after build() every ch[u][c] is a valid node index) or call Find().
//
// Preconditions (not checked): every character handed to idx() is in 'A'..'Z',
// the total number of nodes stays below maxn, and pattern ids are non-zero
// (val == 0 is used as "no pattern ends here").
struct Trie
{
    bool have[maxn];     // true if some pattern ends at this node or at any of its suffix states
    int ch[maxn][bsz];   // transitions; -1 means "absent" until build() fills the gaps
    int val[maxn];       // id of the pattern ending exactly here, 0 if none
    int sz;              // number of allocated nodes
    int cnt[maxn];       // how many Insert() calls ended at this node
    int f[maxn];         // failure link
    int last[maxn];      // nearest proper suffix state where a pattern ends (0 if none)

    // Allocates a fresh node, clears all of its per-node state and returns its index.
    int newnode()
    {
        val[sz]=0;
        cnt[sz]=0;
        have[sz]=false;
        // Reset the links too, so build() never reads stale values
        // left over from an earlier test case or from uninitialised storage.
        f[sz]=0;
        last[sz]=0;
        std::memset(ch[sz],-1,sizeof ch[sz]);
        return sz++;
    }

    // Discards every pattern and leaves only the root node.
    void init()
    {
        sz=0;
        newnode();
    }

    // Maps a character to its transition slot (expects 'A'..'Z').
    int idx(char c)
    {
        return c-'A';
    }

    // Adds the NUL-terminated pattern `st` under the (non-zero) identifier `id`.
    void Insert(char *st,int id)
    {
        int u=0;
        for(const char *s=st;*s;++s)
        {
            const int c=idx(*s);
            if(ch[u][c]==-1)
                ch[u][c]=newnode();
            u=ch[u][c];
        }
        val[u]=id;
        cnt[u]++;
        have[u]=true;
    }

    // Computes failure/suffix links by BFS and turns the trie into a complete
    // transition table. Must be called once after the last Insert().
    void build()
    {
        std::queue<int> q;
        // The root links back to itself; set explicitly because the BFS below
        // reads last[f[u]] with f[u]==0 for every depth-1 node.
        f[0]=0;
        last[0]=0;
        for(int c=0;c<bsz;c++)
        {
            const int v=ch[0][c];
            if(v==-1)
            {
                ch[0][c]=0;          // missing root edge loops back to the root
            }
            else
            {
                f[v]=0;
                q.push(v);
            }
        }
        while(!q.empty())
        {
            const int u=q.front();
            q.pop();
            last[u]=val[f[u]]?f[u]:last[f[u]];
            for(int c=0;c<bsz;c++)
            {
                const int v=ch[u][c];
                if(v==-1)
                {
                    // Missing edge: reuse the (already complete) transition of the failure state.
                    ch[u][c]=ch[f[u]][c];
                }
                else
                {
                    f[v]=ch[f[u]][c];
                    // A state is "dangerous" if a pattern ends at any suffix of it.
                    have[v]=have[v]||have[f[v]];
                    q.push(v);
                }
            }
        }
    }

    // Scans the NUL-terminated text `st`, sets vis[id] for every pattern that
    // occurs in it, and returns true if at least one pattern occurred.
    // `m` and `id` are unused; they are kept so existing callers keep compiling.
    bool Find(char *st,int m,int id)
    {
        (void)m;
        (void)id;
        bool found=false;
        int u=0;
        for(const char *s=st;*s;++s)
        {
            u=ch[u][idx(*s)];
            // Visit every pattern ending at the current position, longest first.
            for(int p=val[u]?u:last[u];p;p=last[p])
            {
                vis[val[p]]=true;
                found=true;
            }
        }
        return found;
    }
} ac;
- int dp[1005][1005];
- void solve()
- {
- int i,j,k,len=strlen(txt+1);
- int id,next;
- memset(dp,0x3f,sizeof(dp));
- dp[0][0]=0;
- for(i=0;i<len;i++)
- {
- for(j=0;j<ac.sz;j++)
- {
- if(dp[i][j]>=INF) continue ;
- for(k=0;k<4;k++)
- {
- id=ac.idx(cc[k]);
- next=ac.ch[j][id];
- if(ac.have[next]) continue ;
- if(cc[k]==txt[i+1])
- {
- dp[i+1][next]=min(dp[i+1][next],dp[i][j]);
- }
- else
- {
- dp[i+1][next]=min(dp[i+1][next],dp[i][j]+1);
- }
- }
- }
- }
- ans=INF;
- for(j=0;j<ac.sz;j++) ans=min(ans,dp[len][j]);
- if(ans>=INF) ans=-1;
- }
- int main()
- {
- int i,j,n,ca=0;
- while(~scanf("%d",&n))
- {
- if(n==0) break ;
- ac.init();
- for(i=1;i<=n;i++)
- {
- scanf("%s",txt);
- ac.Insert(txt,i);
- }
- ac.build();
- scanf("%s",txt+1);
- solve();
- printf("Case %d: %d\n",++ca,ans);
- }
- return 0;
- }
