All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

For example, given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return ["AAAAACCCCC", "CCCCCAAAAA"].


  C++ 标准模板库不常用就容易忘。这道题就是用 hash map 建一张大表做统计,但是直接用 unordered_map<string, int> 会爆内存:
class Solution {
public:
    /**
     * Finds every 10-letter substring that occurs more than once in `s`.
     *
     * Straightforward hash-map version: each 10-character window is stored as
     * a full std::string key, so memory grows with the number of distinct
     * windows in the input.
     *
     * @param s  Sequence to scan.
     * @return   Each repeated 10-letter substring exactly once, in the order
     *           in which its second occurrence is encountered. Empty when
     *           `s` has fewer than 10 characters.
     */
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        typedef std::string::size_type Index;
        const Index kSeqLen = 10;

        std::unordered_map<std::string, int> occurrences;
        std::vector<std::string> ret;

        // `start + kSeqLen <= s.size()` is false immediately for short inputs,
        // so no window is ever read past the end of the string.
        for (Index start = 0; start + kSeqLen <= s.size(); ++start) {
            const std::string window = s.substr(start, kSeqLen);
            // operator[] value-initialises a new counter to 0. Reporting only
            // on the transition to 2 keeps a sequence that occurs three or
            // more times from being added to the result repeatedly.
            if (++occurrences[window] == 2) {
                ret.push_back(window);
            }
        }
        return ret;
    }
};

  处理方法是将其改为 unordered_map<int, int>:每个碱基只有 4 种取值,可以用 2 位表示,于是把 10 个字母的序列按 4 进制转换成一个整数(共 4^10 = 1048576 种取值)。另外还可以用 bitset 进一步降低内存。最后需要考虑重复问题:如果用 unordered_map,可以直接标记是否已经添加到返回的 vector 中;如果用 bitset,可以通过临时变量 set<string> 去重存储,最后再生成返回的 vector。

#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <bitset>
#include <set>
using namespace std; //class Solution {
// vector<string> findRepeatedDnaSequences(string s) {
// unordered_map<string,int > mp;
// int len = s.length(),curIdx = 0;
// string curStr;
// vector<string >ret;
// while(curIdx + 10<=len){
// curStr = s.substr(curIdx,10);
// if(mp.find(curStr)!=mp.end()){
// ret.push_back(curStr);
// }
// else
// mp[curStr] = 1;
// curIdx ++;
// }
// return ret;
// }
//};

/**
 * Finds repeated 10-letter DNA sequences using a 2-bit-per-base rolling code
 * and a bitset of "already seen" codes instead of storing every window as a
 * string key.
 */
class Solution {
public:
    /**
     * Returns every 10-letter substring of `s` that occurs more than once.
     *
     * Each base is mapped to a base-4 digit by helpFun, so a 10-letter window
     * becomes a 20-bit integer that indexes a 4^10-entry bitset.
     *
     * Windows containing any character other than 'A', 'C', 'G' or 'T' cannot
     * be encoded and are skipped.
     *
     * @param s  Sequence to scan.
     * @return   Each repeated 10-letter substring exactly once, in ascending
     *           lexicographic order (the order of the intermediate std::set).
     *           Empty when `s` has fewer than 10 characters.
     */
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        typedef std::string::size_type Index;
        constexpr Index kSeqLen = 10;
        constexpr Index kCodeSpace = Index(1) << (2 * kSeqLen);  // 4^10 codes
        constexpr unsigned kCodeMask = static_cast<unsigned>(kCodeSpace - 1);

        std::bitset<kCodeSpace> seen;       // 2^20 bits = 128 KiB
        std::set<std::string> repeated;     // de-duplicates the results
        unsigned code = 0;                  // rolling base-4 value of the window
        Index validRun = 0;                 // consecutive encodable characters

        for (Index i = 0; i < s.size(); ++i) {
            const int digit = helpFun(s[i]);
            if (digit < 0) {
                // An unencodable character invalidates every window that
                // would contain it; start over after it.
                validRun = 0;
                code = 0;
                continue;
            }

            // Shift in the new base and mask to 20 bits, which drops the base
            // that just left the window and keeps `code` within the bitset.
            code = ((code << 2) | static_cast<unsigned>(digit)) & kCodeMask;
            if (++validRun < kSeqLen) {
                continue;  // fewer than 10 valid characters so far
            }

            if (seen.test(code)) {
                repeated.insert(s.substr(i + 1 - kSeqLen, kSeqLen));
            } else {
                seen.set(code);
            }
        }
        return std::vector<std::string>(repeated.begin(), repeated.end());
    }

    /**
     * Maps a nucleotide letter to its base-4 digit.
     *
     * @param c  Character to encode.
     * @return   0, 1, 2 or 3 for 'A', 'C', 'G', 'T' respectively; -1 for any
     *           other character.
     */
    int helpFun(char c) {
        switch (c) {
            case 'A': return 0;
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
            default:  return -1;
        }
    }
};
}; int main()
Solution sol;
vector<string > ret = sol.findRepeatedDnaSequences(s);
for(int i=;i<ret.size();i++)
return ;

