Repeated DNA Sequences

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

Example:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output: ["AAAAACCCCC", "CCCCCAAAAA"]

通过对Dicsuss讨论的学习，发现有人说上面native方法超时是因为字符串存储浪费了太多的空间和时间，因此可以考虑用整数存储，即二进制方法。这个思路非常简单，这里一共有四个字母：A，C，G，T。我们转换整数的思路如下：

A = 00，C = 01，G = 10，T = 11。

int key = 0, key = key << 2 | code(A|C|G|T)。

时间复杂度高，应该是O(n^2)

class Solution {
    public List<String> findRepeatedDnaSequences(String s) {
        List<String> res = new ArrayList<>();
        Set<Integer> set = new HashSet<>();
        if(s == null || s.length() < 10){
            return res;
        }
        
        for(int i = 0; i < s.length()-9; i++){
            String subStr = s.substring(i,i+10);
            
            int key = hashcode(subStr);
            
            if(set.contains(key) && !res.contains(subStr)){
                res.add(subStr);
            }else{
                set.add(key);
            }
        }
        
        return res;
    }
    
    public int hashcode(String s){
        int hash = 0;
        
        for(int i =0 ; i < s.length();i++){
            hash = hash << 2 | mapValue(s.charAt(i));
        }
        
        return hash;
    }
    
    public int mapValue(char c){
        switch (c){
            case 'A':
            return 0;
        case 'C':
            return 1;
        case 'G':
            return 2;
        case 'T':
            return 3;
        default:
            return 0;     
        }
        
       
        
    }
}

o(n)

class Solution {
    public List<String> findRepeatedDnaSequences(String s) {

        
        List<String> res = new ArrayList<>();
        
        if(s == null || s.length() < 10){
            return res;
        }
        
        Map<String,Integer> map = new HashMap<>();
        
        for(int i = 0; i < s.length()-9;i++){
            String subStr = s.substring(i,i+10);
            if(!map.containsKey(subStr)){
                map.put(subStr,1);
            }else{
                if(map.get(subStr) == 1){
                    res.add(subStr);
                    map.put(subStr,2);
                }
            }
        }
        
        return res;
    }
}

PreviousExclusive Time of Functions NextValid Number

Last updated 6 years ago

class Solution { public List<String> findRepeatedDnaSequences(String s) { List<String> res = new ArrayList<>(); Set<Integer> set = new HashSet<>(); if(s == null || s.length() < 10){ return res; } for(int i = 0; i < s.length()-9; i++){ String subStr = s.substring(i,i+10); int key = hashcode(subStr); if(set.contains(key) && !res.contains(subStr)){ res.add(subStr); }else{ set.add(key); } } return res; } public int hashcode(String s){ int hash = 0; for(int i =0 ; i < s.length();i++){ hash = hash << 2 | mapValue(s.charAt(i)); } return hash; } public int mapValue(char c){ switch (c){ case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; default: return 0; } } }

class Solution { public List<String> findRepeatedDnaSequences(String s) { List<String> res = new ArrayList<>(); if(s == null || s.length() < 10){ return res; } Map<String,Integer> map = new HashMap<>(); for(int i = 0; i < s.length()-9;i++){ String subStr = s.substring(i,i+10); if(!map.containsKey(subStr)){ map.put(subStr,1); }else{ if(map.get(subStr) == 1){ res.add(subStr); map.put(subStr,2); } } } return res; } }