Compare commits
36 Commits
39 changed files with 1910 additions and 38 deletions
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
# Binaries for programs and plugins |
||||
*.exe |
||||
*.exe~ |
||||
*.dll |
||||
*.so |
||||
*.dylib |
||||
|
||||
# Test binary, built with `go test -c` |
||||
*.test |
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE |
||||
*.out |
||||
|
@ -0,0 +1,41 @@
@@ -0,0 +1,41 @@
|
||||
# Go-Rosalind |
||||
|
||||
Solving problems from Rosalind.info using Go |
||||
|
||||
## Organization |
||||
|
||||
Each chapter has its own directory. |
||||
|
||||
Within the chapter directory, each problem has |
||||
its own driver program, which prints info about |
||||
the problem, loads the input file from Rosalind, |
||||
and prints the solution. Each problem also has |
||||
its own test suite using the examples provided |
||||
on Rosalind.info. |
||||
|
||||
For example, the function that loads the |
||||
input file for problem BA1A is in `ba1a.go` |
||||
and the code to test the functionality |
||||
of the solution to BA1A is in `ba1a_test.go`. |
||||
|
||||
## Quick Start |
||||
|
||||
To run all the tests in a chapter directory: |
||||
|
||||
``` |
||||
go test -v |
||||
``` |
||||
|
||||
To run only a particular problem: |
||||
|
||||
1. Edit `main.go` to call the right method |
||||
for the right problem with the right input |
||||
file name. |
||||
|
||||
2. Run `main.go` using `go run`, and point Go |
||||
to all the relevant Go files: |
||||
|
||||
``` |
||||
go run main.go utils.go rosalind.go <name-of-BA-file> |
||||
``` |
||||
|
@ -1,38 +0,0 @@
@@ -1,38 +0,0 @@
|
||||
package main |
||||
|
||||
import "fmt" |
||||
|
||||
// Rosalind: Problem BA1A
|
||||
//
|
||||
// To run:
|
||||
//
|
||||
// $ go run ba1a.go
|
||||
|
||||
// pattern_count returns the number of occurrences of pattern in input,
// counting overlapping occurrences separately (for example, "GCG" occurs
// twice in "GCGCG").
//
// It slides a window of len(pattern) bytes across input and compares each
// window with pattern. When pattern is longer than input there is no window
// to examine and the result is 0.
func pattern_count(input string, pattern string) int {
	width := len(pattern)
	count := 0

	// The last window that still fits starts at len(input)-width.
	for start := 0; start+width <= len(input); start++ {
		if input[start:start+width] == pattern {
			count++
		}
	}
	return count
}
||||
|
||||
func main() { |
||||
// Call the pattern_count function
|
||||
fmt.Println("Number of occurrences of GCG in GCGCG:") |
||||
res := pattern_count("GCGCG","GCG") |
||||
fmt.Println(res) |
||||
} |
||||
|
@ -0,0 +1,73 @@
@@ -0,0 +1,73 @@
|
||||
# Chapter 1 |
||||
|
||||
In this chapter we perform basic operations with |
||||
strings and data structures. |
||||
|
||||
## How to run |
||||
|
||||
* Each problem has its own function |
||||
|
||||
* To run the code for a particular problem, |
||||
call the function for that problem in `main.go` |
||||
|
||||
* Edit `main.go` to call the right function, |
||||
and pass in the name of the input file you |
||||
want to use: for example, `BA1A("input.txt")` |
||||
|
||||
* The function you call is implemented in the |
||||
corresponding Go file (for example, `ba1a.go`). |
||||
It loads the inputs from the input file, |
||||
calls the right function with the inputs, |
||||
and prints the results. |
||||
|
||||
* The functions that load data from input files |
||||
are tested along with the functions themselves, |
||||
since each problem has a sample input file |
||||
in `data/` |
||||
|
||||
## Directory Layout |
||||
|
||||
* Each problem has one Go file and one test |
||||
|
||||
* The `data/` directory contains input files |
||||
for the tests (i.e., files that contain both |
||||
inputs and corresponding outputs) |
||||
|
||||
* The `for_real/` directory contains sample |
||||
input files from Rosalind.info for each |
||||
problem (i.e., files that contain only the |
||||
inputs) |
||||
|
||||
* The `main.go` file contains the `main()` |
||||
driver function and is the entrypoint for |
||||
`go run` |
||||
|
||||
* The `rosalind.go` file contains most of the |
||||
computational functionality implemented |
||||
for the problems. |
||||
|
||||
* The `utils.go` file contains utilities unrelated |
||||
to bioinformatics. |
||||
|
||||
## Compiling and Running |
||||
|
||||
To run all tests, `go test`: |
||||
|
||||
``` |
||||
go test -v |
||||
``` |
||||
|
||||
To run a specific problem, edit `main.go` |
||||
to call the corresponding problem's function |
||||
and then `go run`: |
||||
|
||||
``` |
||||
go run main.go utils.go rosalind.go <name of ba1 file.go> |
||||
``` |
||||
|
||||
## To Do |
||||
|
||||
Add a Snakefile |
||||
|
||||
|
||||
|
@ -0,0 +1,54 @@
@@ -0,0 +1,54 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1A: Compute the Number of Times a Pattern Appears in a Text
|
||||
|
||||
// BA1ADescription prints a short banner describing Rosalind problem BA1A
// to standard output, one line per entry.
func BA1ADescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1A:",
		"Compute the Number of Times a Pattern Appears in a Text",
		"",
		"Given an input string and a pattern,",
		"count the number of times the pattern",
		"occurs in the input (overlaps included).",
		"",
		"URL: http://rosalind.info/problems/ba1a/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||
|
||||
// Describe the problem,
|
||||
// print the name of the input file,
|
||||
// print the output/result
|
||||
func BA1A(filename string) { |
||||
|
||||
BA1ADescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
var input, pattern string |
||||
input = lines[0] |
||||
pattern = lines[1] |
||||
|
||||
result := PatternCount(input, pattern) |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(result) |
||||
} |
||||
|
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
"strconv" |
||||
"testing" |
||||
) |
||||
|
||||
// To run this test:
|
||||
//
|
||||
// $ go test -v -run TestPatternCount
|
||||
|
||||
// Run a single test of the PatternCount function
|
||||
func TestPatternCount(t *testing.T) { |
||||
// Call the PatternCount function
|
||||
input := "GCGCG" |
||||
pattern := "GCG" |
||||
result := PatternCount(input,pattern) |
||||
gold := 2 |
||||
if result != gold { |
||||
err := fmt.Sprintf("Error testing PatternCount(): input = %s, pattern = %s, result = %d (should be %d)", |
||||
input, pattern, result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
// Run a test matrix of the PatternCount function
|
||||
func TestMatrixPatternCount(t *testing.T) { |
||||
// Construct a test matrix
|
||||
var tests = []struct { |
||||
input string |
||||
pattern string |
||||
gold int |
||||
}{ |
||||
{"GCGCG", "GCG", 2}, |
||||
{"GAGGGGGGGAG", "AGG", 1}, |
||||
{"GCACGCACGCAC", "GCAC", 3}, |
||||
{"", "GC", 0}, |
||||
{"GCG", "GTACTCTC", 0}, |
||||
{"ACGTACGTACGT", "CG", 3}, |
||||
{"AAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAATAATTACAGAGTACACAACATCCA", |
||||
"AAA", 4}, |
||||
{"AGCGTGCCGAAATATGCCGCCAGACCTGCTGCGGTGGCCTCGCCGACTTCACGGATGCCAAGTGCATAGAGGAAGCGAGCAAAGGTGGTTTCTTTCGCTTTATCCAGCGCGTTAACCACGTTCTGTGCCGACTTT", |
||||
"TTT", 4}, |
||||
{"GGACTTACTGACGTACG","ACT", 2}, |
||||
{"ATCCGATCCCATGCCCATG","CC", 5}, |
||||
{"CTGTTTTTGATCCATGATATGTTATCTCTCCGTCATCAGAAGAACAGTGACGGATCGCCCTCTCTCTTGGTCAGGCGACCGTTTGCCATAATGCCCATGCTTTCCAGCCAGCTCTCAAACTCCGGTGACTCGCGCAGGTTGAGT", |
||||
"CTC", 9}, |
||||
} |
||||
for _, test := range tests { |
||||
result := PatternCount(test.input, test.pattern) |
||||
if result != test.gold { |
||||
err := fmt.Sprintf("Error testing PatternCount(): input = %s, pattern = %s, result = %d (should be %d)", |
||||
test.input, test.pattern, result, test.gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
// Load a PatternCount test (input and output)
|
||||
// from a file. Run the test with the input
|
||||
// and verify the output matches the output
|
||||
// contained in the file.
|
||||
func TestPatternCountFile(t *testing.T) { |
||||
|
||||
filename := "data/pattern_count.txt" |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("readLines: %v",err) |
||||
} |
||||
|
||||
// lines[0]: Input
|
||||
input := lines[1] |
||||
pattern := lines[2] |
||||
|
||||
// lines[3]: Output
|
||||
output_str := lines[4] |
||||
|
||||
// Convert output to inteter
|
||||
output,err := strconv.Atoi(output_str) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Call the function with the given inputs
|
||||
result := PatternCount(input, pattern) |
||||
|
||||
// Verify answer
|
||||
if result != output { |
||||
err := fmt.Sprintf("Error testing PatternCount using test case from file: results do not match:\rcomputed result = %d\nexpected output = %d",result,output) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
@ -0,0 +1,58 @@
@@ -0,0 +1,58 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
"strings" |
||||
"strconv" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1B: Most Frequent k-mers
|
||||
|
||||
// BA1BDescription prints a short banner describing Rosalind problem BA1B
// to standard output, one line per entry.
func BA1BDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1B:",
		"Most Frequent k-mers",
		"",
		"Given an input string and a length k,",
		"report the k-mer or k-mers that occur",
		"most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1b/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1B(filename string) { |
||||
|
||||
BA1BDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
input := lines[0] |
||||
k_str := lines[1] |
||||
|
||||
k,err := strconv.Atoi(k_str) |
||||
if err!=nil { |
||||
log.Fatalf("Error: string to int conversion: %v",err) |
||||
} |
||||
|
||||
mfks,_ := MostFrequentKmers(input,k) |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(strings.Join(mfks," ")) |
||||
} |
||||
|
@ -0,0 +1,82 @@
@@ -0,0 +1,82 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"sort" |
||||
"strconv" |
||||
"strings" |
||||
"log" |
||||
"testing" |
||||
) |
||||
|
||||
// Run a test of the MostFrequentKmers function
|
||||
func TestMostFrequentKmers(t *testing.T) { |
||||
// Call MostFrequentKmers
|
||||
input := "AAAATGCGCTAGTAAAAGTCACTGAAAA" |
||||
k := 4 |
||||
result,err := MostFrequentKmers(input,k) |
||||
gold := []string{"AAAA"} |
||||
|
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
if !EqualStringSlices(result,gold) { |
||||
err := fmt.Sprintf("Error testing MostFrequentKmers(): input = %s, k = %d, result = %s (should be %s)", |
||||
input, k, result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
// Run a test of the PatternCount function
|
||||
// using inputs/outputs from a file.
|
||||
func TestMostFrequentKmersFile(t *testing.T) { |
||||
|
||||
filename := "data/frequent_words.txt" |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("readLines: %v",err) |
||||
} |
||||
|
||||
// lines[0]: Input
|
||||
dna := lines[1] |
||||
k_str := lines[2] |
||||
// lines[3]: Output
|
||||
gold := strings.Split(lines[4]," ") |
||||
|
||||
// Convert k to integer
|
||||
k,err := strconv.Atoi(k_str) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Call the function with the given inputs
|
||||
result, err := MostFrequentKmers(dna,k) |
||||
|
||||
// Check if function threw error
|
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Check that there _was_ a result
|
||||
if len(result)==0 { |
||||
err := fmt.Sprintf("Error testing MostFrequentKmers using test case from file: length of most frequent kmers found was 0: %q", |
||||
result) |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Sort before comparing
|
||||
sort.Strings(gold) |
||||
sort.Strings(result) |
||||
|
||||
// These will only be unequal if something went wrong
|
||||
if !EqualStringSlices(gold,result) { |
||||
err := fmt.Sprintf("Error testing MostFrequentKmers using test case from file: most frequent kmers mismatch.\ncomputed = %q\ngold = %q\n", |
||||
result,gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1C: Find the Reverse Complement of a String
|
||||
|
||||
// BA1CDescription writes the banner for Rosalind problem BA1C to
// standard output, one entry per line.
func BA1CDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1C:",
		"Find the Reverse Complement of a String",
		"",
		"Given a DNA input string,",
		"find the reverse complement",
		"of the DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba1c/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1C(filename string) { |
||||
|
||||
BA1CDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
input := lines[0] |
||||
|
||||
result,_ := ReverseComplement(input) |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(result) |
||||
} |
||||
|
@ -0,0 +1,123 @@
@@ -0,0 +1,123 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"testing" |
||||
) |
||||
|
||||
// Check that the DNA2Bitmasks utility
|
||||
// extracts the correct bitmasks from
|
||||
// a DNA input string.
|
||||
func TestDNA2Bitmasks(t *testing.T) { |
||||
|
||||
input := "AATCCGCT" |
||||
|
||||
result, func_err := DNA2Bitmasks(input) |
||||
|
||||
// Handle errors from in the DNA2Bitmasks function
|
||||
if func_err != nil { |
||||
err := fmt.Sprintf("Error in function DNA2Bitmasks(): input = %s", input) |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Assemble gold standard answer (bitvectors)
|
||||
tt := true |
||||
ff := false |
||||
gold := make(map[string][]bool) |
||||
gold["A"] = []bool{tt,tt,ff,ff,ff,ff,ff,ff} |
||||
gold["T"] = []bool{ff,ff,tt,ff,ff,ff,ff,tt} |
||||
gold["C"] = []bool{ff,ff,ff,tt,tt,ff,tt,ff} |
||||
gold["G"] = []bool{ff,ff,ff,ff,ff,tt,ff,ff} |
||||
|
||||
// Verify result from DNA2Bitmasks is same as
|
||||
// our gold standard
|
||||
for _,cod := range "ATCG" { |
||||
cods := string(cod) |
||||
if !EqualBoolSlices(result[cods],gold[cods]) { |
||||
err := fmt.Sprintf("Error testing DNA2Bitmasks(): input = %s, codon = %s, extracted = %v, gold = %v", |
||||
input, cods, result[cods], gold[cods]) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
// Check that the Bitmasks2DNA utility
|
||||
// constructs the correct DNA string
|
||||
// from bitmasks.
|
||||
func TestBitmasks2DNA(t *testing.T) { |
||||
// Assemble input bitmasks
|
||||
tt := true |
||||
ff := false |
||||
input := make(map[string][]bool) |
||||
input["A"] = []bool{tt,tt,ff,ff,ff,ff,ff,ff} |
||||
input["T"] = []bool{ff,ff,tt,ff,ff,ff,ff,tt} |
||||
input["C"] = []bool{ff,ff,ff,tt,tt,ff,tt,ff} |
||||
input["G"] = []bool{ff,ff,ff,ff,ff,tt,ff,ff} |
||||
|
||||
gold := "AATCCGCT" |
||||
|
||||
result, func_err := Bitmasks2DNA(input) |
||||
|
||||
// Handle errors from in the DNA2Bitmasks function
|
||||
if func_err != nil { |
||||
err := fmt.Sprintf("Error in function Bitmasks2DNA(): function returned error") |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Verify result from DNA2Bitmasks is same as
|
||||
// our gold standard
|
||||
if result != gold { |
||||
err := fmt.Sprintf("Error testing Bitmasks2DNA(): result = %s, gold = %s", result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
// Run a test of the function that computes
|
||||
// the ReverseComplement of a DNA string.
|
||||
func TestReverseComplement(t *testing.T) { |
||||
input := "AAAACCCGGT" |
||||
result,_ := ReverseComplement(input) |
||||
gold := "ACCGGGTTTT" |
||||
if result!=gold { |
||||
err := fmt.Sprintf("Error testing ReverseComplement(): input = %s, result = %s (should be %s)", |
||||
input, result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
|
||||
// Run a test of the ReverseComplement function
|
||||
// using inputs/outputs from a file.
|
||||
func TestReverseComplementFile(t *testing.T) { |
||||
|
||||
filename := "data/reverse_complement.txt" |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
// lines[0]: Input
|
||||
input := lines[1] |
||||
// lines[2]: Output
|
||||
gold := lines[3] |
||||
|
||||
// Call the function with the given inputs
|
||||
result, err := ReverseComplement(input) |
||||
|
||||
// Check that there _was_ a result
|
||||
if len(result)==0 { |
||||
err := fmt.Sprintf("Error testing ReverseComplement using test case from file") |
||||
t.Error(err) |
||||
} |
||||
|
||||
if result!=gold { |
||||
err := fmt.Sprintf("Error testing ReverseComplement(): input = %s, result = %s (should be %s)", |
||||
input, result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
@ -0,0 +1,61 @@
@@ -0,0 +1,61 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"strconv" |
||||
"strings" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1D: Find all occurrences of pattern in string
|
||||
|
||||
// BA1DDescription writes the banner for Rosalind problem BA1D to
// standard output, one entry per line.
func BA1DDescription() {
	banner := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1D:",
		"Find all occurrences of pattern in string",
		"",
		"Given a string input (genome) and a substring (pattern),",
		"return all starting positions in the genome where the",
		"pattern occurs in the genome.",
		"",
		"URL: http://rosalind.info/problems/ba1d/",
		"",
	}
	// Println appends the newline that terminates the final entry.
	fmt.Println(strings.Join(banner, "\n"))
}
||||
|
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1D(filename string) { |
||||
|
||||
BA1DDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
pattern := lines[0] |
||||
genome := lines[1] |
||||
|
||||
// Result is a slice of ints
|
||||
locs,_ := FindOccurrences(pattern,genome) |
||||
|
||||
// Convert to a slice of strings for easier printing
|
||||
locs_str := make([]string,len(locs)) |
||||
for i,j := range locs { |
||||
locs_str[i] = strconv.Itoa(j) |
||||
} |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(strings.Join(locs_str," ")) |
||||
} |
||||
|
@ -0,0 +1,97 @@
@@ -0,0 +1,97 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
"strings" |
||||
"strconv" |
||||
"testing" |
||||
) |
||||
|
||||
func TestFindOccurrences(t *testing.T) { |
||||
// Call FindOccurrences
|
||||
pattern := "ATAT" |
||||
genome := "GATATATGCATATACTT" |
||||
|
||||
result,err := FindOccurrences(pattern,genome) |
||||
gold := []int{1,3,9} |
||||
|
||||
if !EqualIntSlices(result,gold) || err!=nil { |
||||
err := fmt.Sprintf("Error testing FindOccurrences(): result = %q, should be %q", |
||||
result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
func TestFindOccurrencesDebug(t *testing.T) { |
||||
// Construct a test matrix
|
||||
var tests = []struct { |
||||
pattern string |
||||
genome string |
||||
gold []int |
||||
}{ |
||||
{"ACAC", "TTTTACACTTTTTTGTGTAAAAA", |
||||
[]int{4}}, |
||||
{"AAA", "AAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAATAATTACAGAGTACACAACATCCAT", |
||||
[]int{0,46,51,74}}, |
||||
{"TTT", "AGCGTGCCGAAATATGCCGCCAGACCTGCTGCGGTGGCCTCGCCGACTTCACGGATGCCAAGTGCATAGAGGAAGCGAGCAAAGGTGGTTTCTTTCGCTTTATCCAGCGCGTTAACCACGTTCTGTGCCGACTTT", |
||||
[]int{88,92,98,132}}, |
||||
{"ATA", "ATATATA", |
||||
[]int{0,2,4}}, |
||||
} |
||||
for _, test := range tests { |
||||
|
||||
result,err := FindOccurrences(test.pattern, test.genome) |
||||
|
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
if !EqualIntSlices(result,test.gold) { |
||||
err := fmt.Sprintf("Error testing FindOccurrences(): result = %q, should be %q", |
||||
result, test.gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func TestFindOccurrencesFiles(t *testing.T) { |
||||
|
||||
filename := "data/pattern_matching.txt" |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// lines[0]: Input
|
||||
pattern := lines[1] |
||||
genome := lines[2] |
||||
|
||||
// lines[3]: Output
|
||||
gold_str := lines[4] |
||||
gold_slice := strings.Split(gold_str," ") |
||||
|
||||
gold := make([]int,len(gold_slice)) |
||||
for i,g := range gold_slice { |
||||
gold[i],err = strconv.Atoi(g) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
||||
result,err := FindOccurrences(pattern,genome) |
||||
|
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
if !EqualIntSlices(result,gold) { |
||||
err := fmt.Sprintf("Error testing FindOccurrences():\nresult = %v\ngold = %v\n", |
||||
result, gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
|
@ -0,0 +1,58 @@
@@ -0,0 +1,58 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
"strings" |
||||
"strconv" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1E: Find patterns forming clumps in a string
|
||||
|
||||
// BA1EDescription writes the banner for Rosalind problem BA1E to
// standard output, one entry per line.
func BA1EDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1E:",
		"Find patterns forming clumps in a string",
		"",
		"A clump is characterized by integers L and t",
		"if there is an interval in the genome of length L",
		"in which a given pattern occurs t or more times.",
		"",
		"URL: http://rosalind.info/problems/ba1e/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1E(filename string) { |
||||
|
||||
BA1EDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
genome := lines[0] |
||||
params_str := lines[1] |
||||
params_slice := strings.Split(params_str," ") |
||||
|
||||
k,_ := strconv.Atoi(params_slice[0]) |
||||
L,_ := strconv.Atoi(params_slice[1]) |
||||
t,_ := strconv.Atoi(params_slice[2]) |
||||
|
||||
patterns,_ := FindClumps(genome,k,L,t) |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(strings.Join(patterns," ")) |
||||
} |
||||
|
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"testing" |
||||
) |
||||
|
||||
func TestMatrixFindClumps(t *testing.T) { |
||||
var tests = []struct { |
||||
genome string |
||||
k int |
||||
L int |
||||
t int |
||||
gold []string |
||||
}{ |
||||
{"CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA", |
||||
5, 50, 4, |
||||
[]string{"CGACA","GAAGA"}}, |
||||
{"AAAACGTCGAAAAA", |
||||
2, 4, 2, |
||||
[]string{"AA"}}, |
||||
{"ACGTACGT", |
||||
1, 5, 2, |
||||
[]string{"A","C","G","T"}}, |
||||
{"CCACGCGGTGTACGCTGCAAAAAGCCTTGCTGAATCAAATAAGGTTCCAGCACATCCTCAATGGTTTCACGTTCTTCGCCAATGGCTGCCGCCAGGTTATCCAGACCTACAGGTCCACCAAAGAACTTATCGATTACCGCCAGCAACAATTTGCGGTCCATATAATCGAAACCTTCAGCATCGACATTCAACATATCCAGCG", |
||||
3, 25, 3, |
||||
[]string{"AAA","CAG","CAT","CCA","GCC","TTC"}}, |
||||
|
||||
} |
||||
for _, test := range tests { |
||||
result,err := FindClumps(test.genome, |
||||
test.k, test.L, test.t) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
if !EqualStringSlices(result,test.gold) { |
||||
err := fmt.Sprintf("Error testing FindClumps(): k = %d, L = %d, t = %d",test.k,test.L,test.t) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"strings" |
||||
"strconv" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1F: Find positions in a genome that minimize skew
|
||||
|
||||
// BA1FDescription prints a short banner describing Rosalind problem BA1F
// to standard output, one line per entry.
func BA1FDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1F:",
		"Find positions in a gene that minimize skew",
		"",
		"The skew of a genome is defined as the difference",
		"between the number of G nucleotides and the number",
		"of C nucleotides (#G - #C). Given a DNA string, this",
		"function should compute the cumulative skew for each",
		"position in the genome, and report the indices where",
		"the skew value is minimized.",
		"",
		"URL: http://rosalind.info/problems/ba1f/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1F(filename string) { |
||||
|
||||
BA1FDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
genome := lines[0] |
||||
|
||||
minskew,_ := MinSkewPositions(genome) |
||||
|
||||
minskew_str := make([]string,len(minskew)) |
||||
for i,j := range minskew { |
||||
minskew_str[i] = strconv.Itoa(j) |
||||
} |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(strings.Join(minskew_str," ")) |
||||
} |
||||
|
@ -0,0 +1,53 @@
@@ -0,0 +1,53 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"sort" |
||||
"testing" |
||||
) |
||||
|
||||
func TestMatrixMinSkewPosition(t *testing.T) { |
||||
var tests = []struct { |
||||
genome string |
||||
gold []int |
||||
}{ |
||||
{"CCTATCGGTGGATTAGCATGTCCCTGTACGTTTCGCCGCGAACTAGTTCACACGGCTTGATGGCAAATGGTTTTTCCGGCGACCGTAATCGTCCACCGAG", |
||||
[]int{53, 97}}, |
||||
{"TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGA", |
||||
[]int{11, 24}}, |
||||
{"ACCG", |
||||
[]int{3}}, |
||||
{"ACCC", |
||||
[]int{4}}, |
||||
{"CCGGGT", |
||||
[]int{2}}, |
||||
{"CCGGCCGG", |
||||
[]int{2,6}}, |
||||
} |
||||
for _, test := range tests { |
||||
|
||||
// Do it - find the positions that minimize skew
|
||||
result,err := MinSkewPositions(test.genome) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Check length of result
|
||||
if len(result)!=len(test.gold) { |
||||
err := fmt.Sprintf("Error testing MinSkewPositions():\nfor genome: %s\nlength of result (%d) did not match length of gold standard (%d).\nFound: %v\nShould be: %v", |
||||
test.genome, len(result), len(test.gold), |
||||
result, test.gold) |
||||
t.Error(err) |
||||
} |
||||
|
||||
// Sort before comparing
|
||||
sort.Ints(result) |
||||
sort.Ints(test.gold) |
||||
if !EqualIntSlices(result,test.gold) { |
||||
err := fmt.Sprintf("Error testing MinSkewPositions():\nfor genome: %s\nfound: %v\nshould be: %v", |
||||
test.genome, result, test.gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
@ -0,0 +1,52 @@
@@ -0,0 +1,52 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1G: Find Hamming distance between two DNA strings
|
||||
|
||||
// BA1GDescription writes the banner for Rosalind problem BA1G to
// standard output, one entry per line.
func BA1GDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1G:",
		"Find Hamming distance between two DNA strings",
		"",
		"The Hamming distance between two strings HammingDistance(p,q)",
		"is the number of characters different between the two",
		"strands. This program computes the Hamming distance",
		"between two strings.",
		"",
		"URL: http://rosalind.info/problems/ba1g/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1G(filename string) { |
||||
|
||||
BA1GDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
p := lines[0] |
||||
q := lines[1] |
||||
|
||||
hamm,_ := HammingDistance(p,q) |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(hamm) |
||||
} |
||||
|
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"testing" |
||||
) |
||||
|
||||
func TestMatrixHammingDistance(t *testing.T) { |
||||
var tests = []struct { |
||||
p string |
||||
q string |
||||
dist int |
||||
}{ |
||||
{"GGGCCGTTGGT", |
||||
"GGACCGTTGAC", |
||||
3 }, |
||||
{"AAAA", |
||||
"TTTT", |
||||
4 }, |
||||
{"ACGTACGT", |
||||
"TACGTACG", |
||||
8 }, |
||||
{"ACGTACGT", |
||||
"CCCCCCCC", |
||||
6 }, |
||||
{"ACGTACGT", |
||||
"TGCATGCA", |
||||
8 }, |
||||
{"GATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATAC", |
||||
"AATAGCAGCTTCTCAACTGGTTACCTCGTATGAGTAAATTAGGTCATTATTGACTCAGGTCACTAACGTC", |
||||
15 }, |
||||
{"AGAAACAGACCGCTATGTTCAACGATTTGTTTTATCTCGTCACCGGGATATTGCGGCCACTCATCGGTCAGTTGATTACGCAGGGCGTAAATCGCCAGAATCAGGCTG", |
||||
"AGAAACCCACCGCTAAAAACAACGATTTGCGTAGTCAGGTCACCGGGATATTGCGGCCACTAAGGCCTTGGATGATTACGCAGAACGTATTGACCCAGAATCAGGCTC", |
||||
28 }, |
||||
} |
||||
for _, test := range tests { |
||||
result,err := HammingDistance(test.p, test.q) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
if result!=test.dist { |
||||
err := fmt.Sprintf("Error testing HammingDistance(): computed dist = %d (should be %d)\np = %s\nq = %s\n", |
||||
result, test.dist, |
||||
test.p, test.q) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
@ -0,0 +1,65 @@
@@ -0,0 +1,65 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"strconv" |
||||
"strings" |
||||
"log" |
||||
) |
||||
|
||||
// Rosalind: Problem BA1H: Find approximate occurrences of pattern in string
|
||||
|
||||
// BA1HDescription writes the banner for Rosalind problem BA1H to
// standard output, one entry per line.
func BA1HDescription() {
	banner := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1H:",
		"Find approximate occurrences of pattern in string",
		"",
		"Given a string Text and a string Pattern, and a maximum",
		"Hamming distance d, return all locations in Text where",
		"there is an approximate match with Pattern (i.e., a pattern",
		"with a Hamming distance from Pattern of d or less).",
		"",
		"URL: http://rosalind.info/problems/ba1h/",
		"",
	}
	// Println appends the newline that terminates the final entry.
	fmt.Println(strings.Join(banner, "\n"))
}
||||
|
||||
// Describe the problem, and call the function
|
||||
func BA1H(filename string) { |
||||
|
||||
BA1HDescription() |
||||
|
||||
// Read the contents of the input file
|
||||
// into a single string
|
||||
lines, err := readLines(filename) |
||||
if err != nil { |
||||
log.Fatalf("Error: readLines: %v",err) |
||||
} |
||||
|
||||
// Input file contents
|
||||
pattern := lines[0] |
||||
text := lines[1] |
||||
d_str := lines[2] |
||||
|
||||
d,_ := strconv.Atoi(d_str) |
||||
|
||||
approx,_ := FindApproximateOccurrences(pattern,text,d) |
||||
|
||||
approx_str := make([]string,len(approx)) |
||||
for i,j := range approx { |
||||
approx_str[i] = strconv.Itoa(j) |
||||
if err!=nil { |
||||
log.Fatalf("Error: conversion from int to string: %v",err) |
||||
} |
||||
} |
||||
|
||||
fmt.Println("") |
||||
fmt.Printf("Computed result from input file: %s\n",filename) |
||||
fmt.Println(strings.Join(approx_str," ")) |
||||
} |
||||
|
@ -0,0 +1,56 @@
@@ -0,0 +1,56 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"testing" |
||||
) |
||||
|
||||
func TestMatrixApproximateOccurrences(t *testing.T) { |
||||
var tests = []struct { |
||||
pattern string |
||||
text string |
||||
d int |
||||
gold []int |
||||
}{ |
||||
{"ATTCTGGA", |
||||
"CGCCCGAATCCAGAACGCATTCCCATATTTCGGGACCACTGGCCTCCACGGTACGGACGTCAATCAAATGCCTAGCGGCTTGTGGTTTCTCCTACGCTCC", |
||||
3, |
||||
[]int{6, 7, 26, 27, 78}}, |
||||
{"AAA", |
||||
"TTTTTTAAATTTTAAATTTTTT", |
||||
2, |
||||
[]int{4, 5, 6, 7, 8, 11, 12, 13, 14, 15}}, |
||||
{"GAGCGCTGG", |
||||
"GAGCGCTGGGTTAACTCGCTACTTCCCGACGAGCGCTGTGGCGCAAATTGGCGATGAAACTGCAGAGAGAACTGGTCATCCAACTGAATTCTCCCCGCTATCGCATTTTGATGCGCGCCGCGTCGATT", |
||||
2, |
||||
[]int{0, 30, 66}}, |
||||
{"AATCCTTTCA", |
||||
"CCAAATCCCCTCATGGCATGCATTCCCGCAGTATTTAATCCTTTCATTCTGCATATAAGTAGTGAAGGTATAGAAACCCGTTCAAGCCCGCAGCGGTAAAACCGAGAACCATGATGAATGCACGGCGATTGCGCCATAATCCAAACA", |
||||
3, |
||||
[]int{3, 36, 74, 137}}, |
||||
{"CCGTCATCC", |
||||
"CCGTCATCCGTCATCCTCGCCACGTTGGCATGCATTCCGTCATCCCGTCAGGCATACTTCTGCATATAAGTACAAACATCCGTCATGTCAAAGGGAGCCCGCAGCGGTAAAACCGAGAACCATGATGAATGCACGGCGATTGC", |
||||
3, |
||||
[]int{0, 7, 36, 44, 48, 72, 79, 112}}, |
||||
{"TTT", |
||||
"AAAAAA", |
||||
3, |
||||
[]int{0, 1, 2, 3}}, |
||||
{"CCA", |
||||
"CCACCT", |
||||
0, |
||||
[]int{0}}, |
||||
} |
||||
for _, test := range tests { |
||||
result,err := FindApproximateOccurrences(test.pattern, test.text, test.d) |
||||
if err!=nil { |
||||
t.Error(err) |
||||
} |
||||
if !EqualIntSlices(result, test.gold) { |
||||
err := fmt.Sprintf("Error testing FindApproximateOccurrences:\ncomputed = %v\ngold = %v", |
||||
result, test.gold) |
||||
t.Error(err) |
||||
} |
||||
} |
||||
} |
||||
|
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
Input |
||||
GCGGTTATGCACCGTTCAAATTAGCAAACCACTAAGCGACGTAGTCTGGATTGATTTCTCCCTACCAGTGACCCAAGACGCGTTAGTGAGTTAAGTTCATATCCAGTACCTGCCGCCCTCTGTACTTGGGCGTCCGATTCGCATGCTTACTCAGGTGGAGGACACGATAATCTGATTAAACTGAGCTAAACCAGGTGGAACCAGAAACCAGGTGGGGAGTCTCGCTTCAAGCCGTTCTTGCGATCAAACCAGGTGGTCCATTATGAAACCAGGTGGCTAAACCAGGTGGTCCAGATCCTCGAATGATGTCGGTGCACATCAAAACCAGGTGGGGTGGTGGAACGTAAAACCAGGTGGCATAAACCAGGTGGGCCGGTTCGTAAACCAGGTGAAACCAGGTGGGGTGGAAACCAGGTGGGTTACAAATTACGTTGAGATGGCCCAAACCAGGTGGTGGGCTTCACCCATGTCAACAAACCACCCTATGGAACTAAACCAGGTGGAACCAGGTGGTGAAGGCTTATCCTCAGGAAAAACCAGGTGGAGGTGGTGAAATAAAACCAGGTGGACCAGGTGGATAACCCTCGCCTCGCTTCTCAACCGAGACCTGGATAAACCAGGTGGGGTGGTCCACCGATTTTTGAGACACTAGAAACCAGGTGGGCGGGGAAACCAGGTGGCAAACCAGGTGGGGTGGACGGAAACCAGGTGGATATGTCATAAAACCAAACCAGGTGGTGCACCCCCATGGTGTGTCTTATCCGTGCGTATAAACCAGGTGGTCGCACGGCTTCCACTTGCTGAGAATAGGCCCGCAGGGTCAGTGCCATGCCCTCCGTCACTCGATATGTGTTGTAAGAGTGGTTACCCCTTCATTGAAGTCGCCCACAGCCCCACCTGCATTGCTAGACTATCACCCTACAGTAGGCCTTTTCGCCTTCTTCAAGCAGCAATCTCTTATCCGCGGATGGGCGCGGCGAGCGTGGCGTCCCCGAACATTTTTACCTAACGTGTTTTGTTGGCCGCAAGCCTTCCCTCTAGTCCACCTCAGCCATTCAGCCTAGTAGCTTTCAAGCCGAGCCTTCCATATCTAATGGACCGTCCAGAATTTCACACGTTTCACAGGGCTGTGTTCGACCGCCCGTAATGCTGTTTCACAGGCGATCGCCTTGCGGTTTTTTCACAGATCGCAGCCGATGGACATGCCAACTCGATTTTCACAGAGTTTTTCACAGCGGTTTCACAGCACAGCAGTGATTGTTTCACAGCAATTTTCACTTTCACAGGGGCCCTTTTCACAGCTCAGGGCTCTTTTCACTTTCACAGTTTCACAGCGCTCCTTTCACAGAGCGGGGAAATTTAAGGGAACACTCAAGGGAACAAGGGAACACACAAAGGGAACACAACACAACACATAAGGGAACACTTTCACAGAACACAAAAGTCCGAAATCATCAGCGGCGAAGGGATTTCACAGACAGACACTTTCACAGCGCATTTCACAGATACGTACTTTCACAGGCGTACTTTCACAGACTTTCACAGAGGACAAGCTCAATTTTCACAGACAGGCTGGATAAATTTCACAGCGGTAAGGGTTTCACAGCACACATAAGGGAACACGAATTTCACAGCAGGGAACACCTCTACGAGTAATCTATTACTCTACCTACTGAAGGGAACACACCGAAGACCTACTATTACCTATTACTCTTAAAGGGAACACATTACAAGGGAACACACTCTCTCGTCATATCTCACCTCTCTATTACTCTTAAGGGAACACCTTCTCGATCAACCTATTACTCTATGGAGATAGAGATATTCCAGACATATGGAGATAACATGGAGATATGGAGATAATGGAGATGGAGATAGCTCTTATATTTATCCTATGGAGATATGATACTATTAATGGAGATAATTCTAATGGAGATATAATTACTCTAAGAGGATGGGATCTCGGGCTATTACTCTAATGGAGATAAGCACTATTACTC
TAGGAAATGGAGATATGTCAATGGAGATATGTAATGGAGATAGAGGGAGATGGAGTCGCCATTTCATAATCGCCATTTCATAGTTCAGGAATCGCCATTTCCGCCATTTCTAAGATGGAGTCGCCATTTCTACGTATGGAGATAGGATCGCCATTTCATACGACCCGTTGGATATCGCCATTTCCTCGCCATTTCTGGTGACATTTCTCGCCATTTCATTTCTGGAGATAGATGGATCTCGCCATTTCATAGGAATCGCCATTTCCACGTAGGGGGGGCCACAATCCGTAGGTCGGAATTCAGACTCGCCATTTCCCATCGCCATTTCTTCACCTGTATGCCGATCCCTTCGCCATTTCTCATGGAGATAACTCTCTCTCGCCATTTCTCGCCATTTCCATTTCACTCTCATTCGCCATCGCCATTTCCATTCGCCATTTCATCGCCATTTCTTCAGGATAAGATATCGCCATTTCGACTCTCATTCGCATACTGACTCTCATTCTCATCTCGCCATTTCTCATCTGACTCTCATCCTGGGGGAAACTTGCGACTCTCATCACACTTCCGTCGACTCTCATACTGGCGGATAGCATAGGAGCCATTTAAAGACTCTCATTCTCATTCGAGACTCTCATTCAAATCCTACGAGGACTCTCATATAGACTCTCATATCATTACGAGGACTCTCATATACGAGCCATGCATGTGGCGACGACTCTCATCTACGAGCCATGCAAGCAGAATCTACGAGCGACTCTCATTACGAGCCATGTGACCGTACGAGCCATGCATGCATGCCATGCTGACTCTCATCGAGTACGAGCCATGGAAGTTCTTGTTGGTTCGTAGCCCAAGAGCTGAAGTTACGAGCCTACGAGCCATGAAGTTACTTTTACGAGCCATGAAGCTTACGATACGAGCCATGCGAGCCATGCATCCGCGCTACGAGCCATGTTCCAGTACGAGCCATGTTAGTTGCTGAAGTTAAGTTTGGCGCTGAAGTTTGTACGAGCCATGTGCCCGCTGAAGTTTGTTGTACGAGCCATGCATGCTGAAGTTAATGGCTGAAGTTAGCGTTTGCGGGCAGATCCTCATTCTACGATACGAGCCATGCCATGCAGCTGAAGTTAAGTTGGGTTACGAGCCATGCGAGCCATGTGAAGTACGAGCCATGCTGGCTGAAGTTGTTTGTGCTGCTGAAGTTGCTCTTGTCTCTAGCTGAAGTTGCCAACAGGGCTGAAGCTGAAGTTTAAGCTGAAGTTGCGAGCAGGCTGAAGTTATCGGATTGGGGCTGAAGTTCAACCTCCCGTCCCCCCACACTATATTCCCGTCCCCCCCCGCGCACGCGCCGTCTCCCGTCCCCCCTATCCCGTGCGCACGCGACGCGATCCCGTCCCCCCAGAGTGCGCGCACGCGTCCCCCTTCCCGTCCCCCTCTCCCGGGCGCACGCGTCGCTCAACATTTCCGCGCACGCGTCGCGCACGCGGGCGCACGCGGGTCCCGTCCCCCCCCCTCTTCGGCGCACGCGGAATTCCCGTCGCGCACGCGTCCCGTCCCGCGCACGCGTCGCGCACGCGACTGCCCTAACCAACAGTGCGCACGCGCCGGTAACCCGGTAACCCGGTAACCGCGCACGCGGGCGCACGCGCGTAACCCGCGCACGCGCCGCGCACGCGGCCCGGTTCCCGTCCCCCCCGGTAACCCGGTAACTCCCGTCCCCCGTAACCCGGTGCGCACGCGCCCGGCGCACGCGGAGCGCACGCGCCCCCCCCGGTAATAGCGCACGCGCCCGGGCGCACGCGCCCGGTAACCCGGTAACCCGGGCGCGCGCACGCGGCGGCGCACGCGGCGCACGCGGCGCACGCG |
||||
11 566 18 |
||||
Output |
||||
AAACCAGGTGG |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
Input |
||||
CGGAAGCGAGATTCGCGTGGCGTGATTCCGGCGGGCGTGGAGAAGCGAGATTCATTCAAGCCGGGAGGCGTGGCGTGGCGTGGCGTGCGGATTCAAGCCGGCGGGCGTGATTCGAGCGGCGGATTCGAGATTCCGGGCGTGCGGGCGTGAAGCGCGTGGAGGAGGCGTGGCGTGCGGGAGGAGAAGCGAGAAGCCGGATTCAAGCAAGCATTCCGGCGGGAGATTCGCGTGGAGGCGTGGAGGCGTGGAGGCGTGCGGCGGGAGATTCAAGCCGGATTCGCGTGGAGAAGCGAGAAGCGCGTGCGGAAGCGAGGAGGAGAAGCATTCGCGTGATTCCGGGAGATTCAAGCATTCGCGTGCGGCGGGAGATTCAAGCGAGGAGGCGTGAAGCAAGCAAGCAAGCGCGTGGCGTGCGGCGGGAGAAGCAAGCGCGTGATTCGAGCGGGCGTGCGGAAGCGAGCGG |
||||
12 |
||||
Output |
||||
CGGCGGGAGATT CGGGAGATTCAA CGTGCGGCGGGA CGTGGAGGCGTG CGTGGCGTGCGG GCGTGCGGCGGG GCGTGGAGGCGT GCGTGGCGTGCG GGAGAAGCGAGA GGAGATTCAAGC GGCGGGAGATTC GGGAGATTCAAG GTGCGGCGGGAG TGCGGCGGGAGA |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
Input |
||||
CCGAAGCAATTGAAACCCCCCCGGCCTGGGAGGCGCAAAAATCTGACCTCTTTGTGAGTTGACCACTTAATTTATGTCTGACCACGAGAAGGGCTACTGATTTGGTACGTCGGGTCATGACCCCCAGTTCTTAGCCGCCTGCTCCAATCTCTGACTTGTTTATCGAGGGGATGGAGTAACGAAATGCGATTCGCCCGCTCAGGCCAAGGTATATATTTGAGTAGCGGAAGGTTGCACTACCTACAACCACGGCACACCGGCACGTTGTCGTGCCCTGGCGGCCTGCGCACTTTCGCCACTGTCAAGTACGACTTCCCAAGCTCAACCAACATTCATAATCCGGTGCAATTCATACCGTATCATCGTGCTATAAGCGACGCCGATTCTCGGGGCCTGATAATTGAGACTGGACTACATAGTGGGTGCCCTCTCTGCGAGTAAGTGACGGAACAACGGAGATCAGGGACCAAATGGTAGCAAAACAGATCGAGGTACACGCAGGTAGCTGTCCGTGGAGTAGACCGCGCTTAGCGTCTGTTAGAGTATCATCGGGGTATTAGACACAGGAACCTCTATGCTGTTAAAAGGCCATACCCCGTAATTGTGCAAATTTGTTACGTTCAAATCTACGCAGTGAGGGTCCTAAGGTGATGGCAGGGATTGGAACTTCTCCGCTGGCTCTTAGATTACTTAGCCAGTCTACCCTCGAAGATACAAATCCTTCCACCAGAGGGAGCTCATTGAAATTCATTCCATGCTACTCGACCGCGCGTATGGGTGCGGGGCTCTATGGGATCTAACTCGATCCTTCAGAGTCCTTATTCAAATGCATTTCCGTCCCCGTATGTTTCGACGAAGCCGAAGCCCAAACCCTGGGATGGACGAATTAAGGACAGTACAGGCAATAGTGTTCTCCCATACTCGGAACAGACGCCTCATTTTTTCGCGAAATCGATCTGGGTTGGAAGAAGTTCCAGTGCAGAGTTCCTATCACACAATTCGTTCTCGGGGCTTCCGGCCCATAAGCGATACTACTGTCTTTGCGAGCTAACGATTACATTCGGGGGAACTTAGCTCGGACTGGACCAGGTACATGATCCAAAGCGCGATGTCTGTCTGTTACCCTCACCGCCGCTCTTTTATCGGGTA |
||||
GCGTAGTAGGTTCGCGTACCTAGTTCCGCCGAAAAGACAAAGGAGAAGGGAATGCTCCTAGTAGTTTCAGTCTAGCAAACATGTTATAACGCTAACTGTGTGCTGCAAAAAGGATTTGAACCCAAATTTTAAAGCGCTGATCGACAGAACGCTGTTGAAGAGGCGATGGTACTGAGATTCCCCAGAAACCACCTCCGCGCTATGTGCTCAAGACAACCCGCATTCGTTTTTACTAGATTTGGAGCCGAGTTGTGATTTGGATATTTTCACATAAGACCGAGCAGGAAATATACCTTGTTGCAGCTATTGACCCCGTTCTCTCGGAAATCCATGGAATAGTCTTCGGATATTCGTACCAATGGGCGCGATGTTGCGATAAGAGAGCACATTTCATTAAGTGGTGCTCCGCCGCTAAGATGGGAAGGGGCGAGTCTATCGCAGCATCGAAGGCTGAGTTGGCCATTGCCGAGAGTATACATATTTACGATCACACTCGCATAGTCCCACGCATTACGTCCGAGATAGTATGTCCCAATGCAACCTAAAGCCGCGAGATTCCCTAAGGAGAAAATTAAACACTGGAAATTAGGTGATGCTACATCCCATGGACACTTTCGGAACAATATCGGTGACACACATCATCCGTGATCCCGTGATATTTCATCCATGGAGAGAGTATGGTTTTACTACACCTGGTCTAGGCCAAGCCTAACCCCCTGTTCATCCGTTTTATACGAGTATTACCTTGACGACCATAGAGGATAGACTCGGTATCCCGCACACTCTACACACACGACTTAATCCGCTCCACGACCTTCCTAGCGATCTTTGGCGCAGCCGGTTCGCGTATTTTACGACCAACTCGATGGATCCCAATTATCCCCCTGGTAGTGCCCCTCCGCCTGAGAATTCGACGGGCGAGGTCCGGGGGACCGACATAGAGTGGAATGCTTCTTTCCGGGATAACACGTGATTGACATAAAAATGTAGGGCAGATAGGCATCGTTAGCACCTCTCTCCTTGCTGCACTGCGTTTATCGATCGAATTCAAGACTTGTGCATGTTGAAAACAACCTCGCGTTATCCCTGCTATTTGCTTCAGAGCCGTAGGAGGGGACCATGCGTGAGTCCTCCTGAGCAACCTCAATT |
||||
Output |
||||
844 |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
Input |
||||
ACACCA |
||||
CCGAACACCCGTACACCGAACACCACACCACACCTTGCACACCACACCTACACCACACACCACACCGGACACCCACACCCACACCACGAACACCGAGAGTACACCTACACCTGACACCGGGGATCGTCACACCAAGTGGTGATACACCCACACCCTTTACACCTACACCACACCCGTACACCCTGAACACCACACCTAGAGAGTTGCACACCTCACACCGAAGGCACACCACACCATCCACACCATAAACACCGTTAACACCGTAGAACACCCAGCACACCCTTACCGCATACACCGACGTTAGACACCCACACCGGCAGTCACACCGTACACCCATTCGGTCCACACCCTACACCGCCTGCCACACCTACTGAGTTACACCGCATGACACCATTATCCGAACACACCAATATACACCAACACCATACACCATTTAACACCCCAAAACACCGACACCGACACCGCAAGCCCACACCACACCCACACCACAGACACCTACACCGTTTAGACACCAACACCGACACCACACCCCACACCCAAGACACCGCTACACCCTGCTGGACACCGACACCTACACCTCACACCGGACACCGCACACACCGCCACACCAATCACACCACACCACACCAGTACAACACCGACACCTACACCACACCACACCCAGATACACCCACACCGGACACCACACCAAACACCATTACACCCACACCGGTACACCACACCTCGTACACCAAGTAGACACCCAACACACCACACCTTGATGACACCTGACACCATACACCAAACACCACACCGAGGTAGACACCACACCGCCATCGACCACACCCTGACACCATACACCACACCACACCTAGTCGACACCCACACCCTCACACCTGACACCCGCGGCATACACCCACACCACTTACACCTACACCGGGGGAAACACCGAAACACCTCAACACCGGACACCACACCTAAGACACCGGGCGATACACCTGACCCTGACACCACACCACACCCAACACCCGAACACCACACCCAAACCTTGACACCCACACCAAAACACCCTTTATTAAAACACCCCGACCACCAAACACCACACCCCACACCGAACACCCACACCGCATACACCGGTCACACCTTATCTCGCCCACACCCTACACCCCACACCACACCACACCACACCGTACCACACCACACCCCCACACCAAAACACCACACCACACCGGTTACACCCCACACCAACACCCACACCATTACACCTACACCGCAACACCTGCACACCACACCAAGACTGGAGACACCTACCACACCCTCGTTTACACCACCTGACACCTTACACCTCCGACACCAAAAACCCGTTGGGTCATCGGATCAGGACACCTTTACACCACACCTTCGAGGACACCACGGACACCACACCCCACACCACACCGGTACACCGCGTTCACACCTCACACCGACACCACACCCCCTGAACTGTATACACCACACCACACCAACCCAACACCCTAGAAGACACCTGCCACACCTTACACCACACCACCGACACCAACACCCAAACACCTTTGACACACCACACCAACACCGTACACCGCAACACCCGCATTACACCTTACACCACACCACACCCCCCTACACCCACACCACACCCTCGGACACCAGTACACCACACCACAGATAGACACCATACACCTTACACCACATACACCTTTCACACCACACCCACACCCCGCTTAGACACCGACACCACACCACACCTGACACCACACCTCGCACACCGCCCTTACACCACACCCCAGCAGAAAACGAACACCCACACCACACCACACACCACACCACACCACACCGACACCTGACACCTAAACACCCCCACACCACACCTCTCCAACACCACACCAACACCTACACCAGAAAGACACCGACACCCGACACCCGCTGTTGTACACCCACACCATCGACACCACACCACACCACACCCTACACCGGCACACCATGCAAACACCACACACCTGGAC
ACCCACACCACACCGCACACCACACCACACCTACACCACCGACACCACACCACACACCTACTCCACAACACCTACACCAAACACCCTACACCTACACCTACACCTACATACACCTACACCTAATATTATGGACACCACACCTTCAGACACCGTACACCACACACCCTATGTTACACCACAGGCAGAATTTGACACCTCACACCCACACCCACACCCGCACACCACACCAACACCACACCACACCCCCAACACCGCTCTTACACCTTACACCGACACCAACACCGACACCGACACCACACCCCAATATCCCTCACACCACACCTAACCAGTATACACCGTTGACAACACCCCAATTTACACCCCATACACCTCAGACCACACACCGGACGGGCAACACCTACACCGATGTTACTTTACACCGGGCTCGCGGACACCACTCGACACCAACACCCGACACCTTACACCACACCAGCTGCGTGAACACCTACACCATCCCAACACCACACCGACACCGTATGGACACCTACACCTCGAGAGTTCCGCTAGAACACCACACCCATACACCATACACCGCGTACACCGAACACCGACACCCACACCACACCCAATGACACCGATGACACCGGCTCGATACACCTACACCGAACACCATCAGACACCGCGTACACCCAACACCTGACACCAACACCGCGGCACACCTAGTGACACCTACACCTACACCACACCATACACCCTACACCGATGAACACCAACACCACTCTAAACACCCAGGACACCAACACACCTAGACACCACACCAACGACAGAGACACCCTACACCTGCCAAGCTTTACACCATTGGTGAATCACACACCACACCAACACCACACCACACCGCTTACACCCGACCCGAAAACACCCACACCACACCAACACCACACCACATTACTCCCGTTACACCTACACCAACACCACACCTTTACACCACACCCAGCAACACCACACCAAATGGACACCACACCACACCACACCTTAGCCGATGTGCCGACACCGCTGTCGTCACACCAGTGACACCTTAGCGTACACACCACACCCAACACCTACACCACACCCGAAACACCTGACACCACACCACACCACACCCTACACCACACCATGACCACACACCAGCCGACACCACACCATACACCTACACCGAAACACCTTTCTACACCACACCACACCTGAACACCTAGTCACACCACGACACCAACACCTGACCACACCGGGGGACACCTTTGGAACGACACCTAACACCGCCACACCACACCACACCCGACACCTATAACACCACACCACACCACACCAAAGGCACACCTTAACACCCACACCAAGGGCTACACCACACCACACCTCCAAAACAAGGGACACCACACCCAACACCACACCACACCGCGTGGACACCACACCTTGACACCAAATTGTGCACACCACACCTGCACACCTTAAGAACGACACCGTCAGTACACCGAAACCCTATGACACCTGGGACACCTGGCACACCAACTACACCACACCCACACCACACACCTGGACACCGTTTCGCGAGTGTGGGTTGCTTGACACCACACCACACCGCGGCCTTACACCGCACACCGTAAACACCGTTGACACCTCATTACTCGACACCACACCGCACACCCACACCCGACACCGAACACCACACCTGGGCATACACACCACACCGTACACCTACACCACACCTGTGCTACACCAGGGGTACACCACACCTAGTACACCACACCGATACACCCACACCACACCACACCCACCAACACCACACCATCAAGAACACCCTATACACCCACACCACACCTACACCACACCCTACACCACACCACACCACACCATCGACACCTACACCACACCAACACCACACCAAACACCACACCCACACCCGGACACCACACCCACACCACACCATAACACCTAACACCACACACCTACACCTACTCTGCTAAACACCCAACACCTCTACACCC
TGCCGACACCGCGACACCGGCGACACCCTGTTACACCACACCTCACACCTTCGACACCAGCCAGAGACACCGGACACCGACACCCCGAACACCAACACACCCGA |
||||
Output |
||||
19 24 38 49 56 80 128 164 186 225 230 239 387 403 413 419 426 471 482 508 520 604 613 618 623 646 651 679 684 691 713 727 747 770 777 784 801 829 836 841 897 947 986 991 1011 1036 1075 1148 1153 1158 1173 1186 1194 1199 1220 1232 1262 1267 1303 1329 1369 1386 1395 1407 1444 1467 1472 1477 1516 1521 1530 1555 1560 1599 1604 1625 1640 1648 1653 1666 1680 1698 1728 1733 1745 1770 1800 1805 1812 1817 1822 1856 1872 1877 1889 1933 1942 1947 1952 1972 1983 2004 2016 2021 2032 2041 2046 2073 2131 2153 2172 2218 2223 2229 2234 2272 2290 2312 2430 2440 2460 2465 2486 2497 2547 2560 2595 2645 2678 2716 2721 2745 2751 2772 2788 2793 2831 2849 2854 2860 2865 2900 2905 2911 2916 2941 2947 2960 2975 2980 2991 2996 3001 3040 3063 3081 3102 3107 3112 3124 3129 3142 3152 3157 3188 3193 3216 3224 3279 3284 3305 3310 3315 3320 3345 3357 3362 3385 3397 3402 3418 3431 3445 3517 3526 3537 3580 3585 3643 3675 3694 3712 3728 3739 3753 3772 3777 3792 3797 3824 3835 3847 3852 3857 3862 3877 3882 3888 3893 3900 3919 3930 3935 3950 4032 4053 4088 |
@ -0,0 +1,4 @@
@@ -0,0 +1,4 @@
|
||||
Input |
||||
GCACTAAAGCACCAGCGAGACTAGACAGTGCCTTACGCTGTATAGGGATAAAAGTTGTCAAGATGACTTGCGGGAATCGTTAGGCTGACACGCACTAATGCTCGCCTTCCGGGTGTTCTGTGAGTACGGTTGATCACGGTCGCCCTGCGGATGTACTACCATGAAAGTTGATCACGTGCCGCGCGCTCCCTAAGCTTAGAAGTTTGCACAATCTGCATTCTATCCTGCCACGCCTTCAATAATAAGTGGTGTATGCAATTTGGAGTCGATCTGGGAACCAACGATTAACTTGGGAAGTGGCTATATCAAAATACGATGTCTTCAGCGTCGCGGTCGACGCTGCGCAACGAACGAAAAGTCCGATGGACCCGAACTCTGATTATACCGAATCTCCGCTTTTACGACTCGCCACATACCGGCATAAGCCATTCTGGGGCTTTGCCCCCTTAGGTCTAGCCCACCCCCGACCTAGCTTGAGCGTGTCACACCCCAACAGCCGCATTACGCCCGCTCACCGACACTTGGCGGTCGTATAAGAAATCCAAAACCGAGACGAAAACTGAAGAATAAGGTTCATTCAGCATTGTGGAGTTGACAACATCAGTATGAGGGTGAGTTGCGTCAAAGTCGAAGAATATGGAGGGTCAAATCACGAGATGTAACATCCACGCGAACACTTAGCTAGTAATCATTTTTCCGTAAAGAGTCGTTGAGTCCGACCAGTTGAAGCTCAGTGTTTATCCGGTAGGGAATTGTAGGATCAACGATAGGGTCGCGGAACCGCCGTATTATAGAAAGAGATAGTCCCAACGTTCTTTATGCACTTCGCTGAGAGAGGGTGACCGGGCACGCAGAGACTTTGGCTTTGTAGCCCCATTCCGCGGCTCTTCGGATACTGACTGAGCTGTAGTCGGCACATCCTTTACAACAAAAAAGCTCATGTCCGAGATTTTAATGGCGGCGCACGGTCACTCGGAGTTGACGAATGCGCAGCGAATCGTTGGTTCCAGATAAAGGCAAGGCTGTGTTACTGTTTCGGAGGGCAATCGTCAACGAGCAAAGATGTTAGAATAGAAATCGGAGCGAGGCTCCCAGCAAATATGAGTTAGGATCTTTTTTGCGAAAGGGTTGGTCTCCATCTCCTCTCGCCTGCGAGCGAGTCCCCGAAGCACGTTCAACCTATTTGATTCGGTGCAGGACACCCTAGATTAGCATACAGGTATAATATCAGGAAGAGTCACCTTTCATTCCCGACCAGTAGGATGTATAGGAATGAGACTATCCAGTTCTTTGTCAGCTCAAGACAGCGTTGGCAATACGGCCGAGTATTGGGGGGAATACCCCGGAACATAGTATTGTGCCTTAGCTATTGCCCTAGATACCACGCGGCCCTTGAGCATTTGTCTACACTTTGGTGATCCTAGGCACCCCGCGCTCGTGGCAACGTCAGCATCTTGTGATAGCAAAGCGTATGTACCTGTAATGTAACATCAAAGTATATCGGCACCCTAGTGGGGGCGAAGGTTGGATCGCTTATCACTCGGGACGACGGTGGTATCCAGCCACAGTGTTGCTCATTAACGACCACACAGCTCTTGGAATCGAGCCATGGACAGGGGACGCCCCAGGATACATGATGTTCCTGTGAGCACAAGCACTATGGCAGGCTTAGAGCTAATTCTTCCATTGGGCCGGTAAGACGCCAGAGAAAGTCACCGGTGTGAGAAAGGGTTTCGTGTGGGGGAGGCGTCAAACAACAAGGATTTACGTCGAACCGATCAGCCCTTGTCTGATTCATTCCAGGTTTAAGCGAGCCCTGGCGGTGACCTCCCGGGGATTCTTGGTGACGATAAGTGTAGACTGGTTTATGACTGTCTATAAGTGCAAGCAGTCCGCGACTCGGCCGCTCCTCAGATCTCGTCCTCCCAATCCTTACGAGGCACTATTCCGGCCCTAAAAACTTACCTACCAACCGGACATAGCGAACGGTCTAAGTTTTC
GGAAATTGAATAACACTCGAACAAAGGAGCCCAATACATGGCACAAGCACACATAAAGCTTGGCGCTGCTGACGGCCGGCCCCCACAGCAGGTGGGTATATCAGGATAATGCTCTACCTCCTCGGGGATGACCAGAGACGAACGTTCGGACGCTATTAGTTAGTGGTCGCCCAGATATTCTCCTAATCAAGCCCTCGAAGGCTAGTCTAAATTTTAGCAAAAACTCGTATAGCAGCACATGCGGTAGACTGGGCCTCAGCCAGGTAGAGCTGTGGCTGCACTCGAGCAATCACTACCGTATAGAGTGGTGTTATTTCGGGGTGAATGTCAGGGGTGGTCCAAAATCACAAACACGTCTATTCGCACCCGGGAATGCTCATGTTCCCACGGCGGGCCTGTACAGATGTGAGAGGCAGCGATCATACAAAGTTGCCTGGCCTCCCCACGAACACACGGCGGCCCATTAGGTCTGAACAGGTTTATCGTTAATATATTTTGCGGTGG |
||||
Output |
||||
CCACCGCAAAATATATTAACGATAAACCTGTTCAGACCTAATGGGCCGCCGTGTGTTCGTGGGGAGGCCAGGCAACTTTGTATGATCGCTGCCTCTCACATCTGTACAGGCCCGCCGTGGGAACATGAGCATTCCCGGGTGCGAATAGACGTGTTTGTGATTTTGGACCACCCCTGACATTCACCCCGAAATAACACCACTCTATACGGTAGTGATTGCTCGAGTGCAGCCACAGCTCTACCTGGCTGAGGCCCAGTCTACCGCATGTGCTGCTATACGAGTTTTTGCTAAAATTTAGACTAGCCTTCGAGGGCTTGATTAGGAGAATATCTGGGCGACCACTAACTAATAGCGTCCGAACGTTCGTCTCTGGTCATCCCCGAGGAGGTAGAGCATTATCCTGATATACCCACCTGCTGTGGGGGCCGGCCGTCAGCAGCGCCAAGCTTTATGTGTGCTTGTGCCATGTATTGGGCTCCTTTGTTCGAGTGTTATTCAATTTCCGAAAACTTAGACCGTTCGCTATGTCCGGTTGGTAGGTAAGTTTTTAGGGCCGGAATAGTGCCTCGTAAGGATTGGGAGGACGAGATCTGAGGAGCGGCCGAGTCGCGGACTGCTTGCACTTATAGACAGTCATAAACCAGTCTACACTTATCGTCACCAAGAATCCCCGGGAGGTCACCGCCAGGGCTCGCTTAAACCTGGAATGAATCAGACAAGGGCTGATCGGTTCGACGTAAATCCTTGTTGTTTGACGCCTCCCCCACACGAAACCCTTTCTCACACCGGTGACTTTCTCTGGCGTCTTACCGGCCCAATGGAAGAATTAGCTCTAAGCCTGCCATAGTGCTTGTGCTCACAGGAACATCATGTATCCTGGGGCGTCCCCTGTCCATGGCTCGATTCCAAGAGCTGTGTGGTCGTTAATGAGCAACACTGTGGCTGGATACCACCGTCGTCCCGAGTGATAAGCGATCCAACCTTCGCCCCCACTAGGGTGCCGATATACTTTGATGTTACATTACAGGTACATACGCTTTGCTATCACAAGATGCTGACGTTGCCACGAGCGCGGGGTGCCTAGGATCACCAAAGTGTAGACAAATGCTCAAGGGCCGCGTGGTATCTAGGGCAATAGCTAAGGCACAATACTATGTTCCGGGGTATTCCCCCCAATACTCGGCCGTATTGCCAACGCTGTCTTGAGCTGACAAAGAACTGGATAGTCTCATTCCTATACATCCTACTGGTCGGGAATGAAAGGTGACTCTTCCTGATATTATACCTGTATGCTAATCTAGGGTGTCCTGCACCGAATCAAATAGGTTGAACGTGCTTCGGGGACTCGCTCGCAGGCGAGAGGAGATGGAGACCAACCCTTTCGCAAAAAAGATCCTAACTCATATTTGCTGGGAGCCTCGCTCCGATTTCTATTCTAACATCTTTGCTCGTTGACGATTGCCCTCCGAAACAGTAACACAGCCTTGCCTTTATCTGGAACCAACGATTCGCTGCGCATTCGTCAACTCCGAGTGACCGTGCGCCGCCATTAAAATCTCGGACATGAGCTTTTTTGTTGTAAAGGATGTGCCGACTACAGCTCAGTCAGTATCCGAAGAGCCGCGGAATGGGGCTACAAAGCCAAAGTCTCTGCGTGCCCGGTCACCCTCTCTCAGCGAAGTGCATAAAGAACGTTGGGACTATCTCTTTCTATAATACGGCGGTTCCGCGACCCTATCGTTGATCCTACAATTCCCTACCGGATAAACACTGAGCTTCAACTGGTCGGACTCAACGACTCTTTACGGAAAAATGATTACTAGCTAAGTGTTCGCGTGGATGTTACATCTCGTGATTTGACCCTCCATATTCTTCGACTTTGACGCAACTCACCCTCATACTGATGTTGTCAACTCCACAATGCTGAATGAACCTTATTCTTCAGTTTTCGTCTCGGTTTTGGATTTCTTATACGACCGCCAAGTGTCGGTGAGCGGGCG
TAATGCGGCTGTTGGGGTGTGACACGCTCAAGCTAGGTCGGGGGTGGGCTAGACCTAAGGGGGCAAAGCCCCAGAATGGCTTATGCCGGTATGTGGCGAGTCGTAAAAGCGGAGATTCGGTATAATCAGAGTTCGGGTCCATCGGACTTTTCGTTCGTTGCGCAGCGTCGACCGCGACGCTGAAGACATCGTATTTTGATATAGCCACTTCCCAAGTTAATCGTTGGTTCCCAGATCGACTCCAAATTGCATACACCACTTATTATTGAAGGCGTGGCAGGATAGAATGCAGATTGTGCAAACTTCTAAGCTTAGGGAGCGCGCGGCACGTGATCAACTTTCATGGTAGTACATCCGCAGGGCGACCGTGATCAACCGTACTCACAGAACACCCGGAAGGCGAGCATTAGTGCGTGTCAGCCTAACGATTCCCGCAAGTCATCTTGACAACTTTTATCCCTATACAGCGTAAGGCACTGTCTAGTCTCGCTGGTGCTTTAGTGC |
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
GTCCGGGGTCCGGGGTCCCGGGGTTACCCGGGGTCCCGGGGTCCGGGGTTACCCGGGGTAACCGGGGTCACCGGGGTCTCCGCCGGGGTGGCCGGGGTACCCGGGGTCCCGGGGTCCGGGGTCCGGGGTGCACGGGCCGGGGTGTACCGGGGTCCGGGGTCGGCGTCCGGGGTCCGGGGTCTGGAAGTTGAACTGACACCGGGGTTCCGGGGTCCGGGGTCCGGGGTCCGGGGTGGAACCCGGGGTGTCCCGGGGTCCGGGGTCCGGGGTCCGGGGTTCCGGGGTAGGCCGGGGTCCCGGGGTAGTGTGTGCCGGGGTCCTCGCCGGGGTAGCGCAAAACCGGGGTGCGGTAACTACCGGGGTCCGGGGTGCCGGGGTCCGGGGTTCCGGGGTCCGGGGTTCTCCGGGGTCCCGGGGTAGCCCGGGGTATCCGGGGTCCGGGGTCCGGGGTCCGGGGTACCGGGGTCAGGGCCGGGGTGACCGGGGTTCCGGGGTATCTGTTTATCCCGGGGTCCGGGGTCGGTAAACCGTCCGGGGTTTACCCCGGGGTCCGGGGTCTCGATCAAACCGGGGTTATGAGAATCCGGGGTCCGGGGTCCCGGGGTAGACCGGGGTACATCCCGGGGTGTCCGGGGTTACAAGCCGGGGTCCAAACGATTCCCGGGGTCCGGGGTTGCCCCGGGGTCCGGGGTGATGCACCGGGGTAAGCCGGGGTTGACGACCCCGGGGTCGCCGGGGTCTGCACTCCGGGGTTCCGGGGTAGCCGGGGTCAACCGGGGTAACCGGGGTTTGCCGGGGTCCCGGGGTTTGTCCGGGGTCCACCGGGGTCCGGGGTGCCGGGGTTCTACCGGGGTGCCGGGGTACACCGGGGTAGCCGGGGTATCCGGGGTACCGGGGTAAACCGGGGTGCCGGGGTCCGGGGTCCGGGGTTCCCGGGGTTTCTACCGGGGTGGGACCGGGGTCCGGGGTCCGGGGTATTAACCACCGGGGTGCGACCGGGGTGGCCGGGGTCCGGGGTATCCGGGGTACATCCGGGGTACGG |
||||
CCGGGGTCC |
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
AGTAGGTTCAGGGCGTTTAATAGCGAAAACAAATAATAGCAGTAGGTTGTACCACGTACCACTAATAGCGAAAACAAAAGTAGGTTCAGGGCGTTGTACCACGTACCACGAAAACAAATAATAGCTAATAGCGAAAACAAAGTACCACGAAAACAAATAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCAGTAGGTTCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACTAATAGCCAGGGCGTTAGTAGGTTGAAAACAAACAGGGCGTTAGTAGGTTCAGGGCGTTGTACCACTAATAGCTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCCAGGGCGTTGTACCACGAAAACAAACAGGGCGTTTAATAGCAGTAGGTTGAAAACAAATAATAGCCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTAGTAGGTTGTACCACGTACCACGAAAACAAAGTACCACGAAAACAAATAATAGCGTACCACGAAAACAAAAGTAGGTTTAATAGCGAAAACAAAAGTAGGTTAGTAGGTTCAGGGCGTTTAATAGCTAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCTAATAGCGTACCACCAGGGCGTTGTACCACGTACCACGAAAACAAAGAAAACAAAAGTAGGTTTAATAGCAGTAGGTTAGTAGGTTAGTAGGTTTAATAGCGAAAACAAACAGGGCGTTTAATAGCGTACCACGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAACAGGGCGTTGAAAACAAA |
||||
12 |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
ATGACTAGTTATGCGACACGTGTTCCTTAAACAAACCGCTGATTGCGGAGGGATCATGTTGAAACGCAGTCAGTTGGCGCTTTACAAGAATTTAAGTGTCCCTCGGAGATGCTCCACTACACGCCATGGCGAAACGGTTCAGTCTCTTAGAAGAAGAAAGATATAGGAGTTGCGCCACCGTGATATAAGCACCGCAGTATCTGAAGGGAGCACAACTTGCTGCGAACAGACTGGTACGGTTACGTCGGGGCTTCAGGCATCGTTGGCGAGGTAGGAATCCTTATGTTAATTTTAAATCGAAGCAAAACAGAACTGTTGATCACTCATGTGTCGTTAACCGGAAGACTGCGGGTGCTCAGCCCCAATCGACGGCTGTTAGGAATGGCACACTACTGTATTTGTGACGACTAACTTGACATTCGAAGGTATCTGCGGTTGTTAAACGCCGATAATCGCCACCGCAGTTCTGAAAGGCTATATGTATCACGGTGATTTACGGCATTGTAAGCCCACTCAGAGTGCGTCGTAGGTTACGCGTTCTGAGTTGAAATAATCCAGTCGAACACGGTTGGTATCATGAATTCAGACTACCGTTTCTTGACTCCCGTCCTATACGAGTCTAGAGCGAACTTCGGGGTAAGAAATCACAATTAATCTCTTCCTTGTGTGATCCGCAAGGAAGCTGAGCTCAATTTGCAAGTACAGGTAGGTGGCAATCGAGAGCTACTAACACTCTTGTGTCGTTCGTAATTCATAAATAAAAAGACACGCCCTTATGATTGAAGCCTGAACTGCGGCAACGGTAGGTTTCCAAAGAGGATCGAGTCAGCGATACCCCCTGTACGCAGACAGATTATTACCCCCACTCTGCAATGTAGAAGTCTTAAAAACGCACTCTAGGCCAGTAACCAACCAGCTGGGTGGTGCGTTACCTAGTGCTATACAACAGTACCACCAGATTAGAAGCATGCCAGGTGTCTCGACACCTCCAATTCGTCATTTGGTGTGAGAAAAAGATATACCGCCAAGTTGCCATACCTGCAC |
||||
ATGGACTATTTTGCTATACGATTACTAGGAATAAGTTGAACAACCCTTGCTTTTCTTTTTAACACAGCCAGAGGCTCGGGATGGAACGCGTCATCTCGCGGACTCAGAGATGCCAGATGGTAGGCCTCTTCCAACGAGTAACTTACGATAATTTGATAGATTCTTGAACGTAGTGTGTCGACCTCCGTACCGGAAAATTTTCTTATCTCTAGTGAACCGTCGAGCTGTACTTTAGACCCCTGTGCGACGATAGGTCTCCTGCGTTAGGTATTTTACATATTCCGCTGGGACCCAAATTTTTCCCGCGAACGGGATAGAGGTAGTATCTAACTTCGTTTACACAACGTAACATCCCGCCATGGTCGTTACGGGCGTACCCGCGCGGCGAAGGGCGCGGACCCGCGAATCATAAACTAAGAAAAGAGTATGTTGAAGCGCACCCGCCATGTCGCTCGACATCTGCCTGGCATGCTATAATACCTGCTGAGCAGTACATCCACGGCGTCTATGAGCGCCACGTCAATCGGATCAGCCGGAATGCTGATTCTGTTGTGTCGCCGTTATGAATTTGGAGGTGGCACGCAAGGTTCCAGCCCTGTATAGTGTGTTAAAGTCCACTTTTCATCATTGCTTAATGTTTAATCGGGTCCTCACCCGAAACTGTGATTGCGTTCTTATGTAAAGCTCTCGTTAGCAGACACCAATCTATGAAACTTCCGCCTCGGGCAACTTTCATGAGGCACTGTAACATTTGTTGCATAGAGCCGTACTATGGCCACCGTATTTTATATGGCTGACGTAAAGAGCCTGTTAATGTGTAATTCGAAGGTCCCTTTAGATGAGTCTCATGCCAGACCCAGAAGAGTGACGGCTGTCTCGGAGTGGGTATACGTTAGCCCCTGCCAATAGTAAAGCGTACACCTTGTCTTCAAGACTGTCACTGACACAAATTCCCCGACCCATATTCCGTTCCGGGTTGGTCTACCTTACGGCGGGAATCCAGAGGCCTAATGCGCTGGTTATATACCACCGGATCCCGATATA |
File diff suppressed because one or more lines are too long
@ -0,0 +1,15 @@
@@ -0,0 +1,15 @@
|
||||
package main |
||||
|
||||
import ( |
||||
) |
||||
|
||||
func main() { |
||||
//BA1A("for_real/rosalind_ba1a.txt")
|
||||
//BA1B("for_real/rosalind_ba1b.txt")
|
||||
//BA1C("for_real/rosalind_ba1c.txt")
|
||||
//BA1D("for_real/rosalind_ba1d.txt")
|
||||
//BA1E("for_real/rosalind_ba1e.txt")
|
||||
//BA1F("for_real/rosalind_ba1f.txt")
|
||||
//BA1G("for_real/rosalind_ba1g.txt")
|
||||
BA1H("for_real/rosalind_ba1h.txt") |
||||
} |
@ -0,0 +1,545 @@
@@ -0,0 +1,545 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"sort" |
||||
"errors" |
||||
s "strings" |
||||
) |
||||
|
||||
|
||||
/* |
||||
rosalind.go: |
||||
|
||||
This file contains core functions that |
||||
are used to solve Rosalind problems. |
||||
*/ |
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1A
|
||||
|
||||
|
||||
// PatternCount returns the number of times pattern occurs in input.
// Overlapping occurrences are each counted. A pattern longer than the
// input yields 0.
func PatternCount(input string, pattern string) int {
	width := len(pattern)

	// A pattern that does not fit inside the input can never match
	if width > len(input) {
		return 0
	}

	// Slide a window of the pattern's width across the input,
	// tracking the (exclusive) end of the window
	hits := 0
	for end := width; end <= len(input); end++ {
		if input[end-width:end] == pattern {
			hits++
		}
	}
	return hits
}
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1B
|
||||
|
||||
|
||||
// KmerHistogram returns the histogram of kmers of length k found in the
// given input: a map from each distinct kmer to the number of times it
// occurs. Overlapping occurrences are each counted.
//
// An error is returned when input is empty or when k is not a natural
// number (k < 1); in both cases the returned map is empty. A k larger
// than the input length is not an error and yields an empty histogram.
func KmerHistogram(input string, k int) (map[string]int, error) {

	result := map[string]int{}

	if len(input) < 1 {
		return result, errors.New("Error: KmerHistogram received an empty input string")
	}

	// A negative k would slice out of bounds below, and k = 0 would
	// only ever count the empty string, so reject both up front
	if k < 1 {
		return result, fmt.Errorf("Error: KmerHistogram received a kmer size that was not a natural number: k = %d", k)
	}

	// Visit every window of width k; when k exceeds the input length
	// the loop body never runs and the histogram stays empty.
	// Missing keys read as 0, so the increment also inserts.
	for i := 0; i+k <= len(input); i++ {
		result[input[i:i+k]]++
	}

	return result, nil
}
||||
|
||||
|
||||
// Find the most frequent kmer(s) in the kmer histogram,
|
||||
// and return as a string array slice
|
||||
func MostFrequentKmers(input string, k int) ([]string,error) { |
||||
max := 0 |
||||
|
||||
// most frequent kmers
|
||||
mfks := []string{} |
||||
|
||||
if k<1 { |
||||
err := fmt.Sprintf("Error: MostFrequentKmers received a kmer size that was not a natural number: k = %d",k) |
||||
return mfks, errors.New(err) |
||||
} |
||||
|
||||
khist,err := KmerHistogram(input,k) |
||||
|
||||
if err != nil { |
||||
err := fmt.Sprintf("Error: MostFrequentKmers failed when calling KmerHistogram()") |
||||
return mfks, errors.New(err) |
||||
} |
||||
|
||||
for kmer,freq := range khist { |
||||
if freq > max { |
||||
// We have a new maximum, and a new set of kmers
|
||||
max = freq |
||||
mfks = []string{kmer} |
||||
} else if freq==max { |
||||
// We have another maximum
|
||||
mfks = append(mfks,kmer) |
||||
} |
||||
} |
||||
return mfks,nil |
||||
} |
||||
|
||||
|
||||
// Find the kmer(s) in the kmer histogram
|
||||
// exceeding a count of N, and return as
|
||||
// a string array slice
|
||||
func MoreFrequentThanNKmers(input string, k, N int) ([]string,error) { |
||||
|
||||
// more frequent than n kmers
|
||||
mftnks := []string{} |
||||
|
||||
if k<1 || N<1 { |
||||
err := fmt.Sprintf("Error: MoreFrequentThanNKmers received a kmer or frequency size that was not a natural number: k = %d, N = %d",k,N) |
||||
return mftnks, errors.New(err) |
||||
} |
||||
|
||||
khist,err := KmerHistogram(input,k) |
||||
|
||||
if err != nil { |
||||
err := fmt.Sprintf("Error: MoreFrequentThanNKmers failed when calling KmerHistogram()") |
||||
return mftnks, errors.New(err) |
||||
} |
||||
|
||||
for kmer,freq := range khist { |
||||
if freq >= N { |
||||
// Add another more frequent than n
|
||||
mftnks = append(mftnks,kmer) |
||||
} |
||||
} |
||||
return mftnks,nil |
||||
} |
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1C
|
||||
|
||||
|
||||
// ReverseString returns s with its runes in the opposite order. The
// reversal works on runes rather than bytes, so multi-byte characters
// stay intact.
// Based on https://github.com/golang/example/blob/master/stringutil/reverse.go
func ReverseString(s string) string {
	forward := []rune(s)
	last := len(forward) - 1

	// Write each rune into its mirrored position
	backward := make([]rune, len(forward))
	for i, r := range forward {
		backward[last-i] = r
	}
	return string(backward)
}
||||
|
||||
// CheckIsDNA reports whether input consists solely of the characters
// A, T, C and G. The check is case-insensitive: the input is converted
// to uppercase before it is examined. An empty input returns true,
// since it contains no disallowed character.
func CheckIsDNA(input string) bool {

	// Matches any character that is not one of the four bases
	notBase := func(r rune) bool {
		switch r {
		case 'A', 'T', 'C', 'G':
			return false
		}
		return true
	}

	// The input is DNA exactly when no such character can be found
	return s.IndexFunc(s.ToUpper(input), notBase) == -1
}
||||
|
||||
// Convert a DNA string into four bitmasks:
|
||||
// one each for ATGC. That is, for the DNA
|
||||
// string AATCCGCT, it would become:
|
||||
//
|
||||
// bitmask[A] = 11000000
|
||||
// bitmask[T] = 00100001
|
||||
// bitmask[C] = 00011010
|
||||
// bitmask[G] = 00000100
|
||||
func DNA2Bitmasks(input string) (map[string][]bool,error) { |
||||
|
||||
// Convert input to uppercase
|
||||
input = s.ToUpper(input) |
||||
|
||||
// Allocate space for the map
|
||||
m := make(map[string][]bool) |
||||
|
||||
// Start by checking whether we have DNA
|
||||
if CheckIsDNA(input)==false { |
||||
err := fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s",input) |
||||
return m, errors.New(err) |
||||
} |
||||
|
||||
// Important: we want to iterate over the
|
||||
// DNA string ONCE and only once. That means
|
||||
// we need to have the bit vectors initialized
|
||||
// already, and as we step through the DNA
|
||||
// string, we access the appropriate index
|
||||
// of the appropriate bit vector and set
|
||||
// it to true.
|
||||
m["A"] = make([]bool, len(input)) |
||||
m["T"] = make([]bool, len(input)) |
||||
m["C"] = make([]bool, len(input)) |
||||
m["G"] = make([]bool, len(input)) |
||||
|
||||
// To begin with, every bit vector is false.
|
||||
for i,c := range input { |
||||
cs := string(c) |
||||
// Get the corresponding bit vector - O(1)
|
||||
bitty := m[cs] |
||||
// Flip to true for this position - O(1)
|
||||
bitty[i] = true |
||||
} |
||||
|
||||
return m,nil |
||||
} |
||||
|
||||
|
||||
// Bitmasks2DNA converts four bitmasks (one each for the keys "A", "T",
// "G" and "C") back into a DNA string. Position i of the result is the
// nucleotide whose mask is true at index i; if several masks are true at
// the same index, the first of A, T, G, C wins.
//
// An error is returned when
//   - any of the four keys is missing,
//   - the four masks do not all have the same length, or
//   - some position has no mask set, since no nucleotide can be emitted
//     for it.
func Bitmasks2DNA(bitmasks map[string][]bool) (string, error) {

	// Verify ATGC keys are all present
	maskA, Aok := bitmasks["A"]
	maskT, Tok := bitmasks["T"]
	maskG, Gok := bitmasks["G"]
	maskC, Cok := bitmasks["C"]
	if !(Aok && Tok && Gok && Cok) {
		err := fmt.Sprintf("Error: input bitmask was missing one of: ATGC (Keys present? A: %t, T: %t, G: %t, C: %t", Aok, Tok, Gok, Cok)
		return "", errors.New(err)
	}

	// All masks must describe the same number of positions; indexing a
	// shorter mask with the length of the A mask would panic.
	size := len(maskA)
	if len(maskT) != size || len(maskG) != size || len(maskC) != size {
		err := fmt.Sprintf("Error: input bitmasks had different lengths (A: %d, T: %d, G: %d, C: %d)", len(maskA), len(maskT), len(maskG), len(maskC))
		return "", errors.New(err)
	}

	// Only ASCII letters are ever written, so a byte buffer is enough.
	dna := make([]byte, size)
	for i := range dna {
		switch {
		case maskA[i]:
			dna[i] = 'A'
		case maskT[i]:
			dna[i] = 'T'
		case maskG[i]:
			dna[i] = 'G'
		case maskC[i]:
			dna[i] = 'C'
		default:
			err := fmt.Sprintf("Error: input bitmasks had no base set at position %d", i)
			return "", errors.New(err)
		}
	}

	return string(dna), nil
}
||||
|
||||
|
||||
// Complement returns the complement of a DNA string: every A is swapped
// with T and every C with G (and vice versa), position by position.
//
// The input is upper-cased first, so lower-case nucleotides are accepted
// and the result is always upper-case. If the input contains anything
// other than A, T, C or G, an empty string and an error are returned.
func Complement(input string) (string, error) {

	// Convert input to uppercase
	input = s.ToUpper(input)

	// Validate and translate in a single pass. Valid DNA is pure ASCII,
	// so walking bytes is safe: any byte belonging to a multi-byte
	// character falls through to the default case and is rejected.
	output := make([]byte, len(input))
	for i := 0; i < len(input); i++ {
		switch input[i] {
		case 'A':
			output[i] = 'T'
		case 'T':
			output[i] = 'A'
		case 'C':
			output[i] = 'G'
		case 'G':
			output[i] = 'C'
		default:
			return "", errors.New(fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", input))
		}
	}

	return string(output), nil
}
||||
|
||||
|
||||
// Given a DNA input string, find the
|
||||
// reverse complement. The complement
|
||||
// swaps Gs and Cs, and As and Ts.
|
||||
// The reverse complement reverses that.
|
||||
func ReverseComplement(input string) (string,error) { |
||||
|
||||
// Convert input to uppercase
|
||||
input = s.ToUpper(input) |
||||
|
||||
// Start by checking whether we have DNA
|
||||
if CheckIsDNA(input)==false { |
||||
err := fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s",input) |
||||
return "", errors.New(err) |
||||
} |
||||
|
||||
comp,_ := Complement(input) |
||||
|
||||
revcomp := ReverseString(comp) |
||||
|
||||
return revcomp,nil |
||||
} |
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1D
|
||||
|
||||
|
||||
// FindOccurrences returns the zero-based byte offsets at which pattern
// occurs in genome, in increasing order. Overlapping occurrences are all
// reported. When pattern is longer than genome the result is empty.
//
// The returned slice is never nil and the error is always nil.
func FindOccurrences(pattern, genome string) ([]int, error) {
	hits := []int{}
	width := len(pattern)

	// Slide a window of the pattern's width across the genome; the last
	// usable start is the one whose window ends exactly at the genome's end.
	for pos := 0; pos+width <= len(genome); pos++ {
		if genome[pos:pos+width] == pattern {
			hits = append(hits, pos)
		}
	}
	return hits, nil
}
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1E
|
||||
|
||||
// Find k-mers (patterns) of length k occuring at least
|
||||
// t times over an interval of length L in a genome.
|
||||
func FindClumps(genome string, k, L, t int) ([]string,error) { |
||||
|
||||
// Algorithm:
|
||||
// allocate a list of kmers
|
||||
// for each possible position of L window,
|
||||
// feed string L to KmerHistogram()
|
||||
// save any kmers with frequency > t
|
||||
// return master list of saved kmers
|
||||
|
||||
L_slots := len(genome)-L+1 |
||||
|
||||
// Set kmers
|
||||
kmers := map[string]bool{} |
||||
|
||||
// List kmers
|
||||
kmers_list := []string{} |
||||
|
||||
// Loop over each possible window of length L
|
||||
for iL:=0; iL<L_slots; iL++ { |
||||
|
||||
// Grab this portion of the genome
|
||||
winstart := iL |
||||
winend := iL+L |
||||
genome_window := genome[winstart:winend] |
||||
|
||||
// Get the number of kmers that occur more
|
||||
// frequently than t times
|
||||
new_kmers,err := MoreFrequentThanNKmers(genome_window,k,t) |
||||
if err!=nil { |
||||
return kmers_list,err |
||||
} |
||||
// Add these to the set kmers
|
||||
for _,new_kmer := range new_kmers { |
||||
kmers[new_kmer] = true |
||||
} |
||||
} |
||||
|
||||
for k := range kmers { |
||||
kmers_list = append(kmers_list,k) |
||||
} |
||||
sort.Strings(kmers_list) |
||||
|
||||
return kmers_list,nil |
||||
} |
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1F
|
||||
|
||||
// MinSkewPositions returns the positions at which the cumulative GC skew
// of genome is minimal.
//
// The skew at position i is the number of G nucleotides minus the number
// of C nucleotides among the first i characters of the genome, so it is
// defined for i = 0..len(genome) and is 0 at position 0. All positions
// that attain the minimum are returned in increasing order; position 0 is
// a candidate like any other.
//
// The genome is upper-cased first. If it contains anything other than
// A, T, C or G, a nil slice and an error are returned.
func MinSkewPositions(genome string) ([]int, error) {
	genome = s.ToUpper(genome)

	// Position 0 (nothing read yet) has skew 0 and seeds the search, so
	// no artificial "large" starting minimum is needed.
	skew, lowest := 0, 0
	positions := []int{0}

	// Valid DNA is pure ASCII, so bytes can be walked directly; any byte
	// of a multi-byte character is rejected by the default case.
	for i := 0; i < len(genome); i++ {
		switch genome[i] {
		case 'C':
			skew--
		case 'G':
			skew++
		case 'A', 'T':
			// A and T leave the skew unchanged.
		default:
			return nil, errors.New(fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", genome))
		}

		// After consuming character i the skew belongs to position i+1.
		if skew < lowest {
			// New minimum: forget all previously recorded positions.
			lowest = skew
			positions = append(positions[:0], i+1)
		} else if skew == lowest {
			// Another position tying the current minimum.
			positions = append(positions, i+1)
		}
	}

	return positions, nil
}
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1G
|
||||
|
||||
// HammingDistance counts the byte positions at which p and q differ.
//
// Only the prefix common to both strings is compared: when the lengths
// differ, the surplus characters of the longer string are ignored and do
// not add to the distance.
//
// An empty argument is treated as a caller mistake rather than as
// distance 0: the function then returns -1 and an error.
func HammingDistance(p, q string) (int, error) {
	if len(p) == 0 || len(q) == 0 {
		msg := fmt.Sprintf("Error: HammingDistance: one or more arguments had length 0. len(p) = %d, len(q) = %d", len(p), len(q))
		return -1, errors.New(msg)
	}

	// Number of positions both strings have in common.
	overlap := len(p)
	if len(q) < overlap {
		overlap = len(q)
	}

	// Tally the mismatches over that shared range.
	mismatches := 0
	for i := 0; i < overlap; i++ {
		if p[i] != q[i] {
			mismatches++
		}
	}
	return mismatches, nil
}
||||
|
||||
|
||||
////////////////////////////////
|
||||
// BA1H
|
||||
|
||||
|
||||
// FindApproximateOccurrences returns the zero-based byte offsets in text
// at which pattern occurs approximately: the window of len(pattern)
// characters starting there differs from pattern in at most d positions
// (Hamming distance d or less). Offsets are reported in increasing order
// and overlapping windows are all considered.
//
// When pattern is longer than text the result is empty. A negative d
// can never be satisfied and also yields an empty result. The returned
// slice is never nil and the error is always nil.
func FindApproximateOccurrences(pattern, text string, d int) ([]int, error) {
	locations := []int{}
	width := len(pattern)

	// Slide a window of the pattern's width across the text.
	for start := 0; start+width <= len(text); start++ {

		// Count mismatches in place. Window and pattern always have the
		// same length here, so no length handling or error path is
		// needed, and counting stops as soon as the budget d is exceeded.
		mismatches := 0
		for j := 0; j < width && mismatches <= d; j++ {
			if text[start+j] != pattern[j] {
				mismatches++
			}
		}

		if mismatches <= d {
			locations = append(locations, start)
		}
	}

	return locations, nil
}
||||
|
||||
|
@ -0,0 +1,21 @@
@@ -0,0 +1,21 @@
|
||||
https://github.com/moul/euler |
||||
- use snakemake |
||||
|
||||
main.go is a cli: |
||||
- given a problem... |
||||
- print url for problem |
||||
- duration |
||||
- answer |
||||
- awesome go |
||||
|
||||
ba1c test |
||||
- not testing everything |
||||
- finish |
||||
|
||||
code coverage |
||||
- https://mlafeldt.github.io/blog/test-coverage-in-go/ |
||||
- go lint |
||||
- go test |
||||
|
||||
|
||||
|
@ -0,0 +1,95 @@
@@ -0,0 +1,95 @@
|
||||
package main |
||||
|
||||
import ( |
||||
"bufio" |
||||
"fmt" |
||||
"os" |
||||
) |
||||
|
||||
// readLines reads a whole file into memory and returns a slice of its
// lines, with the line terminators ("\n" or "\r\n") removed.
//
// Genome files can hold very long single lines, so the scanner is given
// room to grow far beyond bufio's default 64 KiB token limit. A line
// longer than maxLineSize still fails with bufio.ErrTooLong.
//
// On any error (the file cannot be opened, or reading fails) the
// returned slice is nil.
func readLines(path string) ([]string, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	const (
		// Starting capacity of the scan buffer; it grows on demand.
		initialBufSize = 64 * 1024
		// Upper bound on the length of a single line (1 GiB).
		maxLineSize = 1 << 30
	)

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}
||||
|
||||
// writeLines writes the lines to the given file, one per line, each
// terminated by "\n". The file is created if it does not exist and
// truncated if it does.
//
// The first error met while creating, writing, flushing or closing the
// file is returned. The close error matters for a file opened for
// writing: some write failures are only reported when the file is
// closed.
func writeLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Always close, but do not let a close error mask an earlier one.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, err := fmt.Fprintln(w, line); err != nil {
			return err
		}
	}
	return w.Flush()
}
||||
|
||||
// EqualStringSlices reports whether a and b have the same length and
// hold equal strings at every index. Go slices cannot be compared with
// ==, so the comparison is done element by element. A nil slice and an
// empty slice are considered equal.
func EqualStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
||||
|
||||
|
||||
// EqualBoolSlices reports whether a and b have the same length and hold
// equal booleans at every index. Go slices cannot be compared with ==,
// so the comparison is done element by element. A nil slice and an empty
// slice are considered equal.
func EqualBoolSlices(a, b []bool) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
||||
|
||||
// EqualIntSlices reports whether a and b have the same length and hold
// equal integers at every index. A nil slice and an empty slice are
// considered equal.
func EqualIntSlices(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if b[i] != v {
			return false
		}
	}
	return true
}
||||
|
||||
|
Loading…
Reference in new issue