Compare commits
31 Commits
99 changed files with 50341 additions and 38 deletions
@ -0,0 +1,19 @@ |
|||||||
|
golibby |
||||||
|
queens |
||||||
|
chapter01/chapter01 |
||||||
|
|
||||||
|
# golang: |
||||||
|
|
||||||
|
# Binaries for programs and plugins |
||||||
|
*.exe |
||||||
|
*.exe~ |
||||||
|
*.dll |
||||||
|
*.so |
||||||
|
*.dylib |
||||||
|
|
||||||
|
# Test binary, build with `go test -c` |
||||||
|
*.test |
||||||
|
|
||||||
|
# Output of the go coverage tool, specifically when used with LiteIDE |
||||||
|
*.out |
||||||
|
|
@ -0,0 +1,14 @@ |
|||||||
|
# https://docs.travis-ci.com/user/languages/go/ |
||||||
|
language: go |
||||||
|
go: |
||||||
|
- 1.10.x |
||||||
|
- 1.11.x |
||||||
|
- tip |
||||||
|
|
||||||
|
install: true |
||||||
|
|
||||||
|
script: |
||||||
|
- go test -v ./rosalind/... |
||||||
|
- go test -v ./chapter1/... |
||||||
|
- go test -v ./chapter2/... |
||||||
|
- go test -v ./chapter3/... |
@ -0,0 +1,19 @@ |
|||||||
|
Copyright 2019 Charles Reid |
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of |
||||||
|
this software and associated documentation files (the "Software"), to deal in |
||||||
|
the Software without restriction, including without limitation the rights to |
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
||||||
|
of the Software, and to permit persons to whom the Software is furnished to do |
||||||
|
so, subject to the following conditions: |
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all |
||||||
|
copies or substantial portions of the Software. |
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||||
|
SOFTWARE. |
@ -0,0 +1,126 @@ |
|||||||
|
# go-rosalind |
||||||
|
|
||||||
|
`rosalind` is a Go (golang) package for solving bioinformatics problems. |
||||||
|
|
||||||
|
[![travis](https://img.shields.io/travis/charlesreid1/go-rosalind.svg)](https://travis-ci.org/charlesreid1/go-rosalind) |
||||||
|
[![golang](https://img.shields.io/badge/language-golang-00ADD8.svg)](https://golang.org) |
||||||
|
[![license](https://img.shields.io/github/license/charlesreid1/go-rosalind.svg)](https://github.com/charlesreid1/go-rosalind/blob/master/LICENSE) |
||||||
|
[![godoc](https://godoc.org/github.com/charlesreid1/go-rosalind?status.svg)](http://godoc.org/github.com/charlesreid1/go-rosalind) |
||||||
|
|
||||||
|
## Summary |
||||||
|
|
||||||
|
This repo contains a Go (golang) library, `rosalind`, that implements |
||||||
|
functionality for solving bioinformatics problems. This is mainly |
||||||
|
useful for problems on Rosalind.info but is for general use as well. |
||||||
|
|
||||||
|
Rosalind problems are grouped by chapter. Each problem has its own |
||||||
|
function and is implemented in a library called `chapter1`, `chapter2`, |
||||||
|
etc. |
||||||
|
|
||||||
|
For example, Chapter 1 question A is implemented in package |
||||||
|
`chapter1` as the function `BA1a( <input-file-name> )`. |
||||||
|
This (specific) functionality wraps the (general purpose) |
||||||
|
`rosalind` library. |
||||||
|
|
||||||
|
## Quick Start |
||||||
|
|
||||||
|
### Rosalind |
||||||
|
|
||||||
|
The `rosalind` library can be installed using `go get`: |
||||||
|
|
||||||
|
``` |
||||||
|
go get github.com/charlesreid1/go-rosalind/rosalind |
||||||
|
``` |
||||||
|
|
||||||
|
The library can now be imported and its functions called directly. |
||||||
|
Here is a brief example: |
||||||
|
|
||||||
|
``` |
||||||
|
package main |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
func main() { |
||||||
|
input := "AAAATGCGCTAGTAAAAGTCACTGAAAA" |
||||||
|
k := 4 |
||||||
|
result, _ := rosalind.MostFrequentKmers(input, k) |
||||||
|
fmt.Println(result) |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
### Problem Sets |
||||||
|
|
||||||
|
Each set of problems is grouped into its own package. These |
||||||
|
packages import the `rosalind` package, so it should be |
||||||
|
available. |
||||||
|
|
||||||
|
You can install the Chapter 1 problem set, for example, like so: |
||||||
|
|
||||||
|
``` |
||||||
|
go get github.com/charlesreid1/go-rosalind/chapter1 |
||||||
|
``` |
||||||
|
|
||||||
|
This can now be imported and used in any Go program. |
||||||
|
|
||||||
|
Try creating a `main.go` file in a temporary directory, |
||||||
|
and run it with `go run main.go`: |
||||||
|
|
||||||
|
``` |
||||||
|
package main |
||||||
|
|
||||||
|
import ( |
||||||
|
rch1 "github.com/charlesreid1/go-rosalind/chapter1" |
||||||
|
) |
||||||
|
|
||||||
|
func main() { |
||||||
|
filename := "rosalind_ba1a.txt" |
||||||
|
rch1.BA1a(filename) |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
Assuming an input file `rosalind_ba1a.txt` is available, |
||||||
|
you should see a problem description and the output of |
||||||
|
the problem, which can be copied and pasted into |
||||||
|
Rosalind.info: |
||||||
|
|
||||||
|
``` |
||||||
|
$ go run main.go |
||||||
|
|
||||||
|
----------------------------------------- |
||||||
|
Rosalind: Problem BA1a: |
||||||
|
Most Frequest k-mers |
||||||
|
|
||||||
|
Given an input string and a length k, |
||||||
|
report the k-mer or k-mers that occur |
||||||
|
most frequently. |
||||||
|
|
||||||
|
URL: http://rosalind.info/problems/ba1a/ |
||||||
|
|
||||||
|
|
||||||
|
Computed result from input file: for_real/rosalind_ba1a.txt |
||||||
|
39 |
||||||
|
``` |
||||||
|
|
||||||
|
## Command Line Interface |
||||||
|
|
||||||
|
TBA |
||||||
|
|
||||||
|
## Organization |
||||||
|
|
||||||
|
The repo contains the following directories: |
||||||
|
|
||||||
|
* `rosalind/` - code and functions for the Rosalind library |
||||||
|
|
||||||
|
* `chapter1/` - solutions to chapter 1 questions (utilizes `rosalind` library) |
||||||
|
|
||||||
|
* `chapter2/` - solutions to chapter 2 questions |
||||||
|
|
||||||
|
* `chapter3/` - solutions to chapter 3 questions |
||||||
|
|
||||||
|
* `stronghold/` - solutions to questions from the stronghold section of Rosalind.info |
||||||
|
|
||||||
|
See the Readme file in each respective directory for more info. |
||||||
|
|
@ -1,38 +0,0 @@ |
|||||||
package main |
|
||||||
|
|
||||||
import "fmt" |
|
||||||
|
|
||||||
// Rosalind: Problem BA1A
|
|
||||||
//
|
|
||||||
// To run:
|
|
||||||
//
|
|
||||||
// $ go run ba1a.go
|
|
||||||
|
|
||||||
// pattern_count returns how many times pattern occurs in input.
//
// Every window of len(pattern) bytes in input is compared against pattern,
// so overlapping occurrences are each counted. When pattern is longer than
// input there are no windows and the result is 0.
func pattern_count(input string, pattern string) int {
	width := len(pattern)
	matches := 0

	// A window starting at lo is valid as long as it ends inside input.
	for lo := 0; lo+width <= len(input); lo++ {
		if input[lo:lo+width] == pattern {
			matches++
		}
	}
	return matches
}
|
||||||
|
|
||||||
func main() { |
|
||||||
// Call the pattern_count function
|
|
||||||
fmt.Println("Number of occurrences of GCG in GCGCG:") |
|
||||||
res := pattern_count("GCGCG","GCG") |
|
||||||
fmt.Println(res) |
|
||||||
} |
|
||||||
|
|
@ -0,0 +1,69 @@ |
|||||||
|
# Rosalind Chapter 1 |
||||||
|
|
||||||
|
This folder contains the `chapter1` module, which |
||||||
|
provides functions for each of the problems from |
||||||
|
Chapter 1 of Rosalind.info's Bioinformatics Textbook |
||||||
|
track. |
||||||
|
|
||||||
|
## How to run |
||||||
|
|
||||||
|
* Each problem has its own function (example: `BA1a(...)`) |
||||||
|
|
||||||
|
* Each problem expects an input file |
||||||
|
(example input files in `for_real` directory, |
||||||
|
or provide the input file downloaded |
||||||
|
from Rosalind.info) |
||||||
|
|
||||||
|
* Pass the input file name to the function, like this: |
||||||
|
`BA1a("rosalind_ba1a.txt")` |
||||||
|
|
||||||
|
## Quick Start |
||||||
|
|
||||||
|
To use the functions in this package, start by installing it: |
||||||
|
|
||||||
|
``` |
||||||
|
go get github.com/charlesreid1/go-rosalind/chapter1 |
||||||
|
``` |
||||||
|
|
||||||
|
Once you have installed the `chapter1` package, |
||||||
|
you can import it, then call the function for whichever |
||||||
|
Rosalind.info problem you want to solve from Chapter 1: |
||||||
|
|
||||||
|
``` |
||||||
|
package main |
||||||
|
|
||||||
|
import ( |
||||||
|
rch1 "github.com/charlesreid1/go-rosalind/chapter1" |
||||||
|
) |
||||||
|
|
||||||
|
func main() { |
||||||
|
rch1.BA1a("rosalind_ba1a.txt") |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
## Examples |
||||||
|
|
||||||
|
See `chapter1_test.go` for examples. |
||||||
|
|
||||||
|
## Tests |
||||||
|
|
||||||
|
To run tests of all Chapter 1 problems, run |
||||||
|
`go test` from this directory: |
||||||
|
|
||||||
|
``` |
||||||
|
go test -v |
||||||
|
``` |
||||||
|
|
||||||
|
or, from the parent directory, the root of the |
||||||
|
go-rosalind repository: |
||||||
|
|
||||||
|
``` |
||||||
|
go test -v ./chapter1/... |
||||||
|
``` |
||||||
|
|
||||||
|
Note that this solves every problem in |
||||||
|
Chapter 1 and prints the solutions (so there |
||||||
|
is a lot of spew). It does not check the |
||||||
|
solutions (for that, see the tests in the |
||||||
|
`rosalind` library.) |
||||||
|
|
@ -0,0 +1,55 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1a: Compute the Number of Times a Pattern Appears in a Text
|
||||||
|
|
||||||
|
// BA1aDescription prints the banner for Rosalind problem BA1a to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
//
// The text describes pattern counting, which is what BA1a computes via
// rosa.PatternCount.
func BA1aDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1a:",
		"Compute the Number of Times a Pattern Appears in a Text",
		"",
		"Given an input string and a pattern,",
		"report the number of times the pattern",
		"occurs in the input string.",
		"",
		"URL: http://rosalind.info/problems/ba1a/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||||
|
|
||||||
|
// Describe the problem,
|
||||||
|
// print the name of the input file,
|
||||||
|
// print the output/result
|
||||||
|
func BA1a(filename string) { |
||||||
|
|
||||||
|
BA1aDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
var input, pattern string |
||||||
|
input = lines[0] |
||||||
|
pattern = lines[1] |
||||||
|
|
||||||
|
result := rosa.PatternCount(input, pattern) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(result) |
||||||
|
} |
@ -0,0 +1,59 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1b: Most Frequent k-mers
|
||||||
|
|
||||||
|
// BA1bDescription prints the banner for Rosalind problem BA1b to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1bDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1b:",
		"Most Frequent k-mers",
		"",
		"Given an input string and a length k,",
		"report the k-mer or k-mers that occur",
		"most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1b/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1b(filename string) { |
||||||
|
|
||||||
|
BA1bDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
k_str := lines[1] |
||||||
|
|
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
mfks, _ := rosa.MostFrequentKmers(input, k) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(mfks, " ")) |
||||||
|
} |
@ -0,0 +1,51 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1c: Find the Reverse Complement of a String
|
||||||
|
|
||||||
|
// BA1cDescription prints the banner for Rosalind problem BA1c to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1cDescription() {
	fmt.Println("-----------------------------------------")
	fmt.Println("Rosalind: Problem BA1c:")
	fmt.Println("Find the Reverse Complement of a String")
	fmt.Println("")
	fmt.Println("Given a DNA input string,")
	fmt.Println("find the reverse complement")
	fmt.Println("of the DNA string.")
	fmt.Println("")
	fmt.Println("URL: http://rosalind.info/problems/ba1c/")
	fmt.Println("")
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1c(filename string) { |
||||||
|
|
||||||
|
BA1cDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
|
||||||
|
result, _ := rosa.ReverseComplement(input) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(result) |
||||||
|
} |
@ -0,0 +1,61 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1d: Find all occurrences of pattern in string
|
||||||
|
|
||||||
|
// BA1dDescription prints the banner for Rosalind problem BA1d to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1dDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1d:",
		"Find all occurrences of pattern in string",
		"",
		"Given a string input (genome) and a substring (pattern),",
		"return all starting positions in the genome where the",
		"pattern occurs in the genome.",
		"",
		"URL: http://rosalind.info/problems/ba1d/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1d(filename string) { |
||||||
|
|
||||||
|
BA1dDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
pattern := lines[0] |
||||||
|
genome := lines[1] |
||||||
|
|
||||||
|
// Result is a slice of ints
|
||||||
|
locs, _ := rosa.FindOccurrences(pattern, genome) |
||||||
|
|
||||||
|
// Convert to a slice of strings for easier printing
|
||||||
|
locs_str := make([]string, len(locs)) |
||||||
|
for i, j := range locs { |
||||||
|
locs_str[i] = strconv.Itoa(j) |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(locs_str, " ")) |
||||||
|
} |
@ -0,0 +1,59 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1e: Find patterns forming clumps in a string
|
||||||
|
|
||||||
|
// BA1eDescription prints the banner for Rosalind problem BA1e to standard
// output: a separator rule, the problem title, the definition of a clump,
// and the problem URL, each on its own line.
func BA1eDescription() {
	for _, text := range [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1e:",
		"Find patterns forming clumps in a string",
		"",
		"A clump is characterized by integers L and t",
		"if there is an interval in the genome of length L",
		"in which a given pattern occurs t or more times.",
		"",
		"URL: http://rosalind.info/problems/ba1e/",
		"",
	} {
		fmt.Println(text)
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1e(filename string) { |
||||||
|
|
||||||
|
BA1eDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
genome := lines[0] |
||||||
|
params_str := lines[1] |
||||||
|
params_slice := strings.Split(params_str, " ") |
||||||
|
|
||||||
|
k, _ := strconv.Atoi(params_slice[0]) |
||||||
|
L, _ := strconv.Atoi(params_slice[1]) |
||||||
|
t, _ := strconv.Atoi(params_slice[2]) |
||||||
|
|
||||||
|
patterns, _ := rosa.FindClumps(genome, k, L, t) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(patterns, " ")) |
||||||
|
} |
@ -0,0 +1,61 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1f: Find positions in a gene that minimize skew
|
||||||
|
|
||||||
|
// BA1fDescription prints the banner for Rosalind problem BA1f to standard
// output: a separator rule, the problem title, the definition of skew and
// the task, and the problem URL, each on its own line.
func BA1fDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1f:",
		"Find positions in a gene that minimize skew",
		"",
		"The skew of a genome is defined as the difference",
		"between the number of G nucleotides and the number",
		"of C nucleotides. Given a DNA string, this function",
		"should compute the cumulative skew for each position",
		"in the genome, and report the indices where the skew",
		"value is minimized.",
		"",
		"URL: http://rosalind.info/problems/ba1f/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1f(filename string) { |
||||||
|
|
||||||
|
BA1fDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
genome := lines[0] |
||||||
|
|
||||||
|
minskew, _ := rosa.MinSkewPositions(genome) |
||||||
|
|
||||||
|
minskew_str := make([]string, len(minskew)) |
||||||
|
for i, j := range minskew { |
||||||
|
minskew_str[i] = strconv.Itoa(j) |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(minskew_str, " ")) |
||||||
|
} |
@ -0,0 +1,53 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1g: Find Hamming distance between two DNA strings
|
||||||
|
|
||||||
|
// BA1gDescription prints the banner for Rosalind problem BA1g to standard
// output: a separator rule, the problem title, the definition of Hamming
// distance, and the problem URL, each on its own line.
func BA1gDescription() {
	fmt.Println("-----------------------------------------")
	fmt.Println("Rosalind: Problem BA1g:")
	fmt.Println("Find Hamming distance between two DNA strings")
	fmt.Println("")
	fmt.Println("The Hamming distance between two strings HammingDistance(p,q)")
	fmt.Println("is the number of characters different between the two")
	fmt.Println("strands. This program computes the Hamming distance")
	fmt.Println("between two strings.")
	fmt.Println("")
	fmt.Println("URL: http://rosalind.info/problems/ba1g/")
	fmt.Println("")
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1g(filename string) { |
||||||
|
|
||||||
|
BA1gDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
p := lines[0] |
||||||
|
q := lines[1] |
||||||
|
|
||||||
|
hamm, _ := rosa.HammingDistance(p, q) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(hamm) |
||||||
|
} |
@ -0,0 +1,66 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1h: Find approximate occurrences of pattern in string
|
||||||
|
|
||||||
|
// BA1hDescription prints the banner for Rosalind problem BA1h to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1hDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1h:",
		"Find approximate occurrences of pattern in string",
		"",
		"Given a string Text and a string Pattern, and a maximum",
		"Hamming distance d, return all locations in Text where",
		"there is an approximate match with Pattern (i.e., a pattern",
		"with a Hamming distance from Pattern of d or less).",
		"",
		"URL: http://rosalind.info/problems/ba1h/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1h(filename string) { |
||||||
|
|
||||||
|
BA1hDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
pattern := lines[0] |
||||||
|
text := lines[1] |
||||||
|
d_str := lines[2] |
||||||
|
|
||||||
|
d, _ := strconv.Atoi(d_str) |
||||||
|
|
||||||
|
approx, _ := rosa.FindApproximateOccurrences(pattern, text, d) |
||||||
|
|
||||||
|
approx_str := make([]string, len(approx)) |
||||||
|
for i, j := range approx { |
||||||
|
approx_str[i] = strconv.Itoa(j) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: conversion from int to string: %v", err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(approx_str, " ")) |
||||||
|
} |
@ -0,0 +1,70 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1i: Most Frequent Words with Mismatches
|
||||||
|
|
||||||
|
// BA1iDescription prints the banner for Rosalind problem BA1i to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1iDescription() {
	for _, text := range [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1i:",
		"Most Frequent Words with Mismatches",
		"",
		"Given an input string and a maximum allowable",
		"Hamming distance d, report the most frequent",
		"kmer that either occurs or whose Hamming neighbors",
		"occur most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1i/",
		"",
	} {
		fmt.Println(text)
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1i(filename string) { |
||||||
|
|
||||||
|
BA1iDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
params := strings.Split(lines[1], " ") |
||||||
|
if len(params) < 1 { |
||||||
|
log.Fatalf("Error splitting second line: only found 0-1 tokens") |
||||||
|
} |
||||||
|
|
||||||
|
k_str, d_str := params[0], params[1] |
||||||
|
|
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for parameter k: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
d, err := strconv.Atoi(d_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for parameter d: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
mfks_mis, _ := rosa.MostFrequentKmersMismatches(input, k, d) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(mfks_mis, " ")) |
||||||
|
} |
@ -0,0 +1,71 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1j: Most Frequent Words with Mismatches and Reverse Complements
|
||||||
|
|
||||||
|
// BA1jDescription prints the banner for Rosalind problem BA1j to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1jDescription() {
	fmt.Println("-----------------------------------------")
	fmt.Println("Rosalind: Problem BA1j:")
	fmt.Println("Most Frequent Words with Mismatches and Reverse Complements")
	fmt.Println("")
	fmt.Println("Given an input string and a maximum allowable")
	fmt.Println("Hamming distance d, report the most frequent")
	fmt.Println("kmer that either occurs or whose Hamming neighbors")
	fmt.Println("occur most frequently in the input string and in the")
	fmt.Println("reverse complement of the input string.")
	fmt.Println("")
	fmt.Println("URL: http://rosalind.info/problems/ba1j/")
	fmt.Println("")
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1j(filename string) { |
||||||
|
|
||||||
|
BA1jDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
params := strings.Split(lines[1], " ") |
||||||
|
if len(params) < 1 { |
||||||
|
log.Fatalf("Error splitting second line: only found 0-1 tokens") |
||||||
|
} |
||||||
|
|
||||||
|
k_str, d_str := params[0], params[1] |
||||||
|
|
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for parameter k: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
d, err := strconv.Atoi(d_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for parameter d: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
mfks_mis, _ := rosa.MostFrequentKmersMismatchesRevComp(input, k, d) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(mfks_mis, " ")) |
||||||
|
} |
@ -0,0 +1,62 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1k: Generate Frequency Array
|
||||||
|
|
||||||
|
// BA1kDescription prints the banner for Rosalind problem BA1k to standard
// output: a separator rule, the problem title, a short statement of the
// task, and the problem URL, each on its own line.
func BA1kDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1k:",
		"Generate Frequency Array",
		"",
		"Given an integer k, generate the frequency array of",
		"an input string. The frequency array is an array of",
		"counts with one count per index, and integers mapped",
		"to kmers.",
		"",
		"URL: http://rosalind.info/problems/ba1k/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1k(filename string) { |
||||||
|
|
||||||
|
BA1kDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
k_str := lines[1] |
||||||
|
|
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for parameter k: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
arr, _ := rosa.FrequencyArray(input, k) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
for _, e := range arr { |
||||||
|
fmt.Print(e, " ") |
||||||
|
} |
||||||
|
//fmt.Println(strings.Join(arr, " "))
|
||||||
|
} |
@ -0,0 +1,51 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1L: Pattern to Number
|
||||||
|
|
||||||
|
// BA1LDescription prints the banner for Rosalind problem BA1L
// (Pattern to Number) to standard output, one line at a time.
func BA1LDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1L:",
		"Pattern to Number",
		"",
		"Given an input kmer of length k, convert it to",
		"an integer corresponding to its lexicographic",
		"order among kmers of length k.",
		"",
		"URL: http://rosalind.info/problems/ba1l/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1L(filename string) { |
||||||
|
|
||||||
|
BA1LDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
|
||||||
|
number, _ := rosa.PatternToNumber(input) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(number) |
||||||
|
} |
@ -0,0 +1,62 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1m: Number to Pattern
|
||||||
|
|
||||||
|
// BA1mDescription prints the banner for Rosalind problem BA1m
// (Number to Pattern) to standard output, one line at a time.
func BA1mDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1m:",
		"Number to Pattern",
		"",
		"Given an integer and a kmer length k, convert",
		"the integer to its corresponding kmer.",
		"",
		"URL: http://rosalind.info/problems/ba1m/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1m(filename string) { |
||||||
|
|
||||||
|
BA1mDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
number_str := lines[0] |
||||||
|
k_str := lines[1] |
||||||
|
|
||||||
|
number, err := strconv.Atoi(number_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for number: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for k: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
result, _ := rosa.NumberToPattern(number, k) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(result) |
||||||
|
} |
@ -0,0 +1,60 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Rosalind: Problem BA1n: Calculating d-Neighborhood of String
|
||||||
|
|
||||||
|
// BA1nDescription prints the banner for Rosalind problem BA1n
// (Calculating d-Neighborhood of String) to standard output, one line at
// a time.
func BA1nDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1n:",
		"Calculating d-Neighborhood of String",
		"",
		"Given an input string of DNA and a Hamming",
		"distance d, compute all DNA strings that",
		"are a Hamming distance of up to d away.",
		"",
		"URL: http://rosalind.info/problems/ba1n/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Describe the problem, and call the function
|
||||||
|
func BA1n(filename string) { |
||||||
|
|
||||||
|
BA1nDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
d_str := lines[1] |
||||||
|
|
||||||
|
d, err := strconv.Atoi(d_str) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error: string to int conversion for d: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
result, _ := rosa.VisitHammingNeighbors(input, d) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
for _, j := range result { |
||||||
|
fmt.Println(j) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,20 @@ |
|||||||
|
package rosalindchapter1 |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestChapter01(t *testing.T) { |
||||||
|
BA1a("for_real/rosalind_ba1a.txt") |
||||||
|
BA1b("for_real/rosalind_ba1b.txt") |
||||||
|
BA1c("for_real/rosalind_ba1c.txt") |
||||||
|
BA1d("for_real/rosalind_ba1d.txt") |
||||||
|
BA1e("for_real/rosalind_ba1e.txt") |
||||||
|
BA1f("for_real/rosalind_ba1f.txt") |
||||||
|
BA1g("for_real/rosalind_ba1g.txt") |
||||||
|
BA1h("for_real/rosalind_ba1h.txt") |
||||||
|
BA1i("for_real/rosalind_ba1i.txt") |
||||||
|
BA1j("for_real/rosalind_ba1j.txt") |
||||||
|
BA1k("for_real/rosalind_ba1k.txt") |
||||||
|
BA1L("for_real/rosalind_ba1l.txt") |
||||||
|
BA1m("for_real/rosalind_ba1m.txt") |
||||||
|
BA1n("for_real/rosalind_ba1n.txt") |
||||||
|
} |
@ -0,0 +1,2 @@ |
|||||||
|
GTCCGGGGTCCGGGGTCCCGGGGTTACCCGGGGTCCCGGGGTCCGGGGTTACCCGGGGTAACCGGGGTCACCGGGGTCTCCGCCGGGGTGGCCGGGGTACCCGGGGTCCCGGGGTCCGGGGTCCGGGGTGCACGGGCCGGGGTGTACCGGGGTCCGGGGTCGGCGTCCGGGGTCCGGGGTCTGGAAGTTGAACTGACACCGGGGTTCCGGGGTCCGGGGTCCGGGGTCCGGGGTGGAACCCGGGGTGTCCCGGGGTCCGGGGTCCGGGGTCCGGGGTTCCGGGGTAGGCCGGGGTCCCGGGGTAGTGTGTGCCGGGGTCCTCGCCGGGGTAGCGCAAAACCGGGGTGCGGTAACTACCGGGGTCCGGGGTGCCGGGGTCCGGGGTTCCGGGGTCCGGGGTTCTCCGGGGTCCCGGGGTAGCCCGGGGTATCCGGGGTCCGGGGTCCGGGGTCCGGGGTACCGGGGTCAGGGCCGGGGTGACCGGGGTTCCGGGGTATCTGTTTATCCCGGGGTCCGGGGTCGGTAAACCGTCCGGGGTTTACCCCGGGGTCCGGGGTCTCGATCAAACCGGGGTTATGAGAATCCGGGGTCCGGGGTCCCGGGGTAGACCGGGGTACATCCCGGGGTGTCCGGGGTTACAAGCCGGGGTCCAAACGATTCCCGGGGTCCGGGGTTGCCCCGGGGTCCGGGGTGATGCACCGGGGTAAGCCGGGGTTGACGACCCCGGGGTCGCCGGGGTCTGCACTCCGGGGTTCCGGGGTAGCCGGGGTCAACCGGGGTAACCGGGGTTTGCCGGGGTCCCGGGGTTTGTCCGGGGTCCACCGGGGTCCGGGGTGCCGGGGTTCTACCGGGGTGCCGGGGTACACCGGGGTAGCCGGGGTATCCGGGGTACCGGGGTAAACCGGGGTGCCGGGGTCCGGGGTCCGGGGTTCCCGGGGTTTCTACCGGGGTGGGACCGGGGTCCGGGGTCCGGGGTATTAACCACCGGGGTGCGACCGGGGTGGCCGGGGTCCGGGGTATCCGGGGTACATCCGGGGTACGG |
||||||
|
CCGGGGTCC |
@ -0,0 +1,2 @@ |
|||||||
|
AGTAGGTTCAGGGCGTTTAATAGCGAAAACAAATAATAGCAGTAGGTTGTACCACGTACCACTAATAGCGAAAACAAAAGTAGGTTCAGGGCGTTGTACCACGTACCACGAAAACAAATAATAGCTAATAGCGAAAACAAAGTACCACGAAAACAAATAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCAGTAGGTTCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACTAATAGCCAGGGCGTTAGTAGGTTGAAAACAAACAGGGCGTTAGTAGGTTCAGGGCGTTGTACCACTAATAGCTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCCAGGGCGTTGTACCACGAAAACAAACAGGGCGTTTAATAGCAGTAGGTTGAAAACAAATAATAGCCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTAGTAGGTTGTACCACGTACCACGAAAACAAAGTACCACGAAAACAAATAATAGCGTACCACGAAAACAAAAGTAGGTTTAATAGCGAAAACAAAAGTAGGTTAGTAGGTTCAGGGCGTTTAATAGCTAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCTAATAGCGTACCACCAGGGCGTTGTACCACGTACCACGAAAACAAAGAAAACAAAAGTAGGTTTAATAGCAGTAGGTTAGTAGGTTAGTAGGTTTAATAGCGAAAACAAACAGGGCGTTTAATAGCGTACCACGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAACAGGGCGTTGAAAACAAA |
||||||
|
12 |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,2 @@ |
|||||||
|
ATGACTAGTTATGCGACACGTGTTCCTTAAACAAACCGCTGATTGCGGAGGGATCATGTTGAAACGCAGTCAGTTGGCGCTTTACAAGAATTTAAGTGTCCCTCGGAGATGCTCCACTACACGCCATGGCGAAACGGTTCAGTCTCTTAGAAGAAGAAAGATATAGGAGTTGCGCCACCGTGATATAAGCACCGCAGTATCTGAAGGGAGCACAACTTGCTGCGAACAGACTGGTACGGTTACGTCGGGGCTTCAGGCATCGTTGGCGAGGTAGGAATCCTTATGTTAATTTTAAATCGAAGCAAAACAGAACTGTTGATCACTCATGTGTCGTTAACCGGAAGACTGCGGGTGCTCAGCCCCAATCGACGGCTGTTAGGAATGGCACACTACTGTATTTGTGACGACTAACTTGACATTCGAAGGTATCTGCGGTTGTTAAACGCCGATAATCGCCACCGCAGTTCTGAAAGGCTATATGTATCACGGTGATTTACGGCATTGTAAGCCCACTCAGAGTGCGTCGTAGGTTACGCGTTCTGAGTTGAAATAATCCAGTCGAACACGGTTGGTATCATGAATTCAGACTACCGTTTCTTGACTCCCGTCCTATACGAGTCTAGAGCGAACTTCGGGGTAAGAAATCACAATTAATCTCTTCCTTGTGTGATCCGCAAGGAAGCTGAGCTCAATTTGCAAGTACAGGTAGGTGGCAATCGAGAGCTACTAACACTCTTGTGTCGTTCGTAATTCATAAATAAAAAGACACGCCCTTATGATTGAAGCCTGAACTGCGGCAACGGTAGGTTTCCAAAGAGGATCGAGTCAGCGATACCCCCTGTACGCAGACAGATTATTACCCCCACTCTGCAATGTAGAAGTCTTAAAAACGCACTCTAGGCCAGTAACCAACCAGCTGGGTGGTGCGTTACCTAGTGCTATACAACAGTACCACCAGATTAGAAGCATGCCAGGTGTCTCGACACCTCCAATTCGTCATTTGGTGTGAGAAAAAGATATACCGCCAAGTTGCCATACCTGCAC |
||||||
|
ATGGACTATTTTGCTATACGATTACTAGGAATAAGTTGAACAACCCTTGCTTTTCTTTTTAACACAGCCAGAGGCTCGGGATGGAACGCGTCATCTCGCGGACTCAGAGATGCCAGATGGTAGGCCTCTTCCAACGAGTAACTTACGATAATTTGATAGATTCTTGAACGTAGTGTGTCGACCTCCGTACCGGAAAATTTTCTTATCTCTAGTGAACCGTCGAGCTGTACTTTAGACCCCTGTGCGACGATAGGTCTCCTGCGTTAGGTATTTTACATATTCCGCTGGGACCCAAATTTTTCCCGCGAACGGGATAGAGGTAGTATCTAACTTCGTTTACACAACGTAACATCCCGCCATGGTCGTTACGGGCGTACCCGCGCGGCGAAGGGCGCGGACCCGCGAATCATAAACTAAGAAAAGAGTATGTTGAAGCGCACCCGCCATGTCGCTCGACATCTGCCTGGCATGCTATAATACCTGCTGAGCAGTACATCCACGGCGTCTATGAGCGCCACGTCAATCGGATCAGCCGGAATGCTGATTCTGTTGTGTCGCCGTTATGAATTTGGAGGTGGCACGCAAGGTTCCAGCCCTGTATAGTGTGTTAAAGTCCACTTTTCATCATTGCTTAATGTTTAATCGGGTCCTCACCCGAAACTGTGATTGCGTTCTTATGTAAAGCTCTCGTTAGCAGACACCAATCTATGAAACTTCCGCCTCGGGCAACTTTCATGAGGCACTGTAACATTTGTTGCATAGAGCCGTACTATGGCCACCGTATTTTATATGGCTGACGTAAAGAGCCTGTTAATGTGTAATTCGAAGGTCCCTTTAGATGAGTCTCATGCCAGACCCAGAAGAGTGACGGCTGTCTCGGAGTGGGTATACGTTAGCCCCTGCCAATAGTAAAGCGTACACCTTGTCTTCAAGACTGTCACTGACACAAATTCCCCGACCCATATTCCGTTCCGGGTTGGTCTACCTTACGGCGGGAATCCAGAGGCCTAATGCGCTGGTTATATACCACCGGATCCCGATATA |
File diff suppressed because one or more lines are too long
@ -0,0 +1,2 @@ |
|||||||
|
CAGTGTAAGTAACGGATTGAGGACGTAACGGACTAGTATTCGAGGACAGTGTAATTGAGGACGTAACGGAGTAACGGATCGAGGACTAGTATCAGTGTAATTGAGGACGTAACGGAGTAACGGACAGTGTAACAGTGTAACTAGTATGTAACGGACAGTGTAAGTAACGGAGTAACGGAGTAACGGATCGAGGATTGAGGACCTAGTATCTAGTATTCGAGGATCGAGGATTGAGGACCTAGTATCTAGTATGTAACGGATTGAGGACTTGAGGACCTAGTATTCGAGGATCGAGGAGTAACGGACAGTGTAACAGTGTAATCGAGGATCGAGGACAGTGTAATTGAGGACTCGAGGACTAGTATTTGAGGACTCGAGGATTGAGGACGTAACGGAGTAACGGATCGAGGACTAGTATGTAACGGAGTAACGGACAGTGTAACTAGTATTTGAGGACCAGTGTAACAGTGTAACAGTGTAACAGTGTAACAGTGTAACTAGTATGTAACGGAGTAACGGATTGAGGACGTAACGGAGTAACGGATCGAGGATTGAGGACCTAGTATTTGAGGACGTAACGGATTGAGGACCTAGTATCTAGTATCAGTGTAACTAGTATGTAACGGATCGAGGATCGAGGACAGTGTAATTGAGGACTTGAGGACCAGTGTAATCGAGGATTGAGGACTTGAGGACTTGAGGACTCGAGGACAGTGTAAGTAACGGAGTAACGGATCGAGGACAGTGTAATTGAGGACCTAGTATTTGAGGACCTAGTATGTAACGGATTGAGGACCAGTGTAACTAGTATCTAGTATCTAGTATCAGTGTAATTGAGGACTCGAGGATTGAGGAC |
||||||
|
6 2 |
@ -0,0 +1,2 @@ |
|||||||
|
TTACTCGCTGGCAGGTTGACGGAGAAATATTGGTGACGGAGAAGACGGAGAATGGGCATATATTGGTTGGCAGGTTTGGGCATTTACTCGCGACGGAGAATTACTCGCTGGGCATTTACTCGCTGGGCATTTACTCGCTGGCAGGTTTGGCAGGTTATATTGGTATATTGGTATATTGGTTGGGCATTTACTCGCGACGGAGAATGGCAGGTTGACGGAGAAGACGGAGAAATATTGGTTTACTCGCATATTGGTGACGGAGAAATATTGGTTTACTCGCTTACTCGCTGGGCATTGGGCATTGGCAGGTTGACGGAGAAGACGGAGAATTACTCGCATATTGGTTTACTCGCGACGGAGAATTACTCGCATATTGGTGACGGAGAAGACGGAGAATTACTCGCTGGCAGGTTTGGGCATTGGGCATTTACTCGCTGGCAGGTTTGGGCATTGGCAGGTTGACGGAGAAGACGGAGAATGGCAGGTTTGGCAGGTTTGGCAGGTTTGGCAGGTTTGGGCATGACGGAGAATTACTCGCTGGCAGGTTTTACTCGCTGGCAGGTTTTACTCGCATATTGGTTGGCAGGTTTTACTCGCTTACTCGCTTACTCGCGACGGAGAAGACGGAGAAATATTGGTATATTGGTATATTGGTTGGCAGGTTTGGCAGGTTTGGCAGGTTATATTGGTTTACTCGCTTACTCGCATATTGGTTGGCAGGTTTGGGCATTGGCAGGTTTGGCAGGTTGACGGAGAATGGCAGGTTGACGGAGAAGACGGAGAATGGGCATTGGGCATGACGGAGAATGGCAGGTT |
||||||
|
5 3 |
@ -0,0 +1,2 @@ |
|||||||
|
CAATGAGTGATATTGTTTGGTAGCAATCCATAGTTGAGGCCCTACGGAAGTTGCATCCGGGGCCCGTAGGACTCGCGGGCAAAAGATTGCTAAGCATTCTTGGTCACCATCGCAGTATTGCTCGTAGTCGGGTGGGTTTGCCGAACTGATAATGTGCCAGTCCCCGCGGAACCGGAATCAGGGCAACGGCTAGAGATACTCTCCGTGGGTCCTAAGTAGGAGGCTTGGGGCTGAGTGAGCAACCACTTACTCGAGTGTGTTGTTTTCTGTGCGTCCCCCGGGCGGTGTTCATTTAAGGATGACCGGGTGAGTAACCGAACAATTTTGTTGCCATGAAACGCGGCAATAACTCAATCTACCAGTACGGACAAATATAATGTTGGGCCCTTTTAGCTTAACGGACGTCGTCCCATTCTGACCTTAACTAAGACTATAAGGTAGGGGGTCAGATACGACACGGTCAGTAGGTGGATATACCGTGACAAATACCGGCACCTATGCTAATTGCGATTTGGAATGGAACGCGCCGAATACTTCGGATCATATCACCGTCCCTGTACTCGAAAGTTCTGCCACGAACAAGTCTCCTACTTGTGTCTTTTCTCACTGCGAAG |
||||||
|
5 |
@ -0,0 +1,69 @@ |
|||||||
|
# Rosalind Chapter 2 |
||||||
|
|
||||||
|
This folder contains the `chapter2` module, which |
||||||
|
provides functions for each of the problems from |
||||||
|
Chapter 2 of Rosalind.info's Bioinformatics Textbook |
||||||
|
track. |
||||||
|
|
||||||
|
## How to run |
||||||
|
|
||||||
|
* Each problem has its own function (example: `BA2a(...)`) |
||||||
|
|
||||||
|
* Each problem expects an input file |
||||||
|
(example input files in `for_real` directory, |
||||||
|
or provide the input file downloaded |
||||||
|
from Rosalind.info) |
||||||
|
|
||||||
|
* Pass the input file name to the function, like this: |
||||||
|
`BA2a("rosalind_ba2a.txt")` |
||||||
|
|
||||||
|
## Quick Start |
||||||
|
|
||||||
|
To use the functions in this package, start by installing it: |
||||||
|
|
||||||
|
``` |
||||||
|
go get github.com/charlesreid1/go-rosalind/chapter2 |
||||||
|
``` |
||||||
|
|
||||||
|
Once you have installed the `chapter2` package, |
||||||
|
you can import it, then call the function for whichever |
||||||
|
Rosalind.info problem you want to solve from Chapter 2: |
||||||
|
|
||||||
|
``` |
||||||
|
package main |
||||||
|
|
||||||
|
import ( |
||||||
|
rch2 "github.com/charlesreid1/go-rosalind/chapter2" |
||||||
|
) |
||||||
|
|
||||||
|
func main() { |
||||||
|
rch2.BA2a("rosalind_ba2a.txt") |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
## Examples |
||||||
|
|
||||||
|
See `chapter2_test.go` for examples. |
||||||
|
|
||||||
|
## Tests |
||||||
|
|
||||||
|
To run tests of all Chapter 2 problems, run |
||||||
|
`go test` from this directory: |
||||||
|
|
||||||
|
``` |
||||||
|
go test -v |
||||||
|
``` |
||||||
|
|
||||||
|
or, from the parent directory, the root of the |
||||||
|
go-rosalind repository: |
||||||
|
|
||||||
|
``` |
||||||
|
go test -v ./chapter2/... |
||||||
|
``` |
||||||
|
|
||||||
|
Note that this solves every problem in |
||||||
|
Chapter 2 and prints the solutions (so there |
||||||
|
is a lot of spew). It does not check the |
||||||
|
solutions (for that, see the tests in the |
||||||
|
`rosalind` library). |
||||||
|
|
@ -0,0 +1,67 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2aDescription prints the banner for Rosalind problem BA2a
// (Implement Motif Enumeration) to standard output, one line at a time.
func BA2aDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2a:",
		"Implement Motif Enumeration",
		"",
		"Given a collection of strings of DNA, find all motifs (kmers of length k and Hamming distance d from all DNA strings).",
		"",
		"URL: http://rosalind.info/problems/ba2a/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2a(filename string) { |
||||||
|
|
||||||
|
BA2aDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
params := strings.Split(lines[0], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
d, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// 1 line in the input file is for
|
||||||
|
// parameters/gold standard.
|
||||||
|
// The rest of the lines are DNA strings.
|
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
results, _ := rosa.FindMotifs(dna, k, d) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(results, " ")) |
||||||
|
} |
@ -0,0 +1,61 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2bDescription prints the banner for Rosalind problem BA2b
// (Find a Median String) to standard output, one line at a time.
func BA2bDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2b:",
		"Find a Median String",
		"",
		"Given a kmer length k and a set of strings of DNA, find the kmer(s) that minimize the L1 norm of the distance from it to all other DNA strings.",
		"",
		"URL: http://rosalind.info/problems/ba2b/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2b(filename string) { |
||||||
|
|
||||||
|
BA2bDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
k_str := lines[0] |
||||||
|
k, _ := strconv.Atoi(k_str) |
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
results, _ := rosa.MedianString(dna, k) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(results) |
||||||
|
} |
@ -0,0 +1,54 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2cDescription prints the banner for Rosalind problem BA2c
// (Find a Profile-most Probable k-mer in a String) to standard output, one
// line at a time.
func BA2cDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2c:",
		"Find a Profile-most Probable k-mer in a String",
		"",
		"Given a profile matrix, find the most probable k-mer to generate the given DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba2c/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2c(filename string) { |
||||||
|
|
||||||
|
BA2cDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
dna := lines[0] |
||||||
|
k_str := lines[1] |
||||||
|
k, _ := strconv.Atoi(k_str) |
||||||
|
|
||||||
|
// To make multidimensional slice,
|
||||||
|
// make a slice, then loop and make more slices
|
||||||
|
profile, _ := rosa.ReadMatrix32(lines[2:6], k) |
||||||
|
|
||||||
|
// Find the most probable kmer
|
||||||
|
result, _ := rosa.ProfileMostProbableKmers(dna, k, profile) |
||||||
|
fmt.Println(strings.Join(result, " ")) |
||||||
|
} |
@ -0,0 +1,67 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2dDescription prints the banner for Rosalind problem BA2d
// (Implement GreedyMotifSearch) to standard output, one line at a time.
func BA2dDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2d:",
		"Implement GreedyMotifSearch",
		"",
		"Find a collection of motif strings using a greedy motif search. Return first-occurring profile-most probable kmer.",
		"",
		"URL: http://rosalind.info/problems/ba2d/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2d(filename string) { |
||||||
|
|
||||||
|
BA2dDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
//// Input file contents
|
||||||
|
params := strings.Split(lines[0], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
t, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// 1 line in the input file is for
|
||||||
|
// parameters.
|
||||||
|
// The rest of the lines are DNA strings.
|
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
result, _ := rosa.GreedyMotifSearchNoPseudocounts(dna, k, t) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(result, " ")) |
||||||
|
} |
@ -0,0 +1,67 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2eDescription prints the banner for Rosalind problem BA2e
// (Implement GreedyMotifSearch with Pseudocounts) to standard output, one
// line at a time.
func BA2eDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2e:",
		"Implement GreedyMotifSearch with Pseudocounts",
		"",
		"Re-implement problem BA2d (greedy motif search) using pseudocounts, which avoid setting probabilities to an absolute value of zero.",
		"",
		"URL: http://rosalind.info/problems/ba2e/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2e(filename string) { |
||||||
|
|
||||||
|
BA2eDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
params := strings.Split(lines[0], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
t, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// 1 line in the input file is for
|
||||||
|
// parameters.
|
||||||
|
// The rest of the lines are DNA strings.
|
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
result, _ := rosa.GreedyMotifSearchPseudocounts(dna, k, t) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(result, " ")) |
||||||
|
} |
@ -0,0 +1,64 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2fDescription prints the banner for Rosalind problem BA2f
// (Implement RandomizedMotifSearch with Pseudocounts) to standard output,
// one line at a time.
func BA2fDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2f:",
		"Implement RandomizedMotifSearch with Pseudocounts",
		"",
		"Re-implement problem BA2e (greedy motif search with pseudocounts) but use a random, instead of greedy, algorithm to pick motif kmers from each DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba2f/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2f(filename string) { |
||||||
|
|
||||||
|
BA2fDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
params := strings.Split(lines[0], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
t, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
n := 100 |
||||||
|
result, _ := rosa.ManyRandomMotifSearches(dna, k, t, n) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(result, "\n")) |
||||||
|
} |
@ -0,0 +1,65 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA2gDescription prints the banner for Rosalind problem BA2g
// (Implement GibbsSampler) to standard output, one line at a time.
func BA2gDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2g:",
		"Implement GibbsSampler",
		"",
		"Generate probabilities of each kmer in a DNA string using its profile. Use these to assemble a list of probabilities. GibbsSampler uses this random number generator to generate a random k-mer.",
		"",
		"URL: http://rosalind.info/problems/ba2g/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA2g(filename string) { |
||||||
|
|
||||||
|
BA2gDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
params := strings.Split(lines[0], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
t, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-1) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
n := 100 |
||||||
|
n_starts := 20 |
||||||
|
result, _ := rosa.ManyGibbsSamplers(dna, k, t, n, n_starts) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(strings.Join(result, "\n")) |
||||||
|
} |
@ -0,0 +1,13 @@ |
|||||||
|
package rosalindchapter2 |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestChapter02(t *testing.T) { |
||||||
|
//BA2a("for_real/rosalind_ba2a.txt")
|
||||||
|
//BA2b("for_real/rosalind_ba2b.txt")
|
||||||
|
//BA2c("for_real/rosalind_ba2c.txt")
|
||||||
|
//BA2d("for_real/rosalind_ba2d.txt")
|
||||||
|
//BA2e("for_real/rosalind_ba2e.txt")
|
||||||
|
//BA2f("for_real/rosalind_ba2f.txt")
|
||||||
|
BA2g("for_real/rosalind_ba2g.txt") |
||||||
|
} |
@ -0,0 +1,11 @@ |
|||||||
|
5 1 |
||||||
|
GATTTGGGCCAAAGTCTGCGGCGAA |
||||||
|
GATGTGCGTCAACCAGTCGGAGTCC |
||||||
|
TCACACCGGCTCGGAGATTTTTTTT |
||||||
|
GATCTACAACGCGTGACTATATGCT |
||||||
|
TAAGTGATTTTGTGGCCTTTACTCG |
||||||
|
CCATCTACCCGATGTTCGACCGCGT |
||||||
|
GAGCGCGCTGCCTACATTTGGATCT |
||||||
|
TCCGGGTTAGGATGTTGAAACAAAA |
||||||
|
ATGGAGCCATGATATGTACACTTAG |
||||||
|
GCATGGATCTTACTCCGACGTTATC |
@ -0,0 +1,11 @@ |
|||||||
|
6 |
||||||
|
CCCTAGTCTACCTGTTTGGAGCGGGGCCTGAATTTGACTGGC |
||||||
|
GTCTTTACCGAGTTAGTCTGATGTAAGTACTGCTCCTCTACC |
||||||
|
CCGACATTGCGCTCTACTCTGCGCACATAACTAAACGTTGCA |
||||||
|
CCTCCGTCTACATAGAAGGAGTCTGCAACGCCCCCACTGAGG |
||||||
|
ATCTTGCTCGTATCTACCGATAAGTAGCGAAAATCTAGCGTT |
||||||
|
CGGGGTTACCTGGCAGTGTCTACTAGATCAGATTGCCCGGCT |
||||||
|
TTAGTAAATGAATCTACGTCTCTGAGCGCGCGAATCAGGGTG |
||||||
|
TGAGCACTCTGACTTAACTCTACTACTCTCCAATAAGCGCTC |
||||||
|
TCACGTTCTACACTAGGTAAGTATGCATATTTGCATGAGTCT |
||||||
|
TTTGAAGAAGGCTCTACAAATTTAAACCCAGACTCAGACACG |
@ -0,0 +1,6 @@ |
|||||||
|
GTCACAGCTGCATAACAAGTAAACTGAGAAATCCCCAGTTAGGCGGATTGACCATCGAACACACTTTCACTACTTGCGGATAAATCCTGTAGAACTAGACTTTATCTCGGCTGCGACAAGACAGGAGTTCATGCACCTGCTCTGTCCCTCGCAACAGTCTAGGGAGCAAGTAGGCGGCTTCTTAGCTAGTACCTGGGTAG |
||||||
|
7 |
||||||
|
0.393 0.286 0.286 0.25 0.179 0.321 0.107 |
||||||
|
0.071 0.357 0.25 0.286 0.214 0.393 0.357 |
||||||
|
0.214 0.214 0.143 0.286 0.25 0.143 0.25 |
||||||
|
0.321 0.143 0.321 0.179 0.357 0.143 0.286 |
@ -0,0 +1,26 @@ |
|||||||
|
12 25 |
||||||
|
AGATCCGGTTTTATTCAAGCGAATTAGTGGGAGTGCGAGCATGCGCCAGATTCGTCCGGGATTGTCGTTAGGACACTAAACAGAGTCAGGTGCAGTGAGGAACCGGTCCTCCTTGCTGTCCATCTTTGGCTATCAATCGCTTTGCGGGCGGCATGC |
||||||
|
CGAGCATCCCTTTAACATAATTGCCCGTGGGTGTATTGCGTTTTTCCAACGCATAAGAGCATCTTATGTGTTTATGCGTGGAAGCCTATCACTTTGCATAGCGTTTGGCGATCACCTCCATGCCGCAAGGCCTAAGGCACACGGTTAATTGGGTCA |
||||||
|
AACGAGGCGAACCCTGGAACAGGTACCATGCCTTTGCGATTCAGCTTCTATCCCCGTCTAATTAGACATCTCAGCGTTCCTCAAGCTAGCAGACTGCACAGGGCTTATCCCCGGATGGTCGCTACTTCTCTGTGCATATAGCACGTAATGCCACAT |
||||||
|
CTTCCCGTCGAAATGCTACATAGACTGAGCGATACATGCGGTGCAGTTAGTTTGTTGACCTTATCCCACACTACAACGGCCTGTTACATTGCGCGTGTCTTATGCAAATCGATCGCTTTGTAACCGTAATCCACCATTTCTGGAAAGCATTTCCAG |
||||||
|
ATTAAACATTCCAGCAACACGCGGGCGATCCTGAGGAATCACCGCAACTCACGTCTAGAGCCTGTCCGGCACTCGATTACTTTGTCTTTCGAACCCCGTTGGTTAGTGCACTCTGTCATATAGTGCTAGGCTGCCCTCTCAGACGCGCTCAGTCGT |
||||||
|
TCGGTGTGTACACCTGGTAGAGGAGGAACCAATTAAACTTCGTGAACCCAAGGCGGCCCCCCATTCAGTTCGACTGGGACTCCGGCGCTTTTATGCGCGCGTAGAGGCAGTGACAAGGCTTCCGGTTAAGTCTTCTTTACTGACGCCATGCCTTTG |
||||||
|
CGTATCTCTGTTTAGGCTCCCACCCCGATACCTTTGTTTCTCATATGAGCGCTTGTCTCGCCGCCAGATATCTGACTGGTCCGGTGATCAATGCTTAGGCGTTCAGGTTTACTACTGTCGCGACAAGACGGTCATACGCGCCAAAGGCTTCACAGC |
||||||
|
AAGCGAAGTCCTTTGAATACTAAAACTCACCACTGGGCCGTCCCGACTATAAGTTGTCGCGAACAGAGTTTCTGTTACTTACCTCACTATCTTGCATCCATTCCTTTGGGTATTTGGGTTGTACACGCTATACGATCATGATTAGTCTCTATGCCT |
||||||
|
TAACAACGATGCGGTTCCGTAATCGTAGTGAGAAAACCGGGTAGGAAGTAAGTGTGCATGAACGTTAGGCGCGTCTTGAAGCCAGATGGGTAGCTGGCTAATGTTTCTGCCATAGGACTGGATCACTTGTGCCCAACAGGAACAGCAATTCCTTTG |
||||||
|
GCGATGACTTTGACGGCAGATCCGACCTCGGCTTAGTATGGTGGATGAACCTCCAAGTCACCGGGTCCTAGCATTATTTCGAATGGCCGAGGAGGCCATCATTAGGTAACGCCCAGAGTACATCCCCCGAACACCGAAGGTCGTTCGCGTCCGGCA |
||||||
|
CCTAACGTACCATTTTTGACTGGAAGCCAAAGTTGACCGGCTTTTATAGCTTTTGACGGTCTCCTGTACTCAAGTAGATTTTTGTTAACAAACCTGGCATTGTCGTCATACAGTCAGGGAAGATACTTCCCTAGCTGCACCCACCCAATAGCTTTG |
||||||
|
TGCTCTGACCAGACGATGGCTTTGCTGGAGGTTGAAGGCCATTTTTTTGTTCTAGTGCCCGACAGCTTCATGAGGGCGGTCGACTCTGAGGCTTGAGCAAAACCTAATATAAATGCTGAAGCTTAGCGCACGGCACGGAAATTGGGGGGAACTACT |
||||||
|
CGGAAGCGTTTATGACGGCGACAGGAGTAACCATGAAGAGGAACAGGCGCGACGATGGAACCGCCTTACTACGTTCCGTCACGCCACCCGAGTGGAGTCGGTACCGTTAAGCTGACGGCGCGCTATTCTCTCCTGATTAGGTTACCTATGCCTTTG |
||||||
|
GATGTAGCCATATAAATCATTCATCGTTATTGTGGGCTCTTGTCTACCGTATACACACACCCAATCCCTTTGGGCATTATTCGACTATCCCCTACCTCGCCTACTGCTGATACCACGTTTTAGGCTCCGTTTCATATATATCCCCCTAAACAAGGG |
||||||
|
GATGGAGCGTTGGCGAACCGCTGAGCGAGCTATGAACAGCCTGTGAGACGCGGGGTAGGAGCCATCACTTTGGATCGTTCCCAGTCTTTCTATTATCAGTATCGATATGCGGCAACCAGTTTTCTTGCGCTCTGAACCATCCTATAGTAGAACTTC |
||||||
|
TCCTATACGTAGCCTCGTCCGGCCTGACGTGTCCGGATTCATTTAGAGGCCATTACTTTGCTGTCAGTCGCTGCACTCATGTCGATTGTCGTGGTTGATTTAAAGACCCGCATAGCACAGTACCCTAACCCCAACTTCTCTCTGTTTAGACAGTGC |
||||||
|
GAGCTTTGTATGGAGATTGCGCTTCCGATTGCTTTGAACATCGGACGCGCTTATAGAGACACTCGTGCTGGCAGACCGGTGCGCGATAAACGAATCTCGGCGTGCATTGGTGTTTGGGCTTCCGATGTCAAAGACCGCAGAACTGCGCCGGGGAAT |
||||||
|
CGATCTTCAAAGGCTGGCTTGCATTAGGAGGACTGTGAAGAACACGCTTCTCTTATGACTGCACGGCGGTTGACTACGTCGCTTTGGGGCCACCCTTTCATTGCATGAACAATACCTTTGGTCTTTGACTGATCTTGAGGAGTCCACCGGATCACT |
||||||
|
ACATTTCAAACACACTGTATGGGTTACCCTAATTCGCTGCGCATGCGCTGGGCCTCGAGCGAAGAATGTACGTGCTTTAGCTACTGTCAGTCTATCCAACGAAACTACGGCTTACGTGGTTACAGACCCCATGCTGGTTGGGAATCGATTTCTTTG |
||||||
|
TATAAAGAAGTAGGTCCGTCAGATTCGAGGAATCCTCGATGTCCCTGGTACATGCAAAAGTTCAGAGCCGTAGAACTACTGTAGGCGATTGCTTTGCGCAAAAGGGATCAGTCGCCGTCGTAACTCAAATTTAGTCTTTTCACCAACGTGCAGGGA |
||||||
|
TTTGAGTCATTATTAACGGTGTACGGAGTGACGCCCCCAATGCCTTTGTCCGGCTTGTACCGGATTATCCGCTTGAGTAACTTATTCTTATCTGAGATGTCGGTGGATATTGCCACTTAATCGAAACGATCGTACCTCGCCCGAGTCCTAGCAGCG |
||||||
|
CGCACGTGAATGTAGGAGCCAATCCGGCCTCTTTAGTGCTCCAATCACTAAGGGTAGATTTGTCGCACCACCCGTATGTGATCCCTCAAAGCGAAATCATCTACACTCTCCATAGCTTTGAAATCCAATAGTACAACCTCGGCCGGGTAATCACCA |
||||||
|
ACCATATCTTTGCGGACTTCCGAAAAGATCGAAAAAATAGCTTACTGACCCCCAACCTTGAGGTAAGAGCGGTCCCTCGGTCAGGCGGAACTTCCAGTGTCCGATTAGATCAGGCCGCATAGTGTGGGACTCCGATCAAGTGTATAATATGCAGGT |
||||||
|
GCGGGGGGAGTTTGCTAGGACAGTCGGGCGGTAGTTTGTGTCTTAAGTAACTGCTCGAAGGCTAGAATGTGGGATCATAGCTTCAGCGGATTCCTAGCGATGGCTTTGAAACATGGACGAGTTACTTTTGGCGTTTTTGAGAGTTTATAAGGTGAG |
||||||
|
CCAAACATGGTGGTCACTATTAATTGTCCTCCGCGTACCGAGATACGAGGGGAGTCCTCCCACAATTCGTCGCCGATTTCTTTGAGTCAGGGTATCATAGGGAGTGCTATTCCATAGCGATAACTGCTCCACAGAAGTTCATTAAGTATTTTTTCT |
@ -0,0 +1,26 @@ |
|||||||
|
12 25 |
||||||
|
GATGTGCGTCACAATCCCGCCCTCCAGCTGAACTAGCCAATACTTCCTCTTTCTGCTTCATGATTCACCCAATAGACACTAGGGCTTATACGGGGTGTGTACTTCCCACTGTGGGGCGAGCTGAGTCCATAGTCATGGGCCCGCCTCATCTAAACC |
||||||
|
TCATAGCCGCCGCGCTGGTGGGTGCAAGTCTGCCGACTCCCACCTTATATGTAGCAGGTCACGTAGATAGGAGTGTGTTATATCCTCCGATAATCCCTAAATATAGGATGATACTGGTTCTGCCAGACTCTGTCTGTCTTGAAGTCGCTAGATGAA |
||||||
|
GCGTATAGAAGGAATCCGACTTGGACGCCATTCAGATCAACTAACATAAACAGCAGTAAGCACTTAGCTAGTTGATACCGACGCATACAGAGCGGTCGCGAAAGTCGAGACCGTTCCTTGGTTCAGCAATTCGTGGCTGCCGTCCTCTGGTAAGCC |
||||||
|
ATTGTATGGAACGGTGATGTGCTACTCGCTTAAACCTATAAGCGCACATTTGCAGTCCATGTGATACCTTCCAAGATGTTATTGGCGTGGAAATTAATAAGACGAGACTATTCGTCCTCTGGGTTAAGCTGGTATTTAGAAAGATTATCCAGCAAT |
||||||
|
CGGGCCGCAATGGGCGCCATTACGTTTTTGGTTTAATCATTGCTGGATCTCCCAACCCACTCGCCTGCGGGACGTTCGCTCAATCTCCGCCCACGTCTGTGCTCTATCTCAACGATAGTCCTCGACTATAGCCTCATGTAAACCCGGGAGACCTTT |
||||||
|
ACACAGCGCCTCGCAGGGGGTGTGAGGACCGACGTTGTAAAGGCCATACCGACTCTCAAGGCATTTTGGGAACCCCCACCTGTCTGTGCCATTGAACTCTACTAACCCTAATTCACCTATTGCCGAGCCTTCGGATTTGACGAATACGCATTGCGG |
||||||
|
AGCTAGCCGCTTGCCAAACCGCTCTCAGAAGGAACGAGCCTAATTCTCCTCACGTAATCCGGCCAGTTCATAGGTGTGCAAAAACTTAACACACGTTCGGGTGGGGTTAGCAGCGCAGAATTGAACCCCTTCACGCCGACAGGGTGGCTAACTACA |
||||||
|
ATTCATGATTGCTGAGTCATTGAAACACACCAGAAACGTCAGCGGCTAGACTATGTACCAGCGGAAGCTGGGATTCCTTTAGAAGCTGTTCCCATACTTGGTGGGTGATCCTATGATACTCTCGGTTAATCCGTGCGAATTTAGCTGTCCCTGAGT |
||||||
|
TGGTTGTGATATTAACCAAGTGCGCTCCCCTAAACCTCGAACCTGGGCCATTTAGATGACTTAACGCCGCCTAGCAGCGCCGCCGTGGTCTTACAACTAGATAGAACTGCGAGGCATTCTGCGGGGCGTAAGTGTCGTCACAGCGATCATGGATTC |
||||||
|
AACTTTCATGTCCACGCGCAACCTTCGGTTTCGTCCCTCTGCTAAACCAATTGGTCATTTCTATTGCTGGACCCACCCGCAGGCGGTCGAAAAGAAATACGCTGCCCCGAATACAGCTCCGATTCCTCTTGCCTCCCAGACACAGATGGTAACTTA |
||||||
|
GGTCCGCGTTGTAGTACACGTAAAGCCTTATCGGGGGTCCTAAATCTAGTACCTGGCAGACTCACATAAACCGCTCTCCCCGCCTGGAATCTAATGTCTGTAACTGATCCTTTTCCTTCATATCAATACCTCTGTATCAGTGTCCAAAGGTCAGCT |
||||||
|
AGATGAGGTGCAGTGGGGGGAGTGATGCTAAGTGGTCCCACTGAACTCATTGAACCCATACTCGGTGGGACTAACATCTACAGCCGCACGTCAGAACTACAAACAAGATCGGCACGAACGTTCGCGACTATTTACATCTTAGCGCTCTACTAATCC |
||||||
|
CGCGTTGTTGCCCCCGGATTACTACTCATAGTCTGGACAACTGAGACGATAGCTCATCATCGCGGTAACATGCGTGATTACGGGGAAAATTATGGGGTGACCCAGATCCGCTTCGACGCGCTCACCTAAGCCAACACGCGGGTATCGGTCTAGTAT |
||||||
|
TCTGACGCTTTCCCCACGAGAAGATCATCCCGGTTGTTACGATCCTTCCTCCAATAAACCTGCGCCCCATACTGAGCGGTACCCTTTCAAAGGTAGAGTGCTACCATGCGATGTATCTGAATACCATAAACGTGTGAGTAGGATTGTGGGGGTACA |
||||||
|
GACCGAAAATCGATGGCCAGACCCATCGAAGTCCGCCCCAAGTTGGCTATGAGTTTAATGTGCGCGTTGCTGCTATGAGGATGTCAGGCTGGGCCGAGGGAATGAGAAAGTTTCTGACCTCGAATGAGAGTGAGTCCTCCCGCACTCCGCTAAGCC |
||||||
|
TTAAGGTCTCTGACCGTTGTAAAAATTGTGCCCCCCCATCTTGTAATCGCTAACGAAAGGAAATCTTCTTGACAACCACAACGAGTAACTGCCGACTCGGTGGGTAGCCAGCGGTTGACGAACTGGACAATGCTCTAGTAAACCCCGGTCGACAGC |
||||||
|
TAAGAAACCTTGTCGACTGGGAAACTCGCGTAAACCTTTGCGGAGGCTCCTTATCGTTACCTGACTTCCAGAGGGACTACCGTCGAGTCAAGGGGTCAGTTAGGTAAGCACACGATGCATGCAACCCTTGCTGACTTCTTCAATTTCCGTGACCGT |
||||||
|
GACCATCAAGCCCGTAAATCGATGGTATCCTATGGTATCGTCACCACAGCCTCGTCACTTAAACCAGGCGTAAACCGAGTGAAACGACTCGGCTCGCAGATAGAACCCGTGTAGAAAACCATGTGAAACAAATGAATCACTTTACTCGGGTAAGCC |
||||||
|
TCAGTGTTAGCCTCGGGTCAGAACGGTCTATTGAAGTTAATGACACTCGGGTAGTCGGCGCTCAGCTAATCCGCCGCTAAGCCACGAGCAGGCGACAAAGGAGAATCTGGCGACGGGTAGCACGATAGTTGGGGAGGCCTCGCCTAGGTTAGACAG |
||||||
|
CTCGACTAATCCGAATTACACTGCACCTCCGAGTGGGTCTGGTTACTGCCGGTAGGGAGGCCAAATAGCTTGCCCAACTCGATAAGTCCTATGACGATCGGGCTGTCATAACAAGATTAATAGGGATGTCAAACCGTAGGGTGCACAGACAATTCC |
||||||
|
ATCCAATTCTCGATGGGATTACACCCTAACCAGACTGGGGATCCATAAGTCTTTTTGCCTCTCGCGTAATCCGCGCATGACTGTATACACTTCCCCAACGGGGGGTCAGTGTTTTCTATTCCCGCAGCACGCCCCGTCTAGCCGACCCGAAGGTGC |
||||||
|
GATGTCAGATTACACCATTTCTGGCGCGTTTTGAAAGATCGGACCTTCATATGGGTTCCTGCTCAGCGTGGACCAATGAGAATGGAGAGCCATGAATTAGCACTACGACTCTCCTAAACCATGATTCTGATTTTCTGATCTTCCCATCAGCCGTAC |
||||||
|
GCTAATCCTCCTGAGTTACGAGCATCCATGCGATAAACAAGACCGATTCACATCCAAATTGGCCGTCTCTGTGATGCTGGGCCGGTGAAGTTGACTTCGTAGAGTTTATCTCCAGCCTGCAACCTGAAGGATCTCGACTAACCCTTAAGCGAGCTG |
||||||
|
CCGCTCGAAGATCCCCTCTTGCAGCACGGTGCAGGTTCGGCAGGCTGAAGTCTACACCGCTTTGGTGACAAAGCGAATGACTCACTTAGGCCCGCGCATAGGGCAACCGTACATCACCGACAGAGTGTACAGCTCGGCTAAACCAGACATACGCTT |
||||||
|
GGGGAGCGGTGTCGAAAGAGAAGGCATCTCTGAAGGAGTTAAAAACCACGATTTGAAAGTCCTCTGTATATGCTCGCGTAAGCCTTGCTTTTCCCACTGAGGCTACACAGGCGAGTCCAGCTAATGACGGCGTTCTCATCTCAATGTTGGCGACTG |
@ -0,0 +1,21 @@ |
|||||||
|
15 20 |
||||||
|
CCCCGAGTAATTCCCAGATATAACGTACTCAAATGTTGAAAACAAGTGACCACTGTATCCACGAGGGGTGTAACTCTTATCAATGGCATAAGGGCCACGAGGACTCTACCATAAGAGCACAGCGCCAGCTGACGAATGAGTATCTCTGACCAACCGATTGCGATCTGTTGTTGGCAGATAGGCCCGCAGGACCCCGAGTAATTCCC |
||||||
|
AGATATAACGTACTCAAATGTTGAAAACAAGTGACCACTGTATCCACGAGGGGTGTAACTCTTATCAATGGCATAAGGGCCACGAGGACTCTACCATAAGAGCACAGCGCCAGCTGACGAATGAGAGATTTGACTACTAGTATCTCTGACCAACCGATTGCGATCTGTTGTTGGCAGATAGGCCCGCAGGACCCCGAGTAATTCCC |
||||||
|
ATAGTGCGTACACCACAAGTGAGATATGATACTTCGACCCAGAGGTAAAGATAAGATCTAGTATTAACCCCGGAGCGAAGGGAGAATGGTACGATCTTGAACAGACTACTCATCGCCGATATGAGTCGAAGATAATGCTGTCATCAAAAGTGGCTTTGTTGAGGTTAACACTGTAGACTGGATGCAAGGCCGATGAATTATAAGTC |
||||||
|
CGGGCTCGGAGAACAGACTAGGGGTACGAAAAGGTTCCGAAATTAGCACGCGCGCGTATAAAAAGATCGACGACCATGCCCGAGTTAGCTCACAGGAACAACTTTGGATAGTTAGATCCCAGCTGACAGTTCGAACTACGCAATCAGGGCTCCTCTGGATTCATACTCTAAGCATGAGAAGGCACAGAGCAAACAGCTACTTGGAT |
||||||
|
CTTCAGTTAATGATGCCTCAGAGGTCGGCGTTGAACCGCGTAACAGACTACTATCTTTATGCGCAGTACAGTTGTAATATGACTAAGGCGCCCGCGAACCGTTCCAACGTGCCGAGAAGGTTGGCCTACAAGGAAGAAGCCGGTCATTCAGTCTTCAAGGCCAACGGTCCTGCACAGATGATTACGCACCGATCAGTATAATGTGT |
||||||
|
CCATTGGGTGAGTTGATTCCATGATTCGTAGAAGCCACTACTAGGTGAGCTAGGCTCCTCTACAGTATAGAGAAGAGCCTTTAAGCCTATCCTGGAGCCTCTCACCCCACAATCGTAAGAACTTGGGTGCGTGAATGACTGAAGTATACATCACCTTAACTCATATTGTTGATCCGCTGTTGTCTGATTGGTAGGCTTGGTAGCGC |
||||||
|
CGAGCGCTTTTTGCACACCGAACGTGTCAGTTCCACATGAGCGTGACAGAGTGCCCGCGCATGGGGTAATCCCGTATCAGAACAGTAAACTAGGTCATGTCCTCCATCGTCTTAGAAGGGGACAACCCCGCAGGGTATGCTAAGAAGTGGAGTAGAGAGTGTTGTGCTGAACACGCGTATCCGGCGGTTTCAAAGTCCAAGGTTGA |
||||||
|
TGTCGTCCCTCTTCTTTTCACTCACATGTATGCCGCTAATACAGACCAACTAAAGAAGAACCAGCTACTAGTGCCATACCTCAAAAGCTAGAACTGTAATAACACACCGCTCGTTGTGGGCCGATTGTATATTAGTAAAGCAGCCAATATTTGTAACACGGCCGATGACCGTGCAAGTTTCCCTTGGAGGCAATGGCATCAAATTC |
||||||
|
TGAGGTGATTGTTTATCCAGATTGGCTGTTTGTCTGAGAAGCTTGTAGCGAGATTCGACTACTAGCTATAGCGACTCAAAATGCTGCGCATTCCCAACTAAAGTAAAACGCAAGCTTAGATGCAGAATTGAGATCACTTGTCTGGATTCATTTTTAATGTGGCGCTACAGGTGCATGTCATGCCCGGTAGGTTGAGAGGCTCTCAA |
||||||
|
ACGTGGAGCGATCCTACCGGTGTATGTGTACCATCCTGGCTGAAAGGCAGTCGACTGGCCACCGTTCGGGCGGCTGCGTTAGAGCTGACTACTCGCATAGGTCTAGAGCGATGACCCCTTGTTTGTCAGCGTATATCCTGGGTAATCGTTGTACCGCCTTTCAGGCCGATCCTAGGCAAGACTACTAGAACTAGGAGAACATTGCG |
||||||
|
TAGACCGCCACCCAGGGTGCTTCCTTAGATCAATCCCGCGTGTAATTAAGCGTAGGGGAGACGCTCCTGAGAACAGACTCTCAGCTAAGCGTAGGAGGAGCATTTTTTTTGGATGAGGCCTCCTCTGTGGATACAAACGGCTCGGTCAACCAGCCACACGAATCAAGTGGAGGATATGTTGTAGTCCGCTCATTCCGAACTTTTAC |
||||||
|
GATATTTACGCAGTCGGACACCCCTCTTCCTTAATGCTTCGTTAAAATATCGGTCCGTGCGAGTTCTGGTGGCCCGTAGGCCTCGCTACTTAGAGGTGGGCCCCCAAGGGCACGCATAATCGGCGCCTACCGCGGTAGATCAAAGTGAACGAAGATCTTGTAAAGATTCTATGCGGAGGCAGAACATCTTACTAGGTAAGAGATTC |
||||||
|
GCTAAAGCTATCCAAGGATACCGGGCGGTGCTAAACTATTACAGTATAACATCAATCAATCACCGCTTCGCCTTCCTCGTAAGTCATACTGCATATGGGCTTCCACTATAGAGTAATCCCGTGAGTGGACGAGAAAGCACTACTAGACAACACGGACACCGTTACAGTCATAGTTGCGGCAGTGCTCATAAGTCCTTCACACAGGG |
||||||
|
TCCCGCTATATCTGATCTTTGTGACTGCTGGGGTATAACTCAGCTGGCACAGAACAGACCCGTAGTCCAGAAAGACTTGAGTATAATGACGCCATGCTCGACCGGAGGTCAAGTACGGGAGAGGCTTACGCAGAATCCCCCAAGAGGCTCGTTAACTGACCGCAATCGATTTCAATTCCCTCCGAGCTCACCGAGCGCTGGTATAG |
||||||
|
TTCCTGACTGCTTGAGCCAGCGCTATATTGCGCGCAAACCAGTCGCGTGACGTACCGTCATACCGAGTGATGTAAAGTTGATTATTGGGATCAGCTAATTCCTCGCGGTGTTAGTTCATCACTTTTGAGTCCGACAGACTACTAGTTCACTACTTCGAAGTTTGCTCTTACAAGCGAGGAACGGCCTGCCGAGTATACAGGGGCGT |
||||||
|
CAGGCCTCGCGATTAACCTTATGCGGCCTCTCAGAGCCCCGTCTAGCGAAGGTAATATGAGAACAGACTACAGAGACTACGCTCTGCCCCCTAAGGACTGGGAATTTATGGCCCTATATCGCCCTTATTCGCCATAACTTCTAAGATTTGCTATTACCATTCTGAGGCAGTTTAACTAAATGGCTATCCAACCTATGAGGACTGAA |
||||||
|
GATCCAGAACAACTTACTAGTCCGTAGGGCTGGACCTCTTCATGCCCGGGCGTCGGGCACATACGCGTATCAAAATGGGAGATGGCATTATCTACTTCTCCTGTGATTTTGAACGTAGTCACCCCACATCATACTTTTCACACCTCGTACTGGTGATTCCATTCTACCCAACGATACGTAATTGACCCCGCTTTTGATTGGAATTT |
||||||
|
CAAGAGTCGTACGAGCCCTCCGTCATCAATGCTTGCGATTAGAGTTCACGGTAGAATCCACCAGAGCAGAAGAGAACGCCAGTAGCGACCAGAAAGCCTTTAGAAAAGGCAGACTACTAGAATGTTGTGTGTAAGTGTACCAAACATTGATTCGGACGGTTGTCGTGTTCGAACCAGGTGATTTGGTGAGGTTTCAGCGCCTAGTG |
||||||
|
TTGTGATGATCTTCGTAAACATCCGGTGGGAGCCCCCCTCTCCTCGATGACTGTTTGACTATGATCCATTTACCTTGACTCGCAGGACGACAAGCCATTATTCATGCCGTGTGATAAGAACGCTCTACTAGGTTCTGTCCATGCCCAGCACAGATCAAGGGACCCGGCGGGCCCGGGTCAGAACTTTGGTCACCATTCGCAAATCA |
||||||
|
AGAGGCACCTGGCGCCAAAGGCATTTAATGAACATGGCGAACTGCCAGACGAGCATGGTAGAACAGAAGCCTAGGACCATCCCGACATAACAACCACTATTTATAATTGAACTATCTTGGCACACACGCTATTGGCGTTGCACTGAGACCGTTCATCGCCTTCACTGTGACCATTCGCCTATAGACATATAACTAACTTGGCTTCA |
@ -0,0 +1,21 @@ |
|||||||
|
15 20 2000 |
||||||
|
AGTGGTTTAATCGGACGAGGCGTGTCCCTCAGCCCGATAACCATCCCGTCCTGTGTGCGACCGTTGAGCATCGTATTAGTTCCGTAGGATTTTGCGGTCGTCTATTTGATATAAAGTCAGGTATATATGGCCACAAGTTCGCGTGGACCGTTAGCGCACCAACACTGTAATATAACTGCCTTAAGGTAGCGACTCGCCAAGCGCAGGGGCAGCCCTGACAGTTTCCACGAAACTCAAGAGAGTATGTAGCGACAGTCCTTCGCAAGACAATCGTACGTGTCTACCGAAACTTAATTTCGTTAGTGGTTTAATCGGA |
||||||
|
CGAGGCGTGTCCCTCAGCCCGATAACCATCCCGTCCTGTGTGCGACCGTTGAGCATCGTATTAGTTCCGTAGGATTTTGCGGTCGTCTATTTGATATAAAGTCAGGTATATATGGCCACAAGTTCGCGTGGACCGTTAGCGCACCAACACTGTAATATAACTGCCTTAAGGTAGCGACTCGCCAAGCGCAGGGGCATCGAAGAGTGTGCATGCCCTGACAGTTTCCACGAAACTCAAGAGAGTATGTAGCGACAGTCCTTCGCAAGACAATCGTACGTGTCTACCGAAACTTAATTTCGTTAGTGGTTTAATCGGA |
||||||
|
TCGCTAAGTGGTGATACCGGCTGATAAGAAAGTAAGATTTCAGCATGACCCTGTTGATTCCACCCCTTCCTTTCATGGTGAGGCTTGTCTTTGCGGCGCCTCACGGTACCTGTGGACTGTACACACGAAGCACAACTTCCGAACTATTCGTTTGTAGACTATAAATCACCATGCTCATCAAGCTCAAAATTTCTCCTTACACCGACCGCGGTGGGAAAAAACGCAACGAAGCTCCAATTATCTCCAGTCTCTGCACGTGTAGAGATGGTGGAAAGCTAAGAGATGCCTTCGCCACATTAAGTCCCGCACAACGTTA |
||||||
|
ATTGGCAAAACCGATAGGATCCCGCGACTATGACGTCGCTTTTCGCTAAGTGTGGGCTGACCCTCCTACAAATAAGTCTCGTTTTAACCCTGGCCATTGCTTACAACCGCCGAAGTCGCGCTTCAATCAAAGGTGCAGGGTTATAATAGACATACAATTAGGATGTTTGACCGACATGCCTTGTTAACTTTAATTGACGTTACAGATTGATTATGCGATCTCTTTATGTTCTCAATTTAATATACCTCCGCTGGTTCCTATTGGGAGCCTTCAACACATAATAAATCCTTGTACCTCTGATTGAGTCTCTTTGCCT |
||||||
|
ATGTTCCTTAAGTAATTAATAGTACGTACACCGGTATTCGCTAGCCGTGCATCTTGACCCCCCCAAGGCGAACAGTTTGGATTTGCGAAGTCCCACGAAGGGGGCTTAAGGCTTGAGCCACATCCAGTTATGAAAGTATATCATCGGCACCCAGGAGGCTAGACAGGAGGTCAGAAAATTCCGCATTAGCGTCGTTGCGCAAGGCCGTCGCCGCCCGTGCTTCCAGGATTAGATCGCCTGCCAGACAGTCCGACTCCGTTGACAATAGAAGACAAGCTTATGCCCCGATTCACTCACCACCCAGACAGGCCCGCAT |
||||||
|
TGCTCGGACTGATATCCGCGTATGCGTACGTAATGTCTAGCAGGCGGTCGAGCCATAGGCTTCAATAGGGGTGTTGCGACTAAGCGATTGGCACAGGAAGCATTGGAATTAACACCGCAGTCATCTAAGTGTGCATACGGGCATGTGGAGATTTTTCTACGAATGATGCGTCAACGACCCATGGAATGAGTTTTTAGTTGTTACCCATTTTTATAATAACGTGCGCGGTTTATCTTATCCCTTATAATGATCTCTAACATAGGCGTACCTGAAAAGAATGCATTAAGCCGCAAAGGAGCCCAATTCTCAGCCGTCG |
||||||
|
AATCTAAGTTACTTCATGGTTCACGGTGCCACATCGACTGATCATCCATGCCTAGTGCTGTACTTAACCCATCATATTTCCTAAGTGCTTCGAACCCTTCGATCGGGGTGGTCATCTGTCCGTGACAAGGCTGCTAATAACCCACGCCGGTATAACACTGATTACGTTATACGCCTTATTCGGCAGTGACTGGCGTGCCACGTGCCAGGTAAAAAGAAATCTGGAACAGGGCTCCTCTTTCATAAGTGTGCATTTAAATAAGGGGAATAGTAAGGTCTCATTTGTAGTGCACGTGCCTTTCAATTATAGGCCCATA |
||||||
|
CGCCATCCACGTTTTAGTAATGTACCCAGGCCAACTAACACATAGCAACCGTCAGTTTTCACAGTTGTCATCTTGCCGCCCGAATAAGCCCCGCTGACCAACGTCTGAGGACGTTCTCCGCGGAGATGAGGGTATAGCGTCGTCGTACCTGCATTACCGAACAACTCTCCATCTTTAGGGAATTACCCATCTTAGCTATAGGACACAAGAGTCGACAGTAATTGTGGACTGGCTTTTGCGGTCTCGGTTCAATCAAGGAAAACCCTCTTGCACTACAATCGCAGCGTGTGCATTCAGAGCCCTTATCATACCTCGA |
||||||
|
AGGCGAGGTGAGGTCATGACCTGTCTAACCCCTTAGCGCCGGTGTAAATTCAATGCACGTAACGCTAGAGGCCTTAGGCCTCGATTCATCCTTGTGATCCATGTAATCGAACGACGCCTATCTAGTTCCGGAGCTTCGAACGAGGCCGATTAAAAACCCGTTGGCCGGTTGATCTGTGCTGCTCAAGATGTAACATCCCGAACTCAGCGCATCACGCCCCGCCAGAGCCTTTGGGAGCAGGGCCAGCGCTCGCTGGTTGTGCATGCGCTGCAATTCAAACACCTGTGGCACGAGTGCCCCAAAGGACCATCATCGA |
||||||
|
TTGCAATACAGTGCCCCTTGTGCTGTTTGCTAGGCGAATACAGGCGACCGACACAAAGCCCGGCCCTATATCAGTACGAGGCAACATACTGCTCGCTAAACTAGCGTATAAATTTGGACACCATAATGCGCAATGCACGCGGTATAGGTGGTCTTGTGGTAAGAGGGATTTCTAAATATCGTATTCCCCAACGCAGGTATACGAAGCCCATGGTAATTTAAGCGTTTAAACAGCTAGAACTCGCTCGCTCTTTGTGCATGTACTAGGTCCTTGTGTAGAAGAGGCGCAATGCCTTGCTATAGACCTTTGTCCCGTC |
||||||
|
GTGCGAAAATGCATAATTATAACTTTTCGCCTCGGGCGCGTCCACGGTATTACGAAGTCGAAGCGCGCATCCACTGACAATTCACAGACAGCAAAAATTGTTGCATTTAATCACGGGGACCCTATGGTGCAGTTGCGAGACCACATATGACCGGCTTTGTAGCACCGGCCCCAGTTTAATTCCCCTGACTAATGGTAGATACGACGACCGCCCCCCACAGACTCACTCACCTCCAGCACCAGCTCAACGGTGACCCCTTTCTGTTCTAATGCGGTATTCGCTAAGTGTCACTAAGGTTAGTGCGGCTTTTGCTGCA |
||||||
|
GTAGTCCGTGGCATATGGAAGGGGAGCTTTACTCCCTGATCGGTGAGTGTGGAGACGTTTAGCGTACTGGTCACGGCAAGAGACGTTGTGAGTGTTGTATATGTTTCTTAGGAAAGCGGACGGCGTTACGCATGAGTAAGACGGCCTAAAGAATGAACCATGATTGATAATCTATTAATTGTTAGGTAAGGATAAGCAAAAAGGTGCTGCTGGGTCTTCAAGTAAGGGATATTCCCTCGCAGTGTGTGCATGACTCATATGGTTAACCCGTCTAAGCAAAAATCTCGACAGGAAGGTGAGTCGGCGCCATATGAGG |
||||||
|
GGCGACAGCTGGTAACACGGCTATCGGGGCCGAATTGCCGACACTTGGCGCATCGCTGGAAGTGCTTCAGATAGTTATGACGGTGAAACACGCTCCGGCACAGCCTATAGTATGTTCGTAGCGTACAAAAGCTAGGCAGCCGTAATATCAGCGCTTAATGCTTTAATTGGCATGTGTCCTATTTGTGACGTTGATGTCGATGATAATCCGCACAGAACAACTCATGCATATTCGACGAGTGTGCATGATAGACACTGCGTTGTTGCCATGTATCTCCTGAAGCACGCACACAACGAAGGTCGCGTGCTTTTTCCGG |
||||||
|
ATACTACGATCAAGCGAAAAGGGGACAATCGTTGGCAGGGTCACTAGGGCAGGGTCTTAGAGAATCAGTGCAACGTTTTAATTCGCTCACCTGTGCCGCTAAGTGTGCGATGTGTAATCTCCAATGGGAAATGAATCCTTCGGCTCGAGTAATGATTGCGTATGGTATTGGCCCAACGTAGGACTCAAGTCCCTGGTCGTAGCGCGATCTGTAATGTAAACTTACCATACGAGCAGGCTACGTTGAGGAGCGCTCGCGGAACGGTATAGAAAACGGTACGTCATATTGGCCCTTGTGACTCCTCCTCCGCTTGGAT |
||||||
|
TTTTATTACATCTGTTCGCTAATCCTGCATGGTAAACAGTTACAGGAATTGCGATACTCCACTGGGCCACCACTACTTCACTAACTGGTAAAATGCCGGTCAGCTCATAGGTCCAAATAACGTTTATGGGTGTTAGCAATGTTAGCGATGCGATCTGTAAGATCCGAATCGTATCACAGCGCGCTCCTGCCAACGTCACCTGCTCCACCTAGCACTTGTCGATAACTCCCCCTCCAATCTACAAACACCAACTCGAAAATTGACACGCGCTTCTCGGCTGTTGGTGCACTCTGATGTTAATATGATGCCATGAGCC |
||||||
|
TGCCAGATACCTACCCTCTATATTCCAAACGTGAGTGAACTAAGTTCGATTACGAACCCGTACGTGGCTGAGCACGGCTAGTACCGGCCCGATTGTACCGTTCATATTGATTAGAGTGACCGGAGCACACAATCATCGCCCCCTTAAGCTATAACTGTCCCCGGAGCCTGAGCCTTTGACAACTTCGATAGGTTAATAAAAGAGTCTCGCTAAGTAGCCATATGATGAGTGATAGGGAGGCCTAGACCTGGACAACCCCTCATTTACGTCCGAACTCGGAGTGAAGTGTAATGTGAGCTCTTAAAAGGAGCTGGAT |
||||||
|
AGGCTCTCTGATTTAGCGGTGACCGCGTCCCGATTCTCACTCCTCAGAAGGTCTGGTAGCGCTACGGGGATGGGAACCCAGACACTCGAATCGAATCGGTATGACACTAGTACAAGGGGGCCTTTAACGCACGAGAATAACACAATTCCTTCGCATACATAGCTAACAGCAGACATAGGTCTTGATAAAAACTGTGCTGCTTCCTCAGAGTCGCTAAGAAAGCATGCCAGTGCACACTGGACATTACGCCGCAGTACAGCAATTCGCGTCTCAGATCAACCTGGGGAAATAAAACGTCTTTGCGTTAGCCCTTTGT |
||||||
|
TGCCTTGATGTCCAGGCATAGGTCATGTACGGGCTACCGTCATGTCCATGTCAGAATCCGAATGATCCTCTGGATTCCGATCCCGGCAGAGGGTAACTGTGCGACCTCAGCTTCCTCATCCCGCTATCGCTCACGGCCGGTCCTAGTGCGGCATGGATAAGCTCATCCAGGATGATTTACCCAAACCCTTTCACGTGGTGGTGGGGCGCGACTGTCACGCAGGAGAGTCGCCCGAGCTGTGGGCAGGAATACTTTCCTAAAGTGTGCATGTTAGGAAGAGACGTTAACTGCGCCTCCCTATCCTATCTGAGTGGCG |
||||||
|
ATAACGACCTGTGTGTTCATCGTATCTTCTCGAACACTTATGTAGATTCGCGTGGCTACGTTGTACATTCACTCCACTCAAGAGCGAAGGGTGACGTTTTCACTCCTCGCTGGAAAACCTAGAACGGGCTGTTTTTTACGATCAAAACAAAACCACTTGATAATTGTACTATTGTCTGGTAAGCTAAGTGTGCAATCAAGATCAACTCAATCCCCTGCCACCATAGTGTGGGCACCACGTAGAGAATTCGTCGAACAGATAACGCAAACTGACAGGGAGCTTAATGAACCATCAGCCGATCACCTTCGTGAGCATC |
||||||
|
TCGCTACTGGTGCATCCTATCTATTGATATTGACAACCCGGGATTAGTGACAACCGATTTCAGACTAAACTAGTTAGTAAAGCATTTCTCTATCTCCGCCGAGTGGACGGTGATCTAAGCAAGTAGGTGCCAGGAGGCCCATAACCGCCAATGACTTTCATGATCTAATCGACGGTTCGTTTTGAGGTTGGGGTACGCTCATAACCTTTATGTTTTGGTACACGCCTGTCACCTGCGCCGTGGTATCTGAGACATTTGTCTCCTGGACTAGTTGATTCCAGTATTCACAGAACGCCGGGATACGTTTCCGTCAATA |
@ -0,0 +1,77 @@ |
|||||||
|
import jinja2 |
||||||
|
import os |
||||||
|
|
||||||
|
def main():
    """Generate Go boilerplate files for the Rosalind chapter 2 problems.

    For every entry in the problem table, the template ``template.go.j2``
    (looked up in the current working directory) is rendered with the
    entry's fields and written to ``ba<chapter><problem>.go``. A file that
    already exists is reported and left untouched.
    """

    # Jinja env: templates are resolved relative to the current directory
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    problems = [
            {
                'chapter': '2',
                'problem': 'a',
                'title': 'Implement Motif Enumeration',
                'description': 'Given a collection of strings of DNA, find all motifs (kmers of length k and Hamming distance d from all DNA strings).',
                'url': 'http://rosalind.info/problems/ba2a/'
            },
            {
                'chapter': '2',
                'problem': 'b',
                'title': 'Find a Median String',
                'description': 'Given a kmer length k and a set of strings of DNA, find the kmer(s) that minimize the L1 norm of the distance from it to all other DNA strings.',
                'url': 'http://rosalind.info/problems/ba2b/'
            },
            {
                'chapter': '2',
                'problem': 'c',
                'title': 'Find a Profile-most Probable k-mer in a String',
                'description': 'Given a profile matrix, find the most probable k-mer to generate the given DNA string.',
                'url': 'http://rosalind.info/problems/ba2c/'
            },
            {
                'chapter': '2',
                'problem': 'd',
                'title': 'Implement GreedyMotifSearch',
                'description': 'Find a collection of motif strings using a greedy motif search. Return first-occurring profile-most probable kmer.',
                'url': 'http://rosalind.info/problems/ba2d/'
            },
            {
                'chapter': '2',
                'problem': 'e',
                'title': 'Implement GreedyMotifSearch with Pseudocounts',
                'description': 'Re-implement problem BA2d (greedy motif search) using pseudocounts, which avoid setting probabilities to an absolute value of zero.',
                'url': 'http://rosalind.info/problems/ba2e/'
            },
            {
                'chapter': '2',
                'problem': 'f',
                'title': 'Implement RandomizedMotifSearch with Pseudocounts',
                'description': 'Re-implement problem BA2e (greedy motif search with pseudocounts) but use a random, instead of greedy, algorithm to pick motif kmers from each DNA string.',
                'url': 'http://rosalind.info/problems/ba2f/'
            },
            {
                'chapter': '2',
                'problem': 'g',
                'title': 'Implement GibbsSampler',
                'description': 'Generate probabilities of each kmer in a DNA string using its profile. Use these to assemble a list of probabilities. GibbsSampler uses this random number generator to generate a random k-mer.',
                'url': 'http://rosalind.info/problems/ba2g/'
            },
    ]

    print("Writing problem boilerplate code")

    # The same template serves every problem, so look it up once
    # instead of once per loop iteration.
    template = env.get_template('template.go.j2')

    for problem in problems:
        fname = 'ba'+problem['chapter']+problem['problem']+'.go'

        # Never clobber an existing (possibly hand-edited) solution file,
        # and do not spend time rendering a file that will be skipped.
        if os.path.exists(fname):
            print("File %s already exists, skipping..."%(fname))
            continue

        # Render before opening so a rendering failure cannot leave an
        # empty file behind.
        contents = template.render(**problem)
        print("Writing to file %s..."%(fname))
        with open(fname,'w') as f:
            f.write(contents)

    print("Done")
|
||||||
|
# Script entry point: generate the files only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
@ -0,0 +1,49 @@ |
|||||||
|
package rosalindchapter{{chapter}} |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Print problem description for Rosalind.info |
||||||
|
// Problem BA{{chapter}}{{problem}}: {{title}} |
||||||
|
func BA{{chapter}}{{problem}}Description() { |
||||||
|
description := []string{ |
||||||
|
"-----------------------------------------", |
||||||
|
"Rosalind: Problem BA{{chapter}}{{problem}}:", |
||||||
|
"{{title}}", |
||||||
|
"", |
||||||
|
"{{description}}", |
||||||
|
"", |
||||||
|
"URL: {{url}}", |
||||||
|
"", |
||||||
|
} |
||||||
|
for _, line := range description { |
||||||
|
fmt.Println(line) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Run the problem |
||||||
|
func BA{{chapter}}{{problem}}(filename string) { |
||||||
|
|
||||||
|
BA{{chapter}}{{problem}}Description() |
||||||
|
|
||||||
|
// Read the contents of the input file |
||||||
|
// into a single string |
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
//// Input file contents |
||||||
|
//input := lines[0] |
||||||
|
//params := lines[1] |
||||||
|
//result := rosa.PatternCount(input, pattern) |
||||||
|
// |
||||||
|
//fmt.Println("") |
||||||
|
//fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
//fmt.Println(result) |
||||||
|
} |
||||||
|
|
@ -0,0 +1,60 @@ |
|||||||
|
package rosalindchapter3 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strconv" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA3aDescription prints the banner for Rosalind.info
// problem BA3a: Generate k-mer Composition of a String.
func BA3aDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3a:",
		"Generate k-mer Composition of a String",
		"",
		"Given an input string, generate a list of all kmers that are in the input string.",
		"",
		"URL: http://rosalind.info/problems/ba3a/",
		"",
	}
	// Emit the banner one entry per output line.
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA3a(filename string) { |
||||||
|
|
||||||
|
BA3aDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
k_str := lines[0] |
||||||
|
k, err := strconv.Atoi(k_str) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: string to int conversion failed for %s\n", |
||||||
|
k_str) |
||||||
|
log.Fatalf(msg) |
||||||
|
} |
||||||
|
|
||||||
|
input := lines[1] |
||||||
|
|
||||||
|
result, _ := rosa.KmerComposition(input, k) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
for _, kmer := range result { |
||||||
|
fmt.Printf("%s\n", kmer) |
||||||
|
} |
||||||
|
fmt.Printf("\n") |
||||||
|
} |
@ -0,0 +1,54 @@ |
|||||||
|
package rosalindchapter3 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA3bDescription prints the banner for Rosalind.info
// problem BA3b: Reconstruct string from genome path.
func BA3bDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3b:",
		"Reconstruct string from genome path",
		"",
		"Reconstruct a string from its genome path, i.e., sequential fragments of overlapping DNA.",
		"",
		"URL: http://rosalind.info/problems/ba3b/",
		"",
	}
	// Emit the banner one entry per output line.
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA3b(filename string) { |
||||||
|
|
||||||
|
BA3bDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Trim each line and there are your contigs
|
||||||
|
for i, line := range lines { |
||||||
|
lines[i] = strings.Trim(line, " ") |
||||||
|
} |
||||||
|
|
||||||
|
genome, err := rosa.ReconstructGenomeFromPath(lines) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error when calling ReconstructGenomeFromPath()") |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(genome) |
||||||
|
} |
@ -0,0 +1,54 @@ |
|||||||
|
package rosalindchapter3 |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"strings" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA3cDescription prints the banner for Rosalind.info
// problem BA3c: Construct the overlap graph of a set of k-mers.
func BA3cDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3c:",
		"Construct the overlap graph of a set of k-mers",
		"",
		"Given a set of overlapping k-mers, construct the overlap graph and print a sorted adjacency matrix",
		"",
		"URL: http://rosalind.info/problems/ba3c/",
		"",
	}
	// Emit the banner one entry per output line.
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func BA3c(filename string) { |
||||||
|
|
||||||
|
BA3cDescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("rosa.ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Trim each line and there are your contigs
|
||||||
|
for i, line := range lines { |
||||||
|
lines[i] = strings.Trim(line, " ") |
||||||
|
} |
||||||
|
|
||||||
|
g, err := rosa.OverlapGraph(lines) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("Error when calling ReconstructGenomeFromPath()") |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
fmt.Println(g.String()) |
||||||
|
} |
@ -0,0 +1,9 @@ |
|||||||
|
package rosalindchapter3 |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestChapter03(t *testing.T) { |
||||||
|
BA3a("for_real/rosalind_ba3a.txt") |
||||||
|
BA3b("for_real/rosalind_ba3b.txt") |
||||||
|
BA3c("for_real/rosalind_ba3c.txt") |
||||||
|
} |
@ -0,0 +1,2 @@ |
|||||||
|
50 |
||||||
|
GGGGGAAACTTACGGAGTACAAGAAGACCCGGCACAAAGAGAAAACACGTTGCTCGTTAGCTTAAGTTAAGACGTATCGGATATCTATCGTATCCTCGTAGTATTGCTAGCCACTTCACTGGACCAGGCTTACGTATTAGCCTTATGACCCCATTTCGTCTCCGCTGCTACAGCTGTGGAGTTGACGCGTCCGGTGGGCCCTCCGTTAGCAGGTCAGCTCATATTTTCGGCAAGAAATTACCCGGAACGGACCGAAAATGGGGTACAACATGCCCACCCACAACTTAGTACACAACGCTCAGCAAGTAGCTAACGACCGCTGCCGTCGTCAGTATTAGACGCACTTAACCGTACGGAATCCGTGAGTCCTGTTTCCGCCGATCGAATTACGCGCCCGGGTCGTGGGTCCAAAGGTGGCCGATCTCACGTACTGGTGAGTCGCGCGGTCACTTGGCTGTGAGGTCCACCGGCGGCCACAGTAATCTCTGGTGCACCCAGAATCGAGTCTGGATTGTGCACAAAGCTGCCCGCCTCTATTTCTCGGACCTGGCAGAACGCAACGGATGGGTTGAAGATTGGGCCGGTTCCGATGCCCCAAAGTACCCACATTTACTAGGGTGAGGCTGTTCTTTTGAGAGTAGAGACGAAAGCACCCCGACGTAACTGCTGCACACGGGGCTGCTCGGGATACTGTGCCGGAACTAGCGAGGCTCTACCCTCATCGGAAACCAGGCCTCATAATTCTTACAGCGTACTGTGTACTCCACAAGGAGCTGACCAGACATTCCACGTCCATGGATTCGGCTCATGCATACCTCCCGATCCACTCCTGAGCACATTGGATGGACACTTGAACGATGTCCTTAGCGCACGAGACATCAATTCGTGACGGTAGATTGCTCTCACCCTGATGCGGGTAAGTCACGTATTACCCGGCGTGCGGTATGTAGTAATACAGCTATCTACAACAAATGCAACCCGGCAGGTCTCCCATAGACCA |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,981 @@ |
|||||||
|
TGTACCTCGGGCTTTAAGGT |
||||||
|
CGGAACGGGAATCCTGGGAG |
||||||
|
ACTAGCTGCACTATCGGTTT |
||||||
|
CGATCTCCAGGAAGATGTTG |
||||||
|
AGCGGGAACTGCTGCAGGGC |
||||||
|
GGGCTCCCGCGCCAGTGCCC |
||||||
|
AATGCCTTTCTTTAGGCAGC |
||||||
|
AGAAACCGAAATCGTGCCCC |
||||||
|
TCATGGTTACAAAATTTACG |
||||||
|
CCAGGTCAAAGGGAGCTATA |
||||||
|
GCGCCCTGGTAAGTTACGCA |
||||||
|
GACAATGGCCCGTGTGAATG |
||||||
|
CGACTAGATAGATCCAATTG |
||||||
|
ATTTTGGACCGCTGTCCTTC |
||||||
|
CGACGTTCAGCGAAGAACGA |
||||||
|
ACAGCCATCAAAGGAGAGCC |
||||||
|
AGCAACAATAATTACGTCAC |
||||||
|
AAATGGTCGAAACTAGCTGC |
||||||
|
CAACCTGAATGTCGGGTCCG |
||||||
|
AAAATTTTGGACCGCTGTCC |
||||||
|
TGGTATCTCAAGAACCCTCT |
||||||
|
GGAGGGCCAACCTGAATGTC |
||||||
|
ACTTGTAACCCGTCGGAACG |
||||||
|
GCCATTTGTGCCTTTTTGGT |
||||||
|
GAACCCTCTCTCTACAGGCT |
||||||
|
GTGCCTTTTTGGTGTGGAAC |
||||||
|
AAGGTCCCACACAGCGTGCC |
||||||
|
TCTCTCTACAGGCTAGTCAT |
||||||
|
CTGCTGGAAACGACTAGATA |
||||||
|
GCTTTAAGGTTAGGGGGTAA |
||||||
|
AGCGAGGCCCTTGACACGAA |
||||||
|
GATTGAATAGAGATATTCGA |
||||||
|
CTCTCTCTACAGGCTAGTCA |
||||||
|
CATCTTTTTGCCCATTATAA |
||||||
|
CTGCAGGGCTCCCGCGCCAG |
||||||
|
CATTAACACGACGTTCAGCG |
||||||
|
TTCAGCGCCCTGGTAAGTTA |
||||||
|
GAATGTGGAAATCGATCGTA |
||||||
|
ATTCAGCGCCCTGGTAAGTT |
||||||
|
TTTGGCCGAACCAGGTCAAA |
||||||
|
ATCGGAGGAGTAGTTGGTAC |
||||||
|
CGAAATCGTGCCCCATGTTC |
||||||
|
GGTTGTGATCAGCGGCCGGG |
||||||
|
GCAGCATTCAGCGCCCTGGT |
||||||
|
GAGGTCCATTATTAACACGG |
||||||
|
CCCATTATAATCCACATGGC |
||||||
|
GTGATCAGCGGCCGGGACAA |
||||||
|
GGGCCAACCTGAATGTCGGG |
||||||
|
CGGAGGAGTAGTTGGTACCA |
||||||
|
GCGAAGAACGAATACGACAG |
||||||
|
GTCACCATTCCCAGCAAATG |
||||||
|
CAACTTGTAACCCGTCGGAA |
||||||
|
CCCGTGTGAATGTGGAAATC |
||||||
|
ACAGAGCGAGGCCCTTGACA |
||||||
|
TGTCCTTCCAGTGCTCGCGG |
||||||
|
GCAATTACGAATGACCAGGC |
||||||
|
AGAACGAATACGACAGAGCG |
||||||
|
CTAGTCATGCGGGCTCGCAC |
||||||
|
ATCCAGGAGAGATTGAATAG |
||||||
|
GAGAGTCACATACACAATGC |
||||||
|
TCCCCTGGAAGCGCGCTCCC |
||||||
|
GTTGGTACCAACGCAAGAGG |
||||||
|
AAATCGATCGTATACCGCAG |
||||||
|
CAGCCATCAAAGGAGAGCCC |
||||||
|
GTCCATTATTAACACGGGCT |
||||||
|
TTAGGCAGCATTCAGCGCCC |
||||||
|
GATCAGCGGCCGGGACAATG |
||||||
|
ATTACGAATGACCAGGCTCA |
||||||
|
GAGCCCCTTGCGCTGTATCG |
||||||
|
TGGTCACTTGCTGCTGGAAA |
||||||
|
AAAATTTACGTCCGAGCTAT |
||||||
|
GGAGTAGTTGGTACCAACGC |
||||||
|
GAAACGACTAGATAGATCCA |
||||||
|
GTGCATCCAGGAGAGATTGA |
||||||
|
GCCGAAATCACGCGCATTGT |
||||||
|
TAGCAACAATAATTACGTCA |
||||||
|
CTCAAGAACCCTCTCTCTAC |
||||||
|
GAATCCTGGGAGGGCCAACC |
||||||
|
TGCCCCATGTTCGCGTTTAA |
||||||
|
CAATGCGGCTGTCGATCTCC |
||||||
|
GGGAGCTATAACAGCCATCA |
||||||
|
TACGAATGACCAGGCTCAAT |
||||||
|
GACATCAAGGTCCCACACAG |
||||||
|
CCTTCCAGTGCTCGCGGGGT |
||||||
|
TTTCTTTAGGCAGCATTCAG |
||||||
|
TCCCAGCAAATGCCTTTCTT |
||||||
|
ACTGCTGCAGGGCTCCCGCG |
||||||
|
CGTTCAGCGAAGAACGAATA |
||||||
|
GGAACGGGAATCCTGGGAGG |
||||||
|
GGCTAGTCATGCGGGCTCGC |
||||||
|
GGTCGAAACTAGCTGCACTA |
||||||
|
TCGGCAAAGTTAGAGCGGGA |
||||||
|
CAGGTCAAAGGGAGCTATAA |
||||||
|
ATAGCAACAATAATTACGTC |
||||||
|
TAATTACGTCACCATTCCCA |
||||||
|
GCAAAAATTTTGGACCGCTG |
||||||
|
GGTCAAAGGGAGCTATAACA |
||||||
|
TAGCTTCACAAATGGTCGAA |
||||||
|
GGAACTGCTGCAGGGCTCCC |
||||||
|
CACTTGCTGCTGGAAACGAC |
||||||
|
CCTGAATGTCGGGTCCGAGT |
||||||
|
AGGTTAGGGGGTAAGGCCAT |
||||||
|
CCAGCAAATGCCTTTCTTTA |
||||||
|
GGAGAGCCCCTTGCGCTGTA |
||||||
|
CCCATTCTTCCCCTGGAAGC |
||||||
|
CCATTCCCAGCAAATGCCTT |
||||||
|
TCGAGAGTCACATACACAAT |
||||||
|
AACCCTCTCTCTACAGGCTA |
||||||
|
CATGTTCGCGTTTAAGATGA |
||||||
|
GATCCAATTGGCCGAAATCA |
||||||
|
GTAGTTGGTACCAACGCAAG |
||||||
|
AACGGGAATCCTGGGAGGGC |
||||||
|
TGGAAATCGATCGTATACCG |
||||||
|
ACCGCTGTCCTTCCAGTGCT |
||||||
|
CGAGCTATAGCAGCAAAAAT |
||||||
|
AGCCCCTTGCGCTGTATCGT |
||||||
|
GATATTCGAGAGTCACATAC |
||||||
|
CGAGTTTGGCCGAACCAGGT |
||||||
|
ACCAGGTCAAAGGGAGCTAT |
||||||
|
CAAAGGTGGTTGTGATCAGC |
||||||
|
TCAGCGGCCGGGACAATGGC |
||||||
|
GGTAAGGCCATTTGTGCCTT |
||||||
|
GAAGTTCGGCAAAGTTAGAG |
||||||
|
AAAAATGGTATCTCAAGAAC |
||||||
|
AAACGACTAGATAGATCCAA |
||||||
|
ATCGTAGCTTCACAAATGGT |
||||||
|
TGAGTGACATCAAGGTCCCA |
||||||
|
CATTCTTCCCCTGGAAGCGC |
||||||
|
GGTCCCACACAGCGTGCCAT |
||||||
|
CAATTACGAATGACCAGGCT |
||||||
|
CTTGTAACCCGTCGGAACGG |
||||||
|
TGATCAGCGGCCGGGACAAT |
||||||
|
ATCCACATGGCTATAGCAAC |
||||||
|
GGCTGTCGATCTCCAGGAAG |
||||||
|
CACATGGCTATAGCAACAAT |
||||||
|
TTAAGATGAGTGACATCAAG |
||||||
|
GAGATTGAATAGAGATATTC |
||||||
|
CCTGGGAGGGCCAACCTGAA |
||||||
|
GGGCTTTAAGGTTAGGGGGT |
||||||
|
AGAGCGAGGCCCTTGACACG |
||||||
|
CCCACACAGCGTGCCATTCA |
||||||
|
TGGCTATAGCAACAATAATT |
||||||
|
CCTGGTAAGTTACGCATTAA |
||||||
|
GCCCTGGTAAGTTACGCATT |
||||||
|
CCCGTCGGAACGGGAATCCT |
||||||
|
ACGTTCAGCGAAGAACGAAT |
||||||
|
AAGTTACGCATTAACACGAC |
||||||
|
GCATCCAGGAGAGATTGAAT |
||||||
|
GGTTTGGTCACTTGCTGCTG |
||||||
|
CCCGCGCCAGTGCCCATCTC |
||||||
|
CCGCTGTCCTTCCAGTGCTC |
||||||
|
GTGTACCTCGGGCTTTAAGG |
||||||
|
ACAAAGGTGGTTGTGATCAG |
||||||
|
CAGTGCTCGCGGGGTGTACC |
||||||
|
AATCGTGCCCCATGTTCGCG |
||||||
|
CCTTTATCTCGTGCATCCAG |
||||||
|
GGGAACTGCTGCAGGGCTCC |
||||||
|
GAAATCGTGCCCCATGTTCG |
||||||
|
TAACACGGGCTCATCTTTTT |
||||||
|
AACACTAAACAAAGGTGGTT |
||||||
|
ATGTTCGCGTTTAAGATGAG |
||||||
|
GCTGCTGGAAACGACTAGAT |
||||||
|
GTATCTCAAGAACCCTCTCT |
||||||
|
CCAACGCAAGAGGTCCATTA |
||||||
|
CACGGGCTCATCTTTTTGCC |
||||||
|
TGCTGCTGGAAACGACTAGA |
||||||
|
TTGTTGTCGTCATGGTTACA |
||||||
|
CGCACGCGGTCAACTTGTAA |
||||||
|
GCCGGGACAATGGCCCGTGT |
||||||
|
GGAATCCTGGGAGGGCCAAC |
||||||
|
TAGATAGATCCAATTGGCCG |
||||||
|
AATGGCCCGTGTGAATGTGG |
||||||
|
CTTCACAAATGGTCGAAACT |
||||||
|
AGAGCGGGAACTGCTGCAGG |
||||||
|
TCCAGTGCTCGCGGGGTGTA |
||||||
|
ACACAGCGTGCCATTCATCC |
||||||
|
TCTTTAGGCAGCATTCAGCG |
||||||
|
CCAGTGCCCATCTCGGCGAA |
||||||
|
TACAGGCTAGTCATGCGGGC |
||||||
|
GTCGGAACGGGAATCCTGGG |
||||||
|
TCGGAACGGGAATCCTGGGA |
||||||
|
GAGTAGTTGGTACCAACGCA |
||||||
|
CCCTGGAAGCGCGCTCCCGC |
||||||
|
AACAATAATTACGTCACCAT |
||||||
|
CTGAATGTCGGGTCCGAGTT |
||||||
|
CGCGTTTAAGATGAGTGACA |
||||||
|
TCGGGCTTTAAGGTTAGGGG |
||||||
|
ATACCGCAGAAGTTCGGCAA |
||||||
|
ACGAATACGACAGAGCGAGG |
||||||
|
TGGTTACAAAATTTACGTCC |
||||||
|
CGCATTAACACGACGTTCAG |
||||||
|
GCGAAGAAACCGAAATCGTG |
||||||
|
ACTAAACAAAGGTGGTTGTG |
||||||
|
TTGCCCATTATAATCCACAT |
||||||
|
CCGGGACAATGGCCCGTGTG |
||||||
|
TCCAGGAGAGATTGAATAGA |
||||||
|
TAATCCACATGGCTATAGCA |
||||||
|
AATGGTCGAAACTAGCTGCA |
||||||
|
AAGAACGAATACGACAGAGC |
||||||
|
AATCCTGGGAGGGCCAACCT |
||||||
|
CGAAGAACGAATACGACAGA |
||||||
|
AATGCGGCTGTCGATCTCCA |
||||||
|
AGATGAGTGACATCAAGGTC |
||||||
|
ATCTCAAGAACCCTCTCTCT |
||||||
|
GTGCCCATCTCGGCGAAAAA |
||||||
|
ATAGATCCAATTGGCCGAAA |
||||||
|
AGGCCCTTGACACGAACACT |
||||||
|
CACAAATGGTCGAAACTAGC |
||||||
|
ATTGTTGTCGTCATGGTTAC |
||||||
|
TGTCGATCTCCAGGAAGATG |
||||||
|
GTTCGGCAAAGTTAGAGCGG |
||||||
|
GTCAAAGGGAGCTATAACAG |
||||||
|
CACGACGTTCAGCGAAGAAC |
||||||
|
CGTGCCCCATGTTCGCGTTT |
||||||
|
TGGACCGCTGTCCTTCCAGT |
||||||
|
TCTCGTGCATCCAGGAGAGA |
||||||
|
AGTGACATCAAGGTCCCACA |
||||||
|
CTCTACAGGCTAGTCATGCG |
||||||
|
CAGCGGCCGGGACAATGGCC |
||||||
|
CGTCCGAGCTATAGCAGCAA |
||||||
|
GCGGCCGGGACAATGGCCCG |
||||||
|
GAAGAAACCGAAATCGTGCC |
||||||
|
AGGAGAGCCCCTTGCGCTGT |
||||||
|
CAAGGTCCCACACAGCGTGC |
||||||
|
GTGCTCGCGGGGTGTACCTC |
||||||
|
CTTCCCCTGGAAGCGCGCTC |
||||||
|
GGCTCAATCGGAGGAGTAGT |
||||||
|
GAACCCATTCTTCCCCTGGA |
||||||
|
GTATACCGCAGAAGTTCGGC |
||||||
|
GAATGACCAGGCTCAATCGG |
||||||
|
ATAACAGCCATCAAAGGAGA |
||||||
|
GAGCTATAACAGCCATCAAA |
||||||
|
GGTCCGAGTTTGGCCGAACC |
||||||
|
CCCCTTGCGCTGTATCGTAG |
||||||
|
CTGGTAAGTTACGCATTAAC |
||||||
|
CCTTGACACGAACACTAAAC |
||||||
|
AGATCCAATTGGCCGAAATC |
||||||
|
AGTGCTCGCGGGGTGTACCT |
||||||
|
GTCAACTTGTAACCCGTCGG |
||||||
|
TACGCATTAACACGACGTTC |
||||||
|
ACAAAATTTACGTCCGAGCT |
||||||
|
GCAACAATAATTACGTCACC |
||||||
|
CAATTGGCCGAAATCACGCG |
||||||
|
ACCCGTCGGAACGGGAATCC |
||||||
|
GGTTACAAAATTTACGTCCG |
||||||
|
ACGACAGAGCGAGGCCCTTG |
||||||
|
ATTGAATAGAGATATTCGAG |
||||||
|
TCATGCGGGCTCGCACGCGG |
||||||
|
GCAAAGTTAGAGCGGGAACT |
||||||
|
TGTATCGTAGCTTCACAAAT |
||||||
|
CCCTCTCTCTACAGGCTAGT |
||||||
|
TCTACAGGCTAGTCATGCGG |
||||||
|
GATAGATCCAATTGGCCGAA |
||||||
|
CCGAAATCGTGCCCCATGTT |
||||||
|
TTTAAGGTTAGGGGGTAAGG |
||||||
|
CCGCCTTTATCTCGTGCATC |
||||||
|
GGCCGGGACAATGGCCCGTG |
||||||
|
AATGTGGAAATCGATCGTAT |
||||||
|
ACACGGGCTCATCTTTTTGC |
||||||
|
GAATGTCGGGTCCGAGTTTG |
||||||
|
TGGAAGCGCGCTCCCGCCTT |
||||||
|
GCTCCCGCCTTTATCTCGTG |
||||||
|
CGCCCTGGTAAGTTACGCAT |
||||||
|
GTCGTCATGGTTACAAAATT |
||||||
|
GCGGTCAACTTGTAACCCGT |
||||||
|
ATCGTGCCCCATGTTCGCGT |
||||||
|
GAAACCGAAATCGTGCCCCA |
||||||
|
TGTGGTGCAATTACGAATGA |
||||||
|
CGAAATCACGCGCATTGTTG |
||||||
|
AACCTGAATGTCGGGTCCGA |
||||||
|
CGCCTTTATCTCGTGCATCC |
||||||
|
GTTTAAGATGAGTGACATCA |
||||||
|
ATAATTACGTCACCATTCCC |
||||||
|
AGGAGTAGTTGGTACCAACG |
||||||
|
GGTTAGGGGGTAAGGCCATT |
||||||
|
TCATCTTTTTGCCCATTATA |
||||||
|
ACAAATGGTCGAAACTAGCT |
||||||
|
TCGCGGGGTGTACCTCGGGC |
||||||
|
CGGGCTCATCTTTTTGCCCA |
||||||
|
GCCATCAAAGGAGAGCCCCT |
||||||
|
TTTACGTCCGAGCTATAGCA |
||||||
|
TGCGCTGTATCGTAGCTTCA |
||||||
|
ACGACTAGATAGATCCAATT |
||||||
|
ACAATGCGGCTGTCGATCTC |
||||||
|
GTGAATGTGGAAATCGATCG |
||||||
|
CCGAAATCACGCGCATTGTT |
||||||
|
TGGTGCGAAGAAACCGAAAT |
||||||
|
CTATCGGTTTGGTCACTTGC |
||||||
|
CCCAGCAAATGCCTTTCTTT |
||||||
|
ACCCTCTCTCTACAGGCTAG |
||||||
|
GCTGCAGGGCTCCCGCGCCA |
||||||
|
TGCGGCTGTCGATCTCCAGG |
||||||
|
CCAGGCTCAATCGGAGGAGT |
||||||
|
TTCGAGAGTCACATACACAA |
||||||
|
AATTACGTCACCATTCCCAG |
||||||
|
GGTAAGTTACGCATTAACAC |
||||||
|
CTGTGGTGCAATTACGAATG |
||||||
|
TGTTGGTGCGAAGAAACCGA |
||||||
|
AGGGCCAACCTGAATGTCGG |
||||||
|
AAGGTTAGGGGGTAAGGCCA |
||||||
|
AATTTACGTCCGAGCTATAG |
||||||
|
ATACACAATGCGGCTGTCGA |
||||||
|
TCGTGCCCCATGTTCGCGTT |
||||||
|
CAGAAGTTCGGCAAAGTTAG |
||||||
|
TTGAATAGAGATATTCGAGA |
||||||
|
TCAGCGCCCTGGTAAGTTAC |
||||||
|
AAATCGTGCCCCATGTTCGC |
||||||
|
ATGCCTTTCTTTAGGCAGCA |
||||||
|
TGTCGTCATGGTTACAAAAT |
||||||
|
CCATGTTCGCGTTTAAGATG |
||||||
|
GCATTCAGCGCCCTGGTAAG |
||||||
|
CCCATGTTCGCGTTTAAGAT |
||||||
|
ATCCTGGGAGGGCCAACCTG |
||||||
|
AACACGACGTTCAGCGAAGA |
||||||
|
GAATAGAGATATTCGAGAGT |
||||||
|
GTACCTCGGGCTTTAAGGTT |
||||||
|
CAGGCTCAATCGGAGGAGTA |
||||||
|
TTTAGGCAGCATTCAGCGCC |
||||||
|
ATTCTTCCCCTGGAAGCGCG |
||||||
|
ACGAATGACCAGGCTCAATC |
||||||
|
GGCTCATCTTTTTGCCCATT |
||||||
|
GAGCGAGGCCCTTGACACGA |
||||||
|
AATGACCAGGCTCAATCGGA |
||||||
|
AAGAGGTCCATTATTAACAC |
||||||
|
GGGTGTACCTCGGGCTTTAA |
||||||
|
TTTTTGGTGTGGAACCCATT |
||||||
|
TGACCAGGCTCAATCGGAGG |
||||||
|
ACACTAAACAAAGGTGGTTG |
||||||
|
GGGGTGTACCTCGGGCTTTA |
||||||
|
ACGTCACCATTCCCAGCAAA |
||||||
|
CGAAACTAGCTGCACTATCG |
||||||
|
CCTCGGGCTTTAAGGTTAGG |
||||||
|
TTAACACGGGCTCATCTTTT |
||||||
|
AAATGGTATCTCAAGAACCC |
||||||
|
TGACATCAAGGTCCCACACA |
||||||
|
GAGGCCCTTGACACGAACAC |
||||||
|
CTTGCGCTGTATCGTAGCTT |
||||||
|
GTCCGAGTTTGGCCGAACCA |
||||||
|
GGAGGAGTAGTTGGTACCAA |
||||||
|
TTCCAGTGCTCGCGGGGTGT |
||||||
|
GAACGAATACGACAGAGCGA |
||||||
|
CGGGTCCGAGTTTGGCCGAA |
||||||
|
AATTACGAATGACCAGGCTC |
||||||
|
GTCGGGTCCGAGTTTGGCCG |
||||||
|
TATAGCAGCAAAAATTTTGG |
||||||
|
AGTAGTTGGTACCAACGCAA |
||||||
|
TGCTGGAAACGACTAGATAG |
||||||
|
TGGCCCGTGTGAATGTGGAA |
||||||
|
TGTTCGCGTTTAAGATGAGT |
||||||
|
AGCGGCCGGGACAATGGCCC |
||||||
|
ATTCCCAGCAAATGCCTTTC |
||||||
|
TACCTCGGGCTTTAAGGTTA |
||||||
|
AGTCATGCGGGCTCGCACGC |
||||||
|
GGCAGCATTCAGCGCCCTGG |
||||||
|
CGAGAGTCACATACACAATG |
||||||
|
GTGTGGAACCCATTCTTCCC |
||||||
|
CATCCAGGAGAGATTGAATA |
||||||
|
GGCCGAAATCACGCGCATTG |
||||||
|
TTGACACGAACACTAAACAA |
||||||
|
TCTCAAGAACCCTCTCTCTA |
||||||
|
TGCATCCAGGAGAGATTGAA |
||||||
|
ATCTCCAGGAAGATGTTGGT |
||||||
|
CACATACACAATGCGGCTGT |
||||||
|
TTGGTGCGAAGAAACCGAAA |
||||||
|
AGTGCCCATCTCGGCGAAAA |
||||||
|
GCGCTCCCGCCTTTATCTCG |
||||||
|
TAGATCCAATTGGCCGAAAT |
||||||
|
GGCCCTTGACACGAACACTA |
||||||
|
CAAAAATTTTGGACCGCTGT |
||||||
|
CATCAAGGTCCCACACAGCG |
||||||
|
CAGCAAATGCCTTTCTTTAG |
||||||
|
CATACACAATGCGGCTGTCG |
||||||
|
ATCGTATACCGCAGAAGTTC |
||||||
|
GAATACGACAGAGCGAGGCC |
||||||
|
TCAACTTGTAACCCGTCGGA |
||||||
|
TGTAACCCGTCGGAACGGGA |
||||||
|
TGCAATTACGAATGACCAGG |
||||||
|
TCGTAGCTTCACAAATGGTC |
||||||
|
TCCATTATTAACACGGGCTC |
||||||
|
TTGGCCGAAATCACGCGCAT |
||||||
|
CCTCTCTCTACAGGCTAGTC |
||||||
|
ATTGGCCGAAATCACGCGCA |
||||||
|
GCTAGTCATGCGGGCTCGCA |
||||||
|
AGGTCCCACACAGCGTGCCA |
||||||
|
GGAAATCGATCGTATACCGC |
||||||
|
CGCGCCAGTGCCCATCTCGG |
||||||
|
TCGATCGTATACCGCAGAAG |
||||||
|
CCAATTGGCCGAAATCACGC |
||||||
|
ATACGACAGAGCGAGGCCCT |
||||||
|
CGCTCCCGCCTTTATCTCGT |
||||||
|
TGGCCGAAATCACGCGCATT |
||||||
|
AATAATTACGTCACCATTCC |
||||||
|
CAGCGAAGAACGAATACGAC |
||||||
|
TACGACAGAGCGAGGCCCTT |
||||||
|
GAAGCGCGCTCCCGCCTTTA |
||||||
|
TCTTCCCCTGGAAGCGCGCT |
||||||
|
GAGTGACATCAAGGTCCCAC |
||||||
|
GCGGGCTCGCACGCGGTCAA |
||||||
|
CGGCTGTCGATCTCCAGGAA |
||||||
|
CCGTCGGAACGGGAATCCTG |
||||||
|
GTAAGTTACGCATTAACACG |
||||||
|
ATCTTTTTGCCCATTATAAT |
||||||
|
AATACGACAGAGCGAGGCCC |
||||||
|
GACACGAACACTAAACAAAG |
||||||
|
GTGTGAATGTGGAAATCGAT |
||||||
|
TGCACTATCGGTTTGGTCAC |
||||||
|
GTCATGCGGGCTCGCACGCG |
||||||
|
ACCCATTCTTCCCCTGGAAG |
||||||
|
ATCTCGTGCATCCAGGAGAG |
||||||
|
AAGTTAGAGCGGGAACTGCT |
||||||
|
CTATAGCAACAATAATTACG |
||||||
|
GTGCAATTACGAATGACCAG |
||||||
|
CAGGCTAGTCATGCGGGCTC |
||||||
|
ATAATCCACATGGCTATAGC |
||||||
|
GCTATAACAGCCATCAAAGG |
||||||
|
AGATGTTGGTGCGAAGAAAC |
||||||
|
ATGGTCGAAACTAGCTGCAC |
||||||
|
GGAGAGATTGAATAGAGATA |
||||||
|
CGACAGAGCGAGGCCCTTGA |
||||||
|
TCGCACGCGGTCAACTTGTA |
||||||
|
GAAATCACGCGCATTGTTGT |
||||||
|
TATACCGCAGAAGTTCGGCA |
||||||
|
CAGCATTCAGCGCCCTGGTA |
||||||
|
TTTTGGTGTGGAACCCATTC |
||||||
|
TATAATCCACATGGCTATAG |
||||||
|
ATCACGCGCATTGTTGTCGT |
||||||
|
ATTATAATCCACATGGCTAT |
||||||
|
TGTCGGGTCCGAGTTTGGCC |
||||||
|
AAGTTCGGCAAAGTTAGAGC |
||||||
|
ACGGGAATCCTGGGAGGGCC |
||||||
|
ATTTACGTCCGAGCTATAGC |
||||||
|
CTGGAAGCGCGCTCCCGCCT |
||||||
|
GCAAATGCCTTTCTTTAGGC |
||||||
|
TTACGCATTAACACGACGTT |
||||||
|
AGGTGGTTGTGATCAGCGGC |
||||||
|
GCCCGTGTGAATGTGGAAAT |
||||||
|
GTCCGAGCTATAGCAGCAAA |
||||||
|
AGGTCAAAGGGAGCTATAAC |
||||||
|
GCGGCTGTCGATCTCCAGGA |
||||||
|
TTGCGCTGTATCGTAGCTTC |
||||||
|
AAAGTTAGAGCGGGAACTGC |
||||||
|
TTCCCCTGGAAGCGCGCTCC |
||||||
|
CCCCATGTTCGCGTTTAAGA |
||||||
|
TATCTCAAGAACCCTCTCTC |
||||||
|
CCGCGCCAGTGCCCATCTCG |
||||||
|
GGAACCCATTCTTCCCCTGG |
||||||
|
ATAGCAGCAAAAATTTTGGA |
||||||
|
CGTTTAAGATGAGTGACATC |
||||||
|
TATAACAGCCATCAAAGGAG |
||||||
|
CCAGGAAGATGTTGGTGCGA |
||||||
|
TACGTCACCATTCCCAGCAA |
||||||
|
GCCTTTCTTTAGGCAGCATT |
||||||
|
AACAGCCATCAAAGGAGAGC |
||||||
|
GCATTAACACGACGTTCAGC |
||||||
|
CTCCCGCCTTTATCTCGTGC |
||||||
|
CGTGCATCCAGGAGAGATTG |
||||||
|
CTGGAAACGACTAGATAGAT |
||||||
|
GCATTGTTGTCGTCATGGTT |
||||||
|
CGGCCGGGACAATGGCCCGT |
||||||
|
TTATTAACACGGGCTCATCT |
||||||
|
TAGCAGCAAAAATTTTGGAC |
||||||
|
TCAAGAACCCTCTCTCTACA |
||||||
|
AGTCACATACACAATGCGGC |
||||||
|
CAACGCAAGAGGTCCATTAT |
||||||
|
CGGGCTCGCACGCGGTCAAC |
||||||
|
CTCGCGGGGTGTACCTCGGG |
||||||
|
ATGTGGAAATCGATCGTATA |
||||||
|
AGCTATAACAGCCATCAAAG |
||||||
|
TTAAGGTTAGGGGGTAAGGC |
||||||
|
TCCAATTGGCCGAAATCACG |
||||||
|
AACGACTAGATAGATCCAAT |
||||||
|
GGCCAACCTGAATGTCGGGT |
||||||
|
CCTTTCTTTAGGCAGCATTC |
||||||
|
AGTTGGTACCAACGCAAGAG |
||||||
|
TGTGAATGTGGAAATCGATC |
||||||
|
CATGGTTACAAAATTTACGT |
||||||
|
TTGCTGCTGGAAACGACTAG |
||||||
|
CATGCGGGCTCGCACGCGGT |
||||||
|
CAACAATAATTACGTCACCA |
||||||
|
GAGAGATTGAATAGAGATAT |
||||||
|
GTGCGAAGAAACCGAAATCG |
||||||
|
GGGCTCGCACGCGGTCAACT |
||||||
|
GCAAGAGGTCCATTATTAAC |
||||||
|
GTCACTTGCTGCTGGAAACG |
||||||
|
GGCCGAACCAGGTCAAAGGG |
||||||
|
CGCGCATTGTTGTCGTCATG |
||||||
|
CATTATAATCCACATGGCTA |
||||||
|
GGTGCAATTACGAATGACCA |
||||||
|
GGGAATCCTGGGAGGGCCAA |
||||||
|
CATCTCGGCGAAAAATGGTA |
||||||
|
CGTGTGAATGTGGAAATCGA |
||||||
|
GCTATAGCAACAATAATTAC |
||||||
|
CCGCAGAAGTTCGGCAAAGT |
||||||
|
ACTAGATAGATCCAATTGGC |
||||||
|
AAACAAAGGTGGTTGTGATC |
||||||
|
TCGCGTTTAAGATGAGTGAC |
||||||
|
TCGGGTCCGAGTTTGGCCGA |
||||||
|
AAAATGGTATCTCAAGAACC |
||||||
|
GGACAATGGCCCGTGTGAAT |
||||||
|
GACCAGGCTCAATCGGAGGA |
||||||
|
CAATAATTACGTCACCATTC |
||||||
|
ATCGGTTTGGTCACTTGCTG |
||||||
|
TCGGCGAAAAATGGTATCTC |
||||||
|
AACCCATTCTTCCCCTGGAA |
||||||
|
TCCCGCCTTTATCTCGTGCA |
||||||
|
GATCTCCAGGAAGATGTTGG |
||||||
|
GAAAAATGGTATCTCAAGAA |
||||||
|
GGCCCGTGTGAATGTGGAAA |
||||||
|
GTCCTTCCAGTGCTCGCGGG |
||||||
|
TTGTAACCCGTCGGAACGGG |
||||||
|
ACGCAAGAGGTCCATTATTA |
||||||
|
GCTCGCGGGGTGTACCTCGG |
||||||
|
TAAGGTTAGGGGGTAAGGCC |
||||||
|
CCCCTGGAAGCGCGCTCCCG |
||||||
|
GCTATAGCAGCAAAAATTTT |
||||||
|
TCTCGGCGAAAAATGGTATC |
||||||
|
ATCCAATTGGCCGAAATCAC |
||||||
|
ATGGCCCGTGTGAATGTGGA |
||||||
|
GCCCCTTGCGCTGTATCGTA |
||||||
|
TTGTCGTCATGGTTACAAAA |
||||||
|
AACCGAAATCGTGCCCCATG |
||||||
|
CCAGTGCTCGCGGGGTGTAC |
||||||
|
CCATTATAATCCACATGGCT |
||||||
|
ATGTTGGTGCGAAGAAACCG |
||||||
|
ATGGTTACAAAATTTACGTC |
||||||
|
AGTTTGGCCGAACCAGGTCA |
||||||
|
GTACCAACGCAAGAGGTCCA |
||||||
|
GAGGAGTAGTTGGTACCAAC |
||||||
|
CAAAGTTAGAGCGGGAACTG |
||||||
|
TTAGGGGGTAAGGCCATTTG |
||||||
|
GAAGATGTTGGTGCGAAGAA |
||||||
|
ATGGCTATAGCAACAATAAT |
||||||
|
AAATCACGCGCATTGTTGTC |
||||||
|
TATTCGAGAGTCACATACAC |
||||||
|
AAATGCCTTTCTTTAGGCAG |
||||||
|
CGAATACGACAGAGCGAGGC |
||||||
|
AGATAGATCCAATTGGCCGA |
||||||
|
TGACACGAACACTAAACAAA |
||||||
|
GCCAACCTGAATGTCGGGTC |
||||||
|
CTGCTGCAGGGCTCCCGCGC |
||||||
|
CTCGTGCATCCAGGAGAGAT |
||||||
|
TTTGCCCATTATAATCCACA |
||||||
|
ACGAACACTAAACAAAGGTG |
||||||
|
TGCAGGGCTCCCGCGCCAGT |
||||||
|
AGAAGTTCGGCAAAGTTAGA |
||||||
|
TAGAGATATTCGAGAGTCAC |
||||||
|
CTGCACTATCGGTTTGGTCA |
||||||
|
GGCCATTTGTGCCTTTTTGG |
||||||
|
CACACAGCGTGCCATTCATC |
||||||
|
CAAATGGTCGAAACTAGCTG |
||||||
|
TTACAAAATTTACGTCCGAG |
||||||
|
AACCAGGTCAAAGGGAGCTA |
||||||
|
GCTCAATCGGAGGAGTAGTT |
||||||
|
CCCTGGTAAGTTACGCATTA |
||||||
|
CCGAACCAGGTCAAAGGGAG |
||||||
|
TCCTTCCAGTGCTCGCGGGG |
||||||
|
GCGGGAACTGCTGCAGGGCT |
||||||
|
ACACGAACACTAAACAAAGG |
||||||
|
GGGACAATGGCCCGTGTGAA |
||||||
|
TCAATCGGAGGAGTAGTTGG |
||||||
|
TAAGATGAGTGACATCAAGG |
||||||
|
CCACATGGCTATAGCAACAA |
||||||
|
TGAATAGAGATATTCGAGAG |
||||||
|
AGAGATTGAATAGAGATATT |
||||||
|
ACGGGCTCATCTTTTTGCCC |
||||||
|
CTTTTTGCCCATTATAATCC |
||||||
|
CTCGCACGCGGTCAACTTGT |
||||||
|
CTTCCAGTGCTCGCGGGGTG |
||||||
|
CTCATCTTTTTGCCCATTAT |
||||||
|
AGGCCATTTGTGCCTTTTTG |
||||||
|
AATTTTGGACCGCTGTCCTT |
||||||
|
ATTTGTGCCTTTTTGGTGTG |
||||||
|
ACACGACGTTCAGCGAAGAA |
||||||
|
TTGGTCACTTGCTGCTGGAA |
||||||
|
GTATCGTAGCTTCACAAATG |
||||||
|
ACGCGCATTGTTGTCGTCAT |
||||||
|
CCCTTGCGCTGTATCGTAGC |
||||||
|
GGAAGCGCGCTCCCGCCTTT |
||||||
|
CGTATACCGCAGAAGTTCGG |
||||||
|
AAAGGAGAGCCCCTTGCGCT |
||||||
|
GATGAGTGACATCAAGGTCC |
||||||
|
CGCTGTATCGTAGCTTCACA |
||||||
|
TCACATACACAATGCGGCTG |
||||||
|
AAGAACCCTCTCTCTACAGG |
||||||
|
CAGAGCGAGGCCCTTGACAC |
||||||
|
CCACACAGCGTGCCATTCAT |
||||||
|
GGTCAACTTGTAACCCGTCG |
||||||
|
TATCTCGTGCATCCAGGAGA |
||||||
|
AAGGAGAGCCCCTTGCGCTG |
||||||
|
GGTCCATTATTAACACGGGC |
||||||
|
GTCGATCTCCAGGAAGATGT |
||||||
|
TTAGAGCGGGAACTGCTGCA |
||||||
|
AAAGGTGGTTGTGATCAGCG |
||||||
|
TGGTTGTGATCAGCGGCCGG |
||||||
|
GTTAGAGCGGGAACTGCTGC |
||||||
|
CCAACCTGAATGTCGGGTCC |
||||||
|
GTTCAGCGAAGAACGAATAC |
||||||
|
CTCCCGCGCCAGTGCCCATC |
||||||
|
TCGTGCATCCAGGAGAGATT |
||||||
|
CGCGGGGTGTACCTCGGGCT |
||||||
|
CACTAAACAAAGGTGGTTGT |
||||||
|
CCCTTGACACGAACACTAAA |
||||||
|
CGGTTTGGTCACTTGCTGCT |
||||||
|
ACTATCGGTTTGGTCACTTG |
||||||
|
TGGAAACGACTAGATAGATC |
||||||
|
GTGGTGCAATTACGAATGAC |
||||||
|
TGGTGCAATTACGAATGACC |
||||||
|
GACGTTCAGCGAAGAACGAA |
||||||
|
AATCGATCGTATACCGCAGA |
||||||
|
TTTGTGCCTTTTTGGTGTGG |
||||||
|
CATTCAGCGCCCTGGTAAGT |
||||||
|
GCGCCAGTGCCCATCTCGGC |
||||||
|
GTAACCCGTCGGAACGGGAA |
||||||
|
AACTGCTGCAGGGCTCCCGC |
||||||
|
AGAGATATTCGAGAGTCACA |
||||||
|
GTCCCACACAGCGTGCCATT |
||||||
|
AGCTATAGCAGCAAAAATTT |
||||||
|
TTTTGGACCGCTGTCCTTCC |
||||||
|
TCGGAGGAGTAGTTGGTACC |
||||||
|
CTAGCTGCACTATCGGTTTG |
||||||
|
TCCAGGAAGATGTTGGTGCG |
||||||
|
AGATTGAATAGAGATATTCG |
||||||
|
CAGGAAGATGTTGGTGCGAA |
||||||
|
TCAAAGGAGAGCCCCTTGCG |
||||||
|
CCATTATTAACACGGGCTCA |
||||||
|
AAGGGAGCTATAACAGCCAT |
||||||
|
CTATAACAGCCATCAAAGGA |
||||||
|
AAGGTGGTTGTGATCAGCGG |
||||||
|
GAGGGCCAACCTGAATGTCG |
||||||
|
GCGAAAAATGGTATCTCAAG |
||||||
|
TTATAATCCACATGGCTATA |
||||||
|
AGGCTCAATCGGAGGAGTAG |
||||||
|
TGCGGGCTCGCACGCGGTCA |
||||||
|
TCACTTGCTGCTGGAAACGA |
||||||
|
AGGGAGCTATAACAGCCATC |
||||||
|
GGACCGCTGTCCTTCCAGTG |
||||||
|
TTCACAAATGGTCGAAACTA |
||||||
|
TAACACGACGTTCAGCGAAG |
||||||
|
CCGTGTGAATGTGGAAATCG |
||||||
|
CGGGCTTTAAGGTTAGGGGG |
||||||
|
TTGTGATCAGCGGCCGGGAC |
||||||
|
GTGACATCAAGGTCCCACAC |
||||||
|
TGGCCGAACCAGGTCAAAGG |
||||||
|
GACCGCTGTCCTTCCAGTGC |
||||||
|
TTCTTCCCCTGGAAGCGCGC |
||||||
|
GTCGAAACTAGCTGCACTAT |
||||||
|
TACGTCCGAGCTATAGCAGC |
||||||
|
CCTTGCGCTGTATCGTAGCT |
||||||
|
CGGTCAACTTGTAACCCGTC |
||||||
|
CAGTGCCCATCTCGGCGAAA |
||||||
|
GCTTCACAAATGGTCGAAAC |
||||||
|
GGTGGTTGTGATCAGCGGCC |
||||||
|
TGTGCCTTTTTGGTGTGGAA |
||||||
|
GGCTCCCGCGCCAGTGCCCA |
||||||
|
TGTGGAACCCATTCTTCCCC |
||||||
|
TCTTTTTGCCCATTATAATC |
||||||
|
CTTTATCTCGTGCATCCAGG |
||||||
|
AACGAATACGACAGAGCGAG |
||||||
|
GATGTTGGTGCGAAGAAACC |
||||||
|
TGCCCATTATAATCCACATG |
||||||
|
AATCGGAGGAGTAGTTGGTA |
||||||
|
GGGTAAGGCCATTTGTGCCT |
||||||
|
GTTTGGCCGAACCAGGTCAA |
||||||
|
TCTCTACAGGCTAGTCATGC |
||||||
|
GAAGAACGAATACGACAGAG |
||||||
|
CGCGCTCCCGCCTTTATCTC |
||||||
|
AATCCACATGGCTATAGCAA |
||||||
|
CTTTTTGGTGTGGAACCCAT |
||||||
|
AGCGCGCTCCCGCCTTTATC |
||||||
|
GAGCTATAGCAGCAAAAATT |
||||||
|
AGCAAAAATTTTGGACCGCT |
||||||
|
TAGTTGGTACCAACGCAAGA |
||||||
|
CAAAGGAGAGCCCCTTGCGC |
||||||
|
ACAGGCTAGTCATGCGGGCT |
||||||
|
GAGAGCCCCTTGCGCTGTAT |
||||||
|
GTTTGGTCACTTGCTGCTGG |
||||||
|
ATTATTAACACGGGCTCATC |
||||||
|
GAAACTAGCTGCACTATCGG |
||||||
|
GCTGTCCTTCCAGTGCTCGC |
||||||
|
TAAACAAAGGTGGTTGTGAT |
||||||
|
CGTCACCATTCCCAGCAAAT |
||||||
|
TCCGAGCTATAGCAGCAAAA |
||||||
|
GGTGTGGAACCCATTCTTCC |
||||||
|
ACCGCAGAAGTTCGGCAAAG |
||||||
|
GGTATCTCAAGAACCCTCTC |
||||||
|
CGCTGTCCTTCCAGTGCTCG |
||||||
|
CATTTGTGCCTTTTTGGTGT |
||||||
|
CTCTCTACAGGCTAGTCATG |
||||||
|
CAGCGCCCTGGTAAGTTACG |
||||||
|
GCCTTTATCTCGTGCATCCA |
||||||
|
ACTTGCTGCTGGAAACGACT |
||||||
|
AACCCGTCGGAACGGGAATC |
||||||
|
CCGAGTTTGGCCGAACCAGG |
||||||
|
CTGGGAGGGCCAACCTGAAT |
||||||
|
GGTCACTTGCTGCTGGAAAC |
||||||
|
CTCCAGGAAGATGTTGGTGC |
||||||
|
CGCGGTCAACTTGTAACCCG |
||||||
|
CACTATCGGTTTGGTCACTT |
||||||
|
GTCATGGTTACAAAATTTAC |
||||||
|
AAGATGTTGGTGCGAAGAAA |
||||||
|
GCAGGGCTCCCGCGCCAGTG |
||||||
|
CTGTCGATCTCCAGGAAGAT |
||||||
|
TTCTTTAGGCAGCATTCAGC |
||||||
|
CACGCGGTCAACTTGTAACC |
||||||
|
TGCCTTTTTGGTGTGGAACC |
||||||
|
GAAATCGATCGTATACCGCA |
||||||
|
AGGCAGCATTCAGCGCCCTG |
||||||
|
GCACGCGGTCAACTTGTAAC |
||||||
|
CAGGGCTCCCGCGCCAGTGC |
||||||
|
TTTAAGATGAGTGACATCAA |
||||||
|
CACGCGCATTGTTGTCGTCA |
||||||
|
GAGTTTGGCCGAACCAGGTC |
||||||
|
ACCGAAATCGTGCCCCATGT |
||||||
|
AGGTCCATTATTAACACGGG |
||||||
|
CAAGAACCCTCTCTCTACAG |
||||||
|
TTTGGTCACTTGCTGCTGGA |
||||||
|
CTTGCTGCTGGAAACGACTA |
||||||
|
ACACAATGCGGCTGTCGATC |
||||||
|
GTTACGCATTAACACGACGT |
||||||
|
AACTTGTAACCCGTCGGAAC |
||||||
|
AGCCATCAAAGGAGAGCCCC |
||||||
|
GCTGTCGATCTCCAGGAAGA |
||||||
|
CAAAGGGAGCTATAACAGCC |
||||||
|
TGGGAGGGCCAACCTGAATG |
||||||
|
GACTAGATAGATCCAATTGG |
||||||
|
TTGTGCCTTTTTGGTGTGGA |
||||||
|
GCGTTTAAGATGAGTGACAT |
||||||
|
CGGGGTGTACCTCGGGCTTT |
||||||
|
GCGCGCTCCCGCCTTTATCT |
||||||
|
ATGCGGCTGTCGATCTCCAG |
||||||
|
CGCAGAAGTTCGGCAAAGTT |
||||||
|
ATGACCAGGCTCAATCGGAG |
||||||
|
CATCAAAGGAGAGCCCCTTG |
||||||
|
ATAGAGATATTCGAGAGTCA |
||||||
|
TGCTCGCGGGGTGTACCTCG |
||||||
|
TTGGTACCAACGCAAGAGGT |
||||||
|
AGCATTCAGCGCCCTGGTAA |
||||||
|
AGGGCTCCCGCGCCAGTGCC |
||||||
|
AACTAGCTGCACTATCGGTT |
||||||
|
CAATCGGAGGAGTAGTTGGT |
||||||
|
GTTCGCGTTTAAGATGAGTG |
||||||
|
GTAGCTTCACAAATGGTCGA |
||||||
|
CATTGTTGTCGTCATGGTTA |
||||||
|
GGGGTAAGGCCATTTGTGCC |
||||||
|
CGAAAAATGGTATCTCAAGA |
||||||
|
ATCAGCGGCCGGGACAATGG |
||||||
|
CTGTCCTTCCAGTGCTCGCG |
||||||
|
GGGAGGGCCAACCTGAATGT |
||||||
|
TCACGCGCATTGTTGTCGTC |
||||||
|
GATCGTATACCGCAGAAGTT |
||||||
|
AGCGCCCTGGTAAGTTACGC |
||||||
|
TAAGTTACGCATTAACACGA |
||||||
|
CCGAGCTATAGCAGCAAAAA |
||||||
|
TAGAGCGGGAACTGCTGCAG |
||||||
|
ACATCAAGGTCCCACACAGC |
||||||
|
ATCGATCGTATACCGCAGAA |
||||||
|
GTGGAACCCATTCTTCCCCT |
||||||
|
ATTAACACGACGTTCAGCGA |
||||||
|
CGGCGAAAAATGGTATCTCA |
||||||
|
TAACAGCCATCAAAGGAGAG |
||||||
|
TGCCCATCTCGGCGAAAAAT |
||||||
|
AATAGAGATATTCGAGAGTC |
||||||
|
AAGCGCGCTCCCGCCTTTAT |
||||||
|
CTTTAGGCAGCATTCAGCGC |
||||||
|
GCACTATCGGTTTGGTCACT |
||||||
|
GGCTTTAAGGTTAGGGGGTA |
||||||
|
GAGATATTCGAGAGTCACAT |
||||||
|
TCAGCGAAGAACGAATACGA |
||||||
|
AAATTTACGTCCGAGCTATA |
||||||
|
GTGGAAATCGATCGTATACC |
||||||
|
GCCCATTATAATCCACATGG |
||||||
|
TAAGGCCATTTGTGCCTTTT |
||||||
|
CTTTCTTTAGGCAGCATTCA |
||||||
|
CTGTATCGTAGCTTCACAAA |
||||||
|
TTGGTGTGGAACCCATTCTT |
||||||
|
GGAGCTATAACAGCCATCAA |
||||||
|
TTCGCGTTTAAGATGAGTGA |
||||||
|
AGGGGGTAAGGCCATTTGTG |
||||||
|
TGAATGTGGAAATCGATCGT |
||||||
|
ACGCATTAACACGACGTTCA |
||||||
|
ATTAACACGGGCTCATCTTT |
||||||
|
AGGAAGATGTTGGTGCGAAG |
||||||
|
GAGCGGGAACTGCTGCAGGG |
||||||
|
AATGGTATCTCAAGAACCCT |
||||||
|
CTACAGGCTAGTCATGCGGG |
||||||
|
CCAGGAGAGATTGAATAGAG |
||||||
|
CGAAGAAACCGAAATCGTGC |
||||||
|
GAACTGCTGCAGGGCTCCCG |
||||||
|
ACCAGGCTCAATCGGAGGAG |
||||||
|
TATCGGTTTGGTCACTTGCT |
||||||
|
TTTTGCCCATTATAATCCAC |
||||||
|
TTCAGCGAAGAACGAATACG |
||||||
|
GGCTCGCACGCGGTCAACTT |
||||||
|
TCACCATTCCCAGCAAATGC |
||||||
|
TGCGAAGAAACCGAAATCGT |
||||||
|
AGAGGTCCATTATTAACACG |
||||||
|
GGCGAAAAATGGTATCTCAA |
||||||
|
CACAATGCGGCTGTCGATCT |
||||||
|
TACCAACGCAAGAGGTCCAT |
||||||
|
TACACAATGCGGCTGTCGAT |
||||||
|
CACGAACACTAAACAAAGGT |
||||||
|
GAACACTAAACAAAGGTGGT |
||||||
|
CTCAATCGGAGGAGTAGTTG |
||||||
|
CGCATTGTTGTCGTCATGGT |
||||||
|
AGATATTCGAGAGTCACATA |
||||||
|
ACCTGAATGTCGGGTCCGAG |
||||||
|
TTACGAATGACCAGGCTCAA |
||||||
|
GCCCCATGTTCGCGTTTAAG |
||||||
|
AGCTTCACAAATGGTCGAAA |
||||||
|
GAACGGGAATCCTGGGAGGG |
||||||
|
CGGGAACTGCTGCAGGGCTC |
||||||
|
TGGTCGAAACTAGCTGCACT |
||||||
|
TCGATCTCCAGGAAGATGTT |
||||||
|
CCATTCTTCCCCTGGAAGCG |
||||||
|
AAGATGAGTGACATCAAGGT |
||||||
|
CATTCCCAGCAAATGCCTTT |
||||||
|
TCCACATGGCTATAGCAACA |
||||||
|
GCTCCCGCGCCAGTGCCCAT |
||||||
|
CGGCAAAGTTAGAGCGGGAA |
||||||
|
CTTTAAGGTTAGGGGGTAAG |
||||||
|
CCCATCTCGGCGAAAAATGG |
||||||
|
CCATTTGTGCCTTTTTGGTG |
||||||
|
GCTGTATCGTAGCTTCACAA |
||||||
|
CCTGGAAGCGCGCTCCCGCC |
||||||
|
AAACTAGCTGCACTATCGGT |
||||||
|
AGAACCCTCTCTCTACAGGC |
||||||
|
CGGGACAATGGCCCGTGTGA |
||||||
|
CATTATTAACACGGGCTCAT |
||||||
|
GCGCATTGTTGTCGTCATGG |
||||||
|
CTCGGCGAAAAATGGTATCT |
||||||
|
AACGCAAGAGGTCCATTATT |
||||||
|
ACGACGTTCAGCGAAGAACG |
||||||
|
GCTGGAAACGACTAGATAGA |
||||||
|
ACATGGCTATAGCAACAATA |
||||||
|
CGGGAATCCTGGGAGGGCCA |
||||||
|
CAAATGCCTTTCTTTAGGCA |
||||||
|
TCGTCATGGTTACAAAATTT |
||||||
|
TAGGGGGTAAGGCCATTTGT |
||||||
|
GCAGCAAAAATTTTGGACCG |
||||||
|
TTTGGTGTGGAACCCATTCT |
||||||
|
ACGTCCGAGCTATAGCAGCA |
||||||
|
ATGTCGGGTCCGAGTTTGGC |
||||||
|
CTAGATAGATCCAATTGGCC |
||||||
|
GTTGGTGCGAAGAAACCGAA |
||||||
|
CGCAAGAGGTCCATTATTAA |
||||||
|
GGCTATAGCAACAATAATTA |
||||||
|
AAGAAACCGAAATCGTGCCC |
||||||
|
GCCAGTGCCCATCTCGGCGA |
||||||
|
AAACCGAAATCGTGCCCCAT |
||||||
|
GGGCTCATCTTTTTGCCCAT |
||||||
|
GCCGAACCAGGTCAAAGGGA |
||||||
|
TTATCTCGTGCATCCAGGAG |
||||||
|
AAGGCCATTTGTGCCTTTTT |
||||||
|
TGTGATCAGCGGCCGGGACA |
||||||
|
GTCACATACACAATGCGGCT |
||||||
|
GGCAAAGTTAGAGCGGGAAC |
||||||
|
CGCCAGTGCCCATCTCGGCG |
||||||
|
ACGCGGTCAACTTGTAACCC |
||||||
|
GCTCATCTTTTTGCCCATTA |
||||||
|
CGAACCAGGTCAAAGGGAGC |
||||||
|
TATCGTAGCTTCACAAATGG |
||||||
|
GAGTCACATACACAATGCGG |
||||||
|
AGAGCCCCTTGCGCTGTATC |
||||||
|
ATCAAGGTCCCACACAGCGT |
||||||
|
TAGTCATGCGGGCTCGCACG |
||||||
|
GTTAGGGGGTAAGGCCATTT |
||||||
|
CAATGGCCCGTGTGAATGTG |
||||||
|
GCGAGGCCCTTGACACGAAC |
||||||
|
TCAAAGGGAGCTATAACAGC |
||||||
|
CAAGAGGTCCATTATTAACA |
||||||
|
TCTCCAGGAAGATGTTGGTG |
||||||
|
TCCCGCGCCAGTGCCCATCT |
||||||
|
AATCACGCGCATTGTTGTCG |
||||||
|
ATCTCGGCGAAAAATGGTAT |
||||||
|
AATGTCGGGTCCGAGTTTGG |
||||||
|
TCAAGGTCCCACACAGCGTG |
||||||
|
GGTACCAACGCAAGAGGTCC |
||||||
|
CGTCATGGTTACAAAATTTA |
||||||
|
GACAGAGCGAGGCCCTTGAC |
||||||
|
GGAAGATGTTGGTGCGAAGA |
||||||
|
AACACGGGCTCATCTTTTTG |
||||||
|
CTATAGCAGCAAAAATTTTG |
||||||
|
ATGGTATCTCAAGAACCCTC |
||||||
|
TGTGGAAATCGATCGTATAC |
||||||
|
TGCCTTTCTTTAGGCAGCAT |
||||||
|
TCGGTTTGGTCACTTGCTGC |
||||||
|
TGGTACCAACGCAAGAGGTC |
||||||
|
CGAATGACCAGGCTCAATCG |
||||||
|
TTGGACCGCTGTCCTTCCAG |
||||||
|
TGTTGTCGTCATGGTTACAA |
||||||
|
TAGGCAGCATTCAGCGCCCT |
||||||
|
GCCCATCTCGGCGAAAAATG |
||||||
|
ACAATAATTACGTCACCATT |
||||||
|
TACAAAATTTACGTCCGAGC |
||||||
|
AATTGGCCGAAATCACGCGC |
||||||
|
TTTGGACCGCTGTCCTTCCA |
||||||
|
GCGGGGTGTACCTCGGGCTT |
||||||
|
GCAGAAGTTCGGCAAAGTTA |
||||||
|
CCCGCCTTTATCTCGTGCAT |
||||||
|
CAAAATTTACGTCCGAGCTA |
||||||
|
CCATCTCGGCGAAAAATGGT |
||||||
|
TGAATGTCGGGTCCGAGTTT |
||||||
|
TCCTGGGAGGGCCAACCTGA |
||||||
|
TCCGAGTTTGGCCGAACCAG |
||||||
|
AGGCTAGTCATGCGGGCTCG |
||||||
|
TATTAACACGGGCTCATCTT |
||||||
|
GCTGCACTATCGGTTTGGTC |
||||||
|
TATAGCAACAATAATTACGT |
||||||
|
GTTACAAAATTTACGTCCGA |
||||||
|
GGGTCCGAGTTTGGCCGAAC |
||||||
|
GCCTTTTTGGTGTGGAACCC |
||||||
|
AGGAGAGATTGAATAGAGAT |
||||||
|
TGGTGTGGAACCCATTCTTC |
||||||
|
ACAATGGCCCGTGTGAATGT |
||||||
|
TAACCCGTCGGAACGGGAAT |
||||||
|
AGTTACGCATTAACACGACG |
||||||
|
CAGGAGAGATTGAATAGAGA |
||||||
|
GCGCTGTATCGTAGCTTCAC |
||||||
|
GCTCGCACGCGGTCAACTTG |
||||||
|
GGTGCGAAGAAACCGAAATC |
||||||
|
ATGCGGGCTCGCACGCGGTC |
||||||
|
GGTGTACCTCGGGCTTTAAG |
||||||
|
TACCGCAGAAGTTCGGCAAA |
||||||
|
GTGCCCCATGTTCGCGTTTA |
||||||
|
GTAAGGCCATTTGTGCCTTT |
||||||
|
GCCCTTGACACGAACACTAA |
||||||
|
GAACCAGGTCAAAGGGAGCT |
||||||
|
TTACGTCCGAGCTATAGCAG |
||||||
|
ATGAGTGACATCAAGGTCCC |
||||||
|
ACCAACGCAAGAGGTCCATT |
||||||
|
ATCAAAGGAGAGCCCCTTGC |
||||||
|
ATTACGTCACCATTCCCAGC |
||||||
|
TCACAAATGGTCGAAACTAG |
||||||
|
ACATACACAATGCGGCTGTC |
||||||
|
ACCATTCCCAGCAAATGCCT |
||||||
|
AGCTGCACTATCGGTTTGGT |
||||||
|
CAGCAAAAATTTTGGACCGC |
||||||
|
GTGGTTGTGATCAGCGGCCG |
||||||
|
TGGAACCCATTCTTCCCCTG |
||||||
|
AGAGTCACATACACAATGCG |
||||||
|
TAGCTGCACTATCGGTTTGG |
||||||
|
CTAAACAAAGGTGGTTGTGA |
||||||
|
AGTTAGAGCGGGAACTGCTG |
||||||
|
GTTGTGATCAGCGGCCGGGA |
||||||
|
CGTCGGAACGGGAATCCTGG |
||||||
|
CACCATTCCCAGCAAATGCC |
||||||
|
CCTTTTTGGTGTGGAACCCA |
||||||
|
AAATTTTGGACCGCTGTCCT |
||||||
|
CATGGCTATAGCAACAATAA |
||||||
|
TCGAAACTAGCTGCACTATC |
||||||
|
AAAGGGAGCTATAACAGCCA |
||||||
|
ATATTCGAGAGTCACATACA |
||||||
|
AAAAATTTTGGACCGCTGTC |
||||||
|
TTAACACGACGTTCAGCGAA |
||||||
|
TCGTATACCGCAGAAGTTCG |
||||||
|
CGAACACTAAACAAAGGTGG |
||||||
|
AGCAGCAAAAATTTTGGACC |
||||||
|
AGTTCGGCAAAGTTAGAGCG |
||||||
|
TTCCCAGCAAATGCCTTTCT |
||||||
|
ATTCGAGAGTCACATACACA |
||||||
|
GTTGTCGTCATGGTTACAAA |
||||||
|
GGGGGTAAGGCCATTTGTGC |
||||||
|
TTCGGCAAAGTTAGAGCGGG |
||||||
|
AACAAAGGTGGTTGTGATCA |
||||||
|
TGCTGCAGGGCTCCCGCGCC |
||||||
|
TGGTAAGTTACGCATTAACA |
||||||
|
CTTGACACGAACACTAAACA |
||||||
|
CCATCAAAGGAGAGCCCCTT |
||||||
|
AGCGAAGAACGAATACGACA |
||||||
|
TTGGCCGAACCAGGTCAAAG |
||||||
|
TTTATCTCGTGCATCCAGGA |
||||||
|
CTCGGGCTTTAAGGTTAGGG |
||||||
|
CGTAGCTTCACAAATGGTCG |
||||||
|
TTACGTCACCATTCCCAGCA |
||||||
|
GGAAACGACTAGATAGATCC |
||||||
|
CGAGGCCCTTGACACGAACA |
||||||
|
ACCTCGGGCTTTAAGGTTAG |
||||||
|
TCCCACACAGCGTGCCATTC |
||||||
|
TTTTTGCCCATTATAATCCA |
||||||
|
AGCAAATGCCTTTCTTTAGG |
||||||
|
CGATCGTATACCGCAGAAGT |
@ -0,0 +1,49 @@ |
|||||||
|
import jinja2 |
||||||
|
import os |
||||||
|
|
||||||
|
def main():
    """Generate Go boilerplate source files for Rosalind chapter 3 problems.

    Each problem entry is rendered through the Jinja2 template
    ``template.go.j2`` (looked up in the current directory) and written to
    ``ba<chapter><problem>.go``. A file that already exists is never
    overwritten.
    """
    # Jinja environment; templates are loaded from the current directory.
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    # One dictionary per problem. The keys are handed to the template as
    # keyword arguments when it is rendered.
    problems = [
        {
            'chapter': '3',
            'problem': 'a',
            'title': 'Generate k-mer Composition of a String',
            'description': 'Given an input string, generate a list of all kmers that are in the input string.',
            'url': 'http://rosalind.info/problems/ba3a/'
        },
        {
            'chapter': '3',
            'problem': 'b',
            'title': 'Reconstruct string from genome path',
            'description': 'Reconstruct a string from its genome path, i.e., sequential fragments of overlapping DNA.',
            'url': 'http://rosalind.info/problems/ba3b/'
        },
        {
            'chapter': '3',
            'problem': 'c',
            'title': 'Construct the overlap graph of a set of k-mers',
            'description': 'Given a set of overlapping k-mers, construct the overlap graph and print a sorted adjacency matrix',
            'url': 'http://rosalind.info/problems/ba3c/'
        },
    ]

    print("Writing problem boilerplate code")

    template_name = 'template.go.j2'
    for entry in problems:
        template = env.get_template(template_name)
        rendered = template.render(**entry)
        fname = 'ba' + entry['chapter'] + entry['problem'] + '.go'

        # Leave files from an earlier run (possibly hand-edited) untouched.
        if os.path.exists(fname):
            print("File %s already exists, skipping..."%(fname))
            continue

        print("Writing to file %s..."%(fname))
        with open(fname,'w') as out:
            out.write(rendered)

    print("Done")


if __name__=="__main__":
    main()
@ -0,0 +1,49 @@ |
|||||||
|
package rosalindchapter{{chapter}} |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// BA{{chapter}}{{problem}}Description prints the problem description
// for Rosalind.info
// Problem BA{{chapter}}{{problem}}: {{title}}
func BA{{chapter}}{{problem}}Description() {
	// Banner text, written to standard output one entry per line.
	banner := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA{{chapter}}{{problem}}:",
		"{{title}}",
		"",
		"{{description}}",
		"",
		"URL: {{url}}",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
||||||
|
|
||||||
|
// BA{{chapter}}{{problem}} runs the problem: it prints the problem
// description and then reads the input file named by filename using
// rosa.ReadLines. If the file cannot be read, the error is logged with
// log.Fatalf, which terminates the program.
//
// The solution itself is a stub; fill in the commented-out section below
// after the file has been generated.
func BA{{chapter}}{{problem}}(filename string) {

	BA{{chapter}}{{problem}}Description()

	// Read the contents of the input file
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Placeholder use of lines: Go rejects unused local variables, so
	// without this the freshly generated file would not compile. Remove
	// this line once the solution below actually uses the input.
	_ = lines

	//// Input file contents
	//input := lines[0]
	//params := lines[1]
	//result := rosa.PatternCount(input, pattern)
	//
	//fmt.Println("")
	//fmt.Printf("Computed result from input file: %s\n", filename)
	//fmt.Println(result)
}
||||||
|
|
@ -0,0 +1,4 @@ |
|||||||
|
# rosalind go package |
||||||
|
|
||||||
|
This directory contains the `rosalind` Go package. |
||||||
|
|
@ -0,0 +1,5 @@ |
|||||||
|
Input |
||||||
|
GCGGTTATGCACCGTTCAAATTAGCAAACCACTAAGCGACGTAGTCTGGATTGATTTCTCCCTACCAGTGACCCAAGACGCGTTAGTGAGTTAAGTTCATATCCAGTACCTGCCGCCCTCTGTACTTGGGCGTCCGATTCGCATGCTTACTCAGGTGGAGGACACGATAATCTGATTAAACTGAGCTAAACCAGGTGGAACCAGAAACCAGGTGGGGAGTCTCGCTTCAAGCCGTTCTTGCGATCAAACCAGGTGGTCCATTATGAAACCAGGTGGCTAAACCAGGTGGTCCAGATCCTCGAATGATGTCGGTGCACATCAAAACCAGGTGGGGTGGTGGAACGTAAAACCAGGTGGCATAAACCAGGTGGGCCGGTTCGTAAACCAGGTGAAACCAGGTGGGGTGGAAACCAGGTGGGTTACAAATTACGTTGAGATGGCCCAAACCAGGTGGTGGGCTTCACCCATGTCAACAAACCACCCTATGGAACTAAACCAGGTGGAACCAGGTGGTGAAGGCTTATCCTCAGGAAAAACCAGGTGGAGGTGGTGAAATAAAACCAGGTGGACCAGGTGGATAACCCTCGCCTCGCTTCTCAACCGAGACCTGGATAAACCAGGTGGGGTGGTCCACCGATTTTTGAGACACTAGAAACCAGGTGGGCGGGGAAACCAGGTGGCAAACCAGGTGGGGTGGACGGAAACCAGGTGGATATGTCATAAAACCAAACCAGGTGGTGCACCCCCATGGTGTGTCTTATCCGTGCGTATAAACCAGGTGGTCGCACGGCTTCCACTTGCTGAGAATAGGCCCGCAGGGTCAGTGCCATGCCCTCCGTCACTCGATATGTGTTGTAAGAGTGGTTACCCCTTCATTGAAGTCGCCCACAGCCCCACCTGCATTGCTAGACTATCACCCTACAGTAGGCCTTTTCGCCTTCTTCAAGCAGCAATCTCTTATCCGCGGATGGGCGCGGCGAGCGTGGCGTCCCCGAACATTTTTACCTAACGTGTTTTGTTGGCCGCAAGCCTTCCCTCTAGTCCACCTCAGCCATTCAGCCTAGTAGCTTTCAAGCCGAGCCTTCCATATCTAATGGACCGTCCAGAATTTCACACGTTTCACAGGGCTGTGTTCGACCGCCCGTAATGCTGTTTCACAGGCGATCGCCTTGCGGTTTTTTCACAGATCGCAGCCGATGGACATGCCAACTCGATTTTCACAGAGTTTTTCACAGCGGTTTCACAGCACAGCAGTGATTGTTTCACAGCAATTTTCACTTTCACAGGGGCCCTTTTCACAGCTCAGGGCTCTTTTCACTTTCACAGTTTCACAGCGCTCCTTTCACAGAGCGGGGAAATTTAAGGGAACACTCAAGGGAACAAGGGAACACACAAAGGGAACACAACACAACACATAAGGGAACACTTTCACAGAACACAAAAGTCCGAAATCATCAGCGGCGAAGGGATTTCACAGACAGACACTTTCACAGCGCATTTCACAGATACGTACTTTCACAGGCGTACTTTCACAGACTTTCACAGAGGACAAGCTCAATTTTCACAGACAGGCTGGATAAATTTCACAGCGGTAAGGGTTTCACAGCACACATAAGGGAACACGAATTTCACAGCAGGGAACACCTCTACGAGTAATCTATTACTCTACCTACTGAAGGGAACACACCGAAGACCTACTATTACCTATTACTCTTAAAGGGAACACATTACAAGGGAACACACTCTCTCGTCATATCTCACCTCTCTATTACTCTTAAGGGAACACCTTCTCGATCAACCTATTACTCTATGGAGATAGAGATATTCCAGACATATGGAGATAACATGGAGATATGGAGATAATGGAGATGGAGATAGCTCTTATATTTATCCTATGGAGATATGATACTATTAATGGAGATAATTCTAATGGAGATATAATTACTCTAAGAGGATGGGATCTCGGGCTATTACTCTAATGGAGATAAGCACTATTACTC
TAGGAAATGGAGATATGTCAATGGAGATATGTAATGGAGATAGAGGGAGATGGAGTCGCCATTTCATAATCGCCATTTCATAGTTCAGGAATCGCCATTTCCGCCATTTCTAAGATGGAGTCGCCATTTCTACGTATGGAGATAGGATCGCCATTTCATACGACCCGTTGGATATCGCCATTTCCTCGCCATTTCTGGTGACATTTCTCGCCATTTCATTTCTGGAGATAGATGGATCTCGCCATTTCATAGGAATCGCCATTTCCACGTAGGGGGGGCCACAATCCGTAGGTCGGAATTCAGACTCGCCATTTCCCATCGCCATTTCTTCACCTGTATGCCGATCCCTTCGCCATTTCTCATGGAGATAACTCTCTCTCGCCATTTCTCGCCATTTCCATTTCACTCTCATTCGCCATCGCCATTTCCATTCGCCATTTCATCGCCATTTCTTCAGGATAAGATATCGCCATTTCGACTCTCATTCGCATACTGACTCTCATTCTCATCTCGCCATTTCTCATCTGACTCTCATCCTGGGGGAAACTTGCGACTCTCATCACACTTCCGTCGACTCTCATACTGGCGGATAGCATAGGAGCCATTTAAAGACTCTCATTCTCATTCGAGACTCTCATTCAAATCCTACGAGGACTCTCATATAGACTCTCATATCATTACGAGGACTCTCATATACGAGCCATGCATGTGGCGACGACTCTCATCTACGAGCCATGCAAGCAGAATCTACGAGCGACTCTCATTACGAGCCATGTGACCGTACGAGCCATGCATGCATGCCATGCTGACTCTCATCGAGTACGAGCCATGGAAGTTCTTGTTGGTTCGTAGCCCAAGAGCTGAAGTTACGAGCCTACGAGCCATGAAGTTACTTTTACGAGCCATGAAGCTTACGATACGAGCCATGCGAGCCATGCATCCGCGCTACGAGCCATGTTCCAGTACGAGCCATGTTAGTTGCTGAAGTTAAGTTTGGCGCTGAAGTTTGTACGAGCCATGTGCCCGCTGAAGTTTGTTGTACGAGCCATGCATGCTGAAGTTAATGGCTGAAGTTAGCGTTTGCGGGCAGATCCTCATTCTACGATACGAGCCATGCCATGCAGCTGAAGTTAAGTTGGGTTACGAGCCATGCGAGCCATGTGAAGTACGAGCCATGCTGGCTGAAGTTGTTTGTGCTGCTGAAGTTGCTCTTGTCTCTAGCTGAAGTTGCCAACAGGGCTGAAGCTGAAGTTTAAGCTGAAGTTGCGAGCAGGCTGAAGTTATCGGATTGGGGCTGAAGTTCAACCTCCCGTCCCCCCACACTATATTCCCGTCCCCCCCCGCGCACGCGCCGTCTCCCGTCCCCCCTATCCCGTGCGCACGCGACGCGATCCCGTCCCCCCAGAGTGCGCGCACGCGTCCCCCTTCCCGTCCCCCTCTCCCGGGCGCACGCGTCGCTCAACATTTCCGCGCACGCGTCGCGCACGCGGGCGCACGCGGGTCCCGTCCCCCCCCCTCTTCGGCGCACGCGGAATTCCCGTCGCGCACGCGTCCCGTCCCGCGCACGCGTCGCGCACGCGACTGCCCTAACCAACAGTGCGCACGCGCCGGTAACCCGGTAACCCGGTAACCGCGCACGCGGGCGCACGCGCGTAACCCGCGCACGCGCCGCGCACGCGGCCCGGTTCCCGTCCCCCCCGGTAACCCGGTAACTCCCGTCCCCCGTAACCCGGTGCGCACGCGCCCGGCGCACGCGGAGCGCACGCGCCCCCCCCGGTAATAGCGCACGCGCCCGGGCGCACGCGCCCGGTAACCCGGTAACCCGGGCGCGCGCACGCGGCGGCGCACGCGGCGCACGCGGCGCACGCG |
||||||
|
11 566 18 |
||||||
|
Output |
||||||
|
AAACCAGGTGG |
@ -0,0 +1,5 @@ |
|||||||
|
Input |
||||||
|
CGGAAGCGAGATTCGCGTGGCGTGATTCCGGCGGGCGTGGAGAAGCGAGATTCATTCAAGCCGGGAGGCGTGGCGTGGCGTGGCGTGCGGATTCAAGCCGGCGGGCGTGATTCGAGCGGCGGATTCGAGATTCCGGGCGTGCGGGCGTGAAGCGCGTGGAGGAGGCGTGGCGTGCGGGAGGAGAAGCGAGAAGCCGGATTCAAGCAAGCATTCCGGCGGGAGATTCGCGTGGAGGCGTGGAGGCGTGGAGGCGTGCGGCGGGAGATTCAAGCCGGATTCGCGTGGAGAAGCGAGAAGCGCGTGCGGAAGCGAGGAGGAGAAGCATTCGCGTGATTCCGGGAGATTCAAGCATTCGCGTGCGGCGGGAGATTCAAGCGAGGAGGCGTGAAGCAAGCAAGCAAGCGCGTGGCGTGCGGCGGGAGAAGCAAGCGCGTGATTCGAGCGGGCGTGCGGAAGCGAGCGG |
||||||
|
12 |
||||||
|
Output |
||||||
|
CGGCGGGAGATT CGGGAGATTCAA CGTGCGGCGGGA CGTGGAGGCGTG CGTGGCGTGCGG GCGTGCGGCGGG GCGTGGAGGCGT GCGTGGCGTGCG GGAGAAGCGAGA GGAGATTCAAGC GGCGGGAGATTC GGGAGATTCAAG GTGCGGCGGGAG TGCGGCGGGAGA |
@ -0,0 +1,5 @@ |
|||||||
|
Input: |
||||||
|
CACAGTAGGCGCCGGCACACACAGCCCCGGGCCCCGGGCCGCCCCGGGCCGGCGGCCGCCGGCGCCGGCACACCGGCACAGCCGTACCGGCACAGTAGTACCGGCCGGCCGGCACACCGGCACACCGGGTACACACCGGGGCGCACACACAGGCGGGCGCCGGGCCCCGGGCCGTACCGGGCCGCCGGCGGCCCACAGGCGCCGGCACAGTACCGGCACACACAGTAGCCCACACACAGGCGGGCGGTAGCCGGCGCACACACACACAGTAGGCGCACAGCCGCCCACACACACCGGCCGGCCGGCACAGGCGGGCGGGCGCACACACACCGGCACAGTAGTAGGCGGCCGGCGCACAGCC |
||||||
|
10 2 |
||||||
|
Output: |
||||||
|
GCACACAGAC GCGCACACAC |
@ -0,0 +1,5 @@ |
|||||||
|
Input |
||||||
|
CTTGCCGGCGCCGATTATACGATCGCGGCCGCTTGCCTTCTTTATAATGCATCGGCGCCGCGATCTTGCTATATACGTACGCTTCGCTTGCATCTTGCGCGCATTACGTACTTATCGATTACTTATCTTCGATGCCGGCCGGCATATGCCGCTTTAGCATCGATCGATCGTACTTTACGCGTATAGCCGCTTCGCTTGCCGTACGCGATGCTAGCATATGCTAGCGCTAATTACTTAT |
||||||
|
9 3 |
||||||
|
Output |
||||||
|
AGCGCCGCT AGCGGCGCT |
File diff suppressed because one or more lines are too long
@ -0,0 +1,5 @@ |
|||||||
|
Input |
||||||
|
CCGAAGCAATTGAAACCCCCCCGGCCTGGGAGGCGCAAAAATCTGACCTCTTTGTGAGTTGACCACTTAATTTATGTCTGACCACGAGAAGGGCTACTGATTTGGTACGTCGGGTCATGACCCCCAGTTCTTAGCCGCCTGCTCCAATCTCTGACTTGTTTATCGAGGGGATGGAGTAACGAAATGCGATTCGCCCGCTCAGGCCAAGGTATATATTTGAGTAGCGGAAGGTTGCACTACCTACAACCACGGCACACCGGCACGTTGTCGTGCCCTGGCGGCCTGCGCACTTTCGCCACTGTCAAGTACGACTTCCCAAGCTCAACCAACATTCATAATCCGGTGCAATTCATACCGTATCATCGTGCTATAAGCGACGCCGATTCTCGGGGCCTGATAATTGAGACTGGACTACATAGTGGGTGCCCTCTCTGCGAGTAAGTGACGGAACAACGGAGATCAGGGACCAAATGGTAGCAAAACAGATCGAGGTACACGCAGGTAGCTGTCCGTGGAGTAGACCGCGCTTAGCGTCTGTTAGAGTATCATCGGGGTATTAGACACAGGAACCTCTATGCTGTTAAAAGGCCATACCCCGTAATTGTGCAAATTTGTTACGTTCAAATCTACGCAGTGAGGGTCCTAAGGTGATGGCAGGGATTGGAACTTCTCCGCTGGCTCTTAGATTACTTAGCCAGTCTACCCTCGAAGATACAAATCCTTCCACCAGAGGGAGCTCATTGAAATTCATTCCATGCTACTCGACCGCGCGTATGGGTGCGGGGCTCTATGGGATCTAACTCGATCCTTCAGAGTCCTTATTCAAATGCATTTCCGTCCCCGTATGTTTCGACGAAGCCGAAGCCCAAACCCTGGGATGGACGAATTAAGGACAGTACAGGCAATAGTGTTCTCCCATACTCGGAACAGACGCCTCATTTTTTCGCGAAATCGATCTGGGTTGGAAGAAGTTCCAGTGCAGAGTTCCTATCACACAATTCGTTCTCGGGGCTTCCGGCCCATAAGCGATACTACTGTCTTTGCGAGCTAACGATTACATTCGGGGGAACTTAGCTCGGACTGGACCAGGTACATGATCCAAAGCGCGATGTCTGTCTGTTACCCTCACCGCCGCTCTTTTATCGGGTA |
||||||
|
GCGTAGTAGGTTCGCGTACCTAGTTCCGCCGAAAAGACAAAGGAGAAGGGAATGCTCCTAGTAGTTTCAGTCTAGCAAACATGTTATAACGCTAACTGTGTGCTGCAAAAAGGATTTGAACCCAAATTTTAAAGCGCTGATCGACAGAACGCTGTTGAAGAGGCGATGGTACTGAGATTCCCCAGAAACCACCTCCGCGCTATGTGCTCAAGACAACCCGCATTCGTTTTTACTAGATTTGGAGCCGAGTTGTGATTTGGATATTTTCACATAAGACCGAGCAGGAAATATACCTTGTTGCAGCTATTGACCCCGTTCTCTCGGAAATCCATGGAATAGTCTTCGGATATTCGTACCAATGGGCGCGATGTTGCGATAAGAGAGCACATTTCATTAAGTGGTGCTCCGCCGCTAAGATGGGAAGGGGCGAGTCTATCGCAGCATCGAAGGCTGAGTTGGCCATTGCCGAGAGTATACATATTTACGATCACACTCGCATAGTCCCACGCATTACGTCCGAGATAGTATGTCCCAATGCAACCTAAAGCCGCGAGATTCCCTAAGGAGAAAATTAAACACTGGAAATTAGGTGATGCTACATCCCATGGACACTTTCGGAACAATATCGGTGACACACATCATCCGTGATCCCGTGATATTTCATCCATGGAGAGAGTATGGTTTTACTACACCTGGTCTAGGCCAAGCCTAACCCCCTGTTCATCCGTTTTATACGAGTATTACCTTGACGACCATAGAGGATAGACTCGGTATCCCGCACACTCTACACACACGACTTAATCCGCTCCACGACCTTCCTAGCGATCTTTGGCGCAGCCGGTTCGCGTATTTTACGACCAACTCGATGGATCCCAATTATCCCCCTGGTAGTGCCCCTCCGCCTGAGAATTCGACGGGCGAGGTCCGGGGGACCGACATAGAGTGGAATGCTTCTTTCCGGGATAACACGTGATTGACATAAAAATGTAGGGCAGATAGGCATCGTTAGCACCTCTCTCCTTGCTGCACTGCGTTTATCGATCGAATTCAAGACTTGTGCATGTTGAAAACAACCTCGCGTTATCCCTGCTATTTGCTTCAGAGCCGTAGGAGGGGACCATGCGTGAGTCCTCCTGAGCAACCTCAATT |
||||||
|
Output |
||||||
|
844 |
File diff suppressed because one or more lines are too long
@ -0,0 +1,10 @@ |
|||||||
|
Input |
||||||
|
5 2 |
||||||
|
TCTGAGCTTGCGTTATTTTTAGACC |
||||||
|
GTTTGACGGGAACCCGACGCCTATA |
||||||
|
TTTTAGATTTCCTCAGTCCACTATA |
||||||
|
CTTACAATTTCGTTATTTATCTAAT |
||||||
|
CAGTAGGAATAGCCACTTTGTTGTA |
||||||
|
AAATCCATTAAGGAAAGACGACCGT |
||||||
|
Output |
||||||
|
AAACT AAATC AACAC AACAT AACCT AACTA AACTC AACTG AACTT AAGAA AAGCT AAGGT AAGTC AATAC AATAT AATCC AATCT AATGC AATTC AATTG ACAAC ACACA ACACC ACACG ACACT ACAGA ACAGC ACATC ACATG ACCAT ACCCT ACCGT ACCTA ACCTC ACCTG ACCTT ACGAC ACGAG ACGAT ACGCT ACGGT ACGTC ACGTT ACTAA ACTAG ACTAT ACTCA ACTCC ACTCG ACTCT ACTGA ACTGC ACTGT ACTTA ACTTC ACTTT AGAAA AGAAC AGAAG AGAAT AGACA AGACT AGATA AGATC AGCAT AGCCA AGCGT AGCTA AGCTC AGCTG AGCTT AGGAT AGGTA AGGTC AGTAA AGTAC AGTAT AGTCC AGTCG AGTCT AGTGA AGTTG ATAAA ATAAC ATACA ATACC ATAGA ATATA ATATC ATATG ATATT ATCAG ATCCC ATCCG ATCCT ATCGA ATCGC ATCTA ATCTC ATCTG ATGAC ATGAT ATGCA ATGCC ATGGA ATGGC ATGTA ATGTC ATTAA ATTAC ATTAG ATTAT ATTCA ATTCC ATTCG ATTGA ATTGC ATTGG ATTGT ATTTA ATTTC ATTTG ATTTT CAAAG CAACC CAACT CAAGA CAAGC CAATA CAATT CACAC CACAG CACCT CACGT CACTA CACTT CAGAA CAGAC CAGAT CAGGT CAGTA CAGTC CATAA CATAC CATAG CATAT CATCC CATCT CATGA CATGT CATTA CATTG CATTT CCAAG CCATA CCATG CCATT CCCGT CCCTA CCCTT CCGAA CCGAC CCGAT CCGCT CCGGT CCGTA CCGTC CCGTG CCGTT CCTAC CCTAT CCTCA CCTCC CCTTA CCTTC CCTTG CCTTT CGAAA CGAAG CGACA CGACT CGAGT CGATA CGATG CGATT CGCAA CGCAT CGCCA CGCGA CGCTA CGCTC CGCTT CGGAC CGGAT CGGCA CGGTA CGGTC CGGTT CGTAA CGTAC CGTCA CGTCG CGTCT CGTTA CGTTT CTAAC CTAAG CTAAT CTACA CTACC CTACG CTACT CTAGA CTAGC CTAGG CTAGT CTATA CTATC CTATG CTATT CTCAT CTCCG CTCGT CTCTA CTCTT CTGAA CTGAG CTGCA CTGCC CTGTA CTGTT CTTAA CTTAC CTTAG CTTAT CTTCA CTTGA CTTTA CTTTC CTTTG CTTTT GAAAT GAACA GAACT GAAGT GAATG GAATT GACAC GACAT GACCA GACCT GACGT GACTT GAGAA GAGAT GAGCT GATAA GATAC GATAG GATAT GATCA GATCC GATCG GATCT GATGT GATTA GATTC GATTG GATTT GCAAT GCACT GCATC GCATT GCCAT GCCGT GCCTA GCCTT GCGAT GCGGT GCGTC GCGTT GCTAA GCTAC GCTAG GCTAT GCTGA GCTGT GCTTA GCTTT GGAAT GGACA GGATA GGATC GGATT GGCTA GGGAT GGTAC GGTAG GGTAT GGTCA GGTCG GGTTA GTAAA GTAAG GTACA GTACC GTACG GTAGA GTATA GTATC GTATG GTATT GTCAA GTCAG GTCCG GTCCT GTCGA GTCGC GTCGT GTCTA GTCTG GTGAA GTGAG GTGCA GTGCG GTTAA GTTAC GTTAG GTTAT GTTCA GTTCC GTTCG GTTGA 
GTTTA TAAAC TAAAG TAACA TAACC TAACT TAAGA TAAGC TAATA TAATC TACAC TACAG TACCC TACCG TACCT TACGA TACGC TACGT TACTA TACTC TACTG TAGAA TAGAC TAGAG TAGAT TAGCC TAGCG TAGGA TAGTC TATAA TATAC TATAT TATCA TATCC TATCG TATGA TATGC TATGG TATGT TATTA TATTG TCAAC TCAAT TCACC TCACG TCACT TCAGA TCATA TCATG TCCAA TCCAC TCCAG TCCAT TCCCA TCCCT TCCGA TCCGC TCCGT TCCTA TCCTG TCCTT TCGAA TCGAC TCGAT TCGCC TCGCT TCGGA TCGGC TCGGG TCGGT TCGTC TCTAC TCTAG TCTAT TCTCC TCTCT TCTGG TCTGT TCTTA TCTTT TGAAA TGAAC TGAAT TGACA TGACC TGACT TGAGA TGAGC TGAGT TGATA TGATC TGATG TGATT TGCAA TGCAC TGCAG TGCAT TGCCA TGCCG TGCCT TGCGA TGCGT TGCTT TGGAA TGGAT TGGTA TGTAA TGTAG TGTAT TGTCC TGTCG TGTGG TGTTA TTAAA TTAAC TTAAG TTAAT TTACA TTACC TTACG TTACT TTAGA TTAGC TTAGG TTAGT TTATA TTATC TTATG TTATT TTCAA TTCAC TTCAT TTCCA TTCCC TTCCT TTCGA TTCGG TTCGT TTCTA TTCTG TTGAA TTGAC TTGAG TTGAT TTGCA TTGCG TTGGA TTGGG TTGTG TTTAA TTTAC TTTAG TTTAT TTTCA TTTCC TTTCG TTTGA TTTGG TTTTA TTTTG |
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -0,0 +1,5 @@ |
|||||||
|
Input |
||||||
|
ACACCA |
||||||
|
CCGAACACCCGTACACCGAACACCACACCACACCTTGCACACCACACCTACACCACACACCACACCGGACACCCACACCCACACCACGAACACCGAGAGTACACCTACACCTGACACCGGGGATCGTCACACCAAGTGGTGATACACCCACACCCTTTACACCTACACCACACCCGTACACCCTGAACACCACACCTAGAGAGTTGCACACCTCACACCGAAGGCACACCACACCATCCACACCATAAACACCGTTAACACCGTAGAACACCCAGCACACCCTTACCGCATACACCGACGTTAGACACCCACACCGGCAGTCACACCGTACACCCATTCGGTCCACACCCTACACCGCCTGCCACACCTACTGAGTTACACCGCATGACACCATTATCCGAACACACCAATATACACCAACACCATACACCATTTAACACCCCAAAACACCGACACCGACACCGCAAGCCCACACCACACCCACACCACAGACACCTACACCGTTTAGACACCAACACCGACACCACACCCCACACCCAAGACACCGCTACACCCTGCTGGACACCGACACCTACACCTCACACCGGACACCGCACACACCGCCACACCAATCACACCACACCACACCAGTACAACACCGACACCTACACCACACCACACCCAGATACACCCACACCGGACACCACACCAAACACCATTACACCCACACCGGTACACCACACCTCGTACACCAAGTAGACACCCAACACACCACACCTTGATGACACCTGACACCATACACCAAACACCACACCGAGGTAGACACCACACCGCCATCGACCACACCCTGACACCATACACCACACCACACCTAGTCGACACCCACACCCTCACACCTGACACCCGCGGCATACACCCACACCACTTACACCTACACCGGGGGAAACACCGAAACACCTCAACACCGGACACCACACCTAAGACACCGGGCGATACACCTGACCCTGACACCACACCACACCCAACACCCGAACACCACACCCAAACCTTGACACCCACACCAAAACACCCTTTATTAAAACACCCCGACCACCAAACACCACACCCCACACCGAACACCCACACCGCATACACCGGTCACACCTTATCTCGCCCACACCCTACACCCCACACCACACCACACCACACCGTACCACACCACACCCCCACACCAAAACACCACACCACACCGGTTACACCCCACACCAACACCCACACCATTACACCTACACCGCAACACCTGCACACCACACCAAGACTGGAGACACCTACCACACCCTCGTTTACACCACCTGACACCTTACACCTCCGACACCAAAAACCCGTTGGGTCATCGGATCAGGACACCTTTACACCACACCTTCGAGGACACCACGGACACCACACCCCACACCACACCGGTACACCGCGTTCACACCTCACACCGACACCACACCCCCTGAACTGTATACACCACACCACACCAACCCAACACCCTAGAAGACACCTGCCACACCTTACACCACACCACCGACACCAACACCCAAACACCTTTGACACACCACACCAACACCGTACACCGCAACACCCGCATTACACCTTACACCACACCACACCCCCCTACACCCACACCACACCCTCGGACACCAGTACACCACACCACAGATAGACACCATACACCTTACACCACATACACCTTTCACACCACACCCACACCCCGCTTAGACACCGACACCACACCACACCTGACACCACACCTCGCACACCGCCCTTACACCACACCCCAGCAGAAAACGAACACCCACACCACACCACACACCACACCACACCACACCGACACCTGACACCTAAACACCCCCACACCACACCTCTCCAACACCACACCAACACCTACACCAGAAAGACACCGACACCCGACACCCGCTGTTGTACACCCACACCATCGACACCACACCACACCACACCCTACACCGGCACACCATGCAAACACCACACACCTGGAC
ACCCACACCACACCGCACACCACACCACACCTACACCACCGACACCACACCACACACCTACTCCACAACACCTACACCAAACACCCTACACCTACACCTACACCTACATACACCTACACCTAATATTATGGACACCACACCTTCAGACACCGTACACCACACACCCTATGTTACACCACAGGCAGAATTTGACACCTCACACCCACACCCACACCCGCACACCACACCAACACCACACCACACCCCCAACACCGCTCTTACACCTTACACCGACACCAACACCGACACCGACACCACACCCCAATATCCCTCACACCACACCTAACCAGTATACACCGTTGACAACACCCCAATTTACACCCCATACACCTCAGACCACACACCGGACGGGCAACACCTACACCGATGTTACTTTACACCGGGCTCGCGGACACCACTCGACACCAACACCCGACACCTTACACCACACCAGCTGCGTGAACACCTACACCATCCCAACACCACACCGACACCGTATGGACACCTACACCTCGAGAGTTCCGCTAGAACACCACACCCATACACCATACACCGCGTACACCGAACACCGACACCCACACCACACCCAATGACACCGATGACACCGGCTCGATACACCTACACCGAACACCATCAGACACCGCGTACACCCAACACCTGACACCAACACCGCGGCACACCTAGTGACACCTACACCTACACCACACCATACACCCTACACCGATGAACACCAACACCACTCTAAACACCCAGGACACCAACACACCTAGACACCACACCAACGACAGAGACACCCTACACCTGCCAAGCTTTACACCATTGGTGAATCACACACCACACCAACACCACACCACACCGCTTACACCCGACCCGAAAACACCCACACCACACCAACACCACACCACATTACTCCCGTTACACCTACACCAACACCACACCTTTACACCACACCCAGCAACACCACACCAAATGGACACCACACCACACCACACCTTAGCCGATGTGCCGACACCGCTGTCGTCACACCAGTGACACCTTAGCGTACACACCACACCCAACACCTACACCACACCCGAAACACCTGACACCACACCACACCACACCCTACACCACACCATGACCACACACCAGCCGACACCACACCATACACCTACACCGAAACACCTTTCTACACCACACCACACCTGAACACCTAGTCACACCACGACACCAACACCTGACCACACCGGGGGACACCTTTGGAACGACACCTAACACCGCCACACCACACCACACCCGACACCTATAACACCACACCACACCACACCAAAGGCACACCTTAACACCCACACCAAGGGCTACACCACACCACACCTCCAAAACAAGGGACACCACACCCAACACCACACCACACCGCGTGGACACCACACCTTGACACCAAATTGTGCACACCACACCTGCACACCTTAAGAACGACACCGTCAGTACACCGAAACCCTATGACACCTGGGACACCTGGCACACCAACTACACCACACCCACACCACACACCTGGACACCGTTTCGCGAGTGTGGGTTGCTTGACACCACACCACACCGCGGCCTTACACCGCACACCGTAAACACCGTTGACACCTCATTACTCGACACCACACCGCACACCCACACCCGACACCGAACACCACACCTGGGCATACACACCACACCGTACACCTACACCACACCTGTGCTACACCAGGGGTACACCACACCTAGTACACCACACCGATACACCCACACCACACCACACCCACCAACACCACACCATCAAGAACACCCTATACACCCACACCACACCTACACCACACCCTACACCACACCACACCACACCATCGACACCTACACCACACCAACACCACACCAAACACCACACCCACACCCGGACACCACACCCACACCACACCATAACACCTAACACCACACACCTACACCTACTCTGCTAAACACCCAACACCTCTACACCC
TGCCGACACCGCGACACCGGCGACACCCTGTTACACCACACCTCACACCTTCGACACCAGCCAGAGACACCGGACACCGACACCCCGAACACCAACACACCCGA |
||||||
|
Output |
||||||
|
19 24 38 49 56 80 128 164 186 225 230 239 387 403 413 419 426 471 482 508 520 604 613 618 623 646 651 679 684 691 713 727 747 770 777 784 801 829 836 841 897 947 986 991 1011 1036 1075 1148 1153 1158 1173 1186 1194 1199 1220 1232 1262 1267 1303 1329 1369 1386 1395 1407 1444 1467 1472 1477 1516 1521 1530 1555 1560 1599 1604 1625 1640 1648 1653 1666 1680 1698 1728 1733 1745 1770 1800 1805 1812 1817 1822 1856 1872 1877 1889 1933 1942 1947 1952 1972 1983 2004 2016 2021 2032 2041 2046 2073 2131 2153 2172 2218 2223 2229 2234 2272 2290 2312 2430 2440 2460 2465 2486 2497 2547 2560 2595 2645 2678 2716 2721 2745 2751 2772 2788 2793 2831 2849 2854 2860 2865 2900 2905 2911 2916 2941 2947 2960 2975 2980 2991 2996 3001 3040 3063 3081 3102 3107 3112 3124 3129 3142 3152 3157 3188 3193 3216 3224 3279 3284 3305 3310 3315 3320 3345 3357 3362 3385 3397 3402 3418 3431 3445 3517 3526 3537 3580 3585 3643 3675 3694 3712 3728 3739 3753 3772 3777 3792 3797 3824 3835 3847 3852 3857 3862 3877 3882 3888 3893 3900 3919 3930 3935 3950 4032 4053 4088 |
@ -0,0 +1,4 @@ |
|||||||
|
Input |
||||||
|
CTTCTCACGTACAACAAAATC |
||||||
|
Output |
||||||
|
2161555804173 |
@ -0,0 +1,4 @@ |
|||||||
|
Input |
||||||
|
GCACTAAAGCACCAGCGAGACTAGACAGTGCCTTACGCTGTATAGGGATAAAAGTTGTCAAGATGACTTGCGGGAATCGTTAGGCTGACACGCACTAATGCTCGCCTTCCGGGTGTTCTGTGAGTACGGTTGATCACGGTCGCCCTGCGGATGTACTACCATGAAAGTTGATCACGTGCCGCGCGCTCCCTAAGCTTAGAAGTTTGCACAATCTGCATTCTATCCTGCCACGCCTTCAATAATAAGTGGTGTATGCAATTTGGAGTCGATCTGGGAACCAACGATTAACTTGGGAAGTGGCTATATCAAAATACGATGTCTTCAGCGTCGCGGTCGACGCTGCGCAACGAACGAAAAGTCCGATGGACCCGAACTCTGATTATACCGAATCTCCGCTTTTACGACTCGCCACATACCGGCATAAGCCATTCTGGGGCTTTGCCCCCTTAGGTCTAGCCCACCCCCGACCTAGCTTGAGCGTGTCACACCCCAACAGCCGCATTACGCCCGCTCACCGACACTTGGCGGTCGTATAAGAAATCCAAAACCGAGACGAAAACTGAAGAATAAGGTTCATTCAGCATTGTGGAGTTGACAACATCAGTATGAGGGTGAGTTGCGTCAAAGTCGAAGAATATGGAGGGTCAAATCACGAGATGTAACATCCACGCGAACACTTAGCTAGTAATCATTTTTCCGTAAAGAGTCGTTGAGTCCGACCAGTTGAAGCTCAGTGTTTATCCGGTAGGGAATTGTAGGATCAACGATAGGGTCGCGGAACCGCCGTATTATAGAAAGAGATAGTCCCAACGTTCTTTATGCACTTCGCTGAGAGAGGGTGACCGGGCACGCAGAGACTTTGGCTTTGTAGCCCCATTCCGCGGCTCTTCGGATACTGACTGAGCTGTAGTCGGCACATCCTTTACAACAAAAAAGCTCATGTCCGAGATTTTAATGGCGGCGCACGGTCACTCGGAGTTGACGAATGCGCAGCGAATCGTTGGTTCCAGATAAAGGCAAGGCTGTGTTACTGTTTCGGAGGGCAATCGTCAACGAGCAAAGATGTTAGAATAGAAATCGGAGCGAGGCTCCCAGCAAATATGAGTTAGGATCTTTTTTGCGAAAGGGTTGGTCTCCATCTCCTCTCGCCTGCGAGCGAGTCCCCGAAGCACGTTCAACCTATTTGATTCGGTGCAGGACACCCTAGATTAGCATACAGGTATAATATCAGGAAGAGTCACCTTTCATTCCCGACCAGTAGGATGTATAGGAATGAGACTATCCAGTTCTTTGTCAGCTCAAGACAGCGTTGGCAATACGGCCGAGTATTGGGGGGAATACCCCGGAACATAGTATTGTGCCTTAGCTATTGCCCTAGATACCACGCGGCCCTTGAGCATTTGTCTACACTTTGGTGATCCTAGGCACCCCGCGCTCGTGGCAACGTCAGCATCTTGTGATAGCAAAGCGTATGTACCTGTAATGTAACATCAAAGTATATCGGCACCCTAGTGGGGGCGAAGGTTGGATCGCTTATCACTCGGGACGACGGTGGTATCCAGCCACAGTGTTGCTCATTAACGACCACACAGCTCTTGGAATCGAGCCATGGACAGGGGACGCCCCAGGATACATGATGTTCCTGTGAGCACAAGCACTATGGCAGGCTTAGAGCTAATTCTTCCATTGGGCCGGTAAGACGCCAGAGAAAGTCACCGGTGTGAGAAAGGGTTTCGTGTGGGGGAGGCGTCAAACAACAAGGATTTACGTCGAACCGATCAGCCCTTGTCTGATTCATTCCAGGTTTAAGCGAGCCCTGGCGGTGACCTCCCGGGGATTCTTGGTGACGATAAGTGTAGACTGGTTTATGACTGTCTATAAGTGCAAGCAGTCCGCGACTCGGCCGCTCCTCAGATCTCGTCCTCCCAATCCTTACGAGGCACTATTCCGGCCCTAAAAACTTACCTACCAACCGGACATAGCGAACGGTCTAAGTTTTC
GGAAATTGAATAACACTCGAACAAAGGAGCCCAATACATGGCACAAGCACACATAAAGCTTGGCGCTGCTGACGGCCGGCCCCCACAGCAGGTGGGTATATCAGGATAATGCTCTACCTCCTCGGGGATGACCAGAGACGAACGTTCGGACGCTATTAGTTAGTGGTCGCCCAGATATTCTCCTAATCAAGCCCTCGAAGGCTAGTCTAAATTTTAGCAAAAACTCGTATAGCAGCACATGCGGTAGACTGGGCCTCAGCCAGGTAGAGCTGTGGCTGCACTCGAGCAATCACTACCGTATAGAGTGGTGTTATTTCGGGGTGAATGTCAGGGGTGGTCCAAAATCACAAACACGTCTATTCGCACCCGGGAATGCTCATGTTCCCACGGCGGGCCTGTACAGATGTGAGAGGCAGCGATCATACAAAGTTGCCTGGCCTCCCCACGAACACACGGCGGCCCATTAGGTCTGAACAGGTTTATCGTTAATATATTTTGCGGTGG |
||||||
|
Output |
||||||
|
CCACCGCAAAATATATTAACGATAAACCTGTTCAGACCTAATGGGCCGCCGTGTGTTCGTGGGGAGGCCAGGCAACTTTGTATGATCGCTGCCTCTCACATCTGTACAGGCCCGCCGTGGGAACATGAGCATTCCCGGGTGCGAATAGACGTGTTTGTGATTTTGGACCACCCCTGACATTCACCCCGAAATAACACCACTCTATACGGTAGTGATTGCTCGAGTGCAGCCACAGCTCTACCTGGCTGAGGCCCAGTCTACCGCATGTGCTGCTATACGAGTTTTTGCTAAAATTTAGACTAGCCTTCGAGGGCTTGATTAGGAGAATATCTGGGCGACCACTAACTAATAGCGTCCGAACGTTCGTCTCTGGTCATCCCCGAGGAGGTAGAGCATTATCCTGATATACCCACCTGCTGTGGGGGCCGGCCGTCAGCAGCGCCAAGCTTTATGTGTGCTTGTGCCATGTATTGGGCTCCTTTGTTCGAGTGTTATTCAATTTCCGAAAACTTAGACCGTTCGCTATGTCCGGTTGGTAGGTAAGTTTTTAGGGCCGGAATAGTGCCTCGTAAGGATTGGGAGGACGAGATCTGAGGAGCGGCCGAGTCGCGGACTGCTTGCACTTATAGACAGTCATAAACCAGTCTACACTTATCGTCACCAAGAATCCCCGGGAGGTCACCGCCAGGGCTCGCTTAAACCTGGAATGAATCAGACAAGGGCTGATCGGTTCGACGTAAATCCTTGTTGTTTGACGCCTCCCCCACACGAAACCCTTTCTCACACCGGTGACTTTCTCTGGCGTCTTACCGGCCCAATGGAAGAATTAGCTCTAAGCCTGCCATAGTGCTTGTGCTCACAGGAACATCATGTATCCTGGGGCGTCCCCTGTCCATGGCTCGATTCCAAGAGCTGTGTGGTCGTTAATGAGCAACACTGTGGCTGGATACCACCGTCGTCCCGAGTGATAAGCGATCCAACCTTCGCCCCCACTAGGGTGCCGATATACTTTGATGTTACATTACAGGTACATACGCTTTGCTATCACAAGATGCTGACGTTGCCACGAGCGCGGGGTGCCTAGGATCACCAAAGTGTAGACAAATGCTCAAGGGCCGCGTGGTATCTAGGGCAATAGCTAAGGCACAATACTATGTTCCGGGGTATTCCCCCCAATACTCGGCCGTATTGCCAACGCTGTCTTGAGCTGACAAAGAACTGGATAGTCTCATTCCTATACATCCTACTGGTCGGGAATGAAAGGTGACTCTTCCTGATATTATACCTGTATGCTAATCTAGGGTGTCCTGCACCGAATCAAATAGGTTGAACGTGCTTCGGGGACTCGCTCGCAGGCGAGAGGAGATGGAGACCAACCCTTTCGCAAAAAAGATCCTAACTCATATTTGCTGGGAGCCTCGCTCCGATTTCTATTCTAACATCTTTGCTCGTTGACGATTGCCCTCCGAAACAGTAACACAGCCTTGCCTTTATCTGGAACCAACGATTCGCTGCGCATTCGTCAACTCCGAGTGACCGTGCGCCGCCATTAAAATCTCGGACATGAGCTTTTTTGTTGTAAAGGATGTGCCGACTACAGCTCAGTCAGTATCCGAAGAGCCGCGGAATGGGGCTACAAAGCCAAAGTCTCTGCGTGCCCGGTCACCCTCTCTCAGCGAAGTGCATAAAGAACGTTGGGACTATCTCTTTCTATAATACGGCGGTTCCGCGACCCTATCGTTGATCCTACAATTCCCTACCGGATAAACACTGAGCTTCAACTGGTCGGACTCAACGACTCTTTACGGAAAAATGATTACTAGCTAAGTGTTCGCGTGGATGTTACATCTCGTGATTTGACCCTCCATATTCTTCGACTTTGACGCAACTCACCCTCATACTGATGTTGTCAACTCCACAATGCTGAATGAACCTTATTCTTCAGTTTTCGTCTCGGTTTTGGATTTCTTATACGACCGCCAAGTGTCGGTGAGCGGGCG
TAATGCGGCTGTTGGGGTGTGACACGCTCAAGCTAGGTCGGGGGTGGGCTAGACCTAAGGGGGCAAAGCCCCAGAATGGCTTATGCCGGTATGTGGCGAGTCGTAAAAGCGGAGATTCGGTATAATCAGAGTTCGGGTCCATCGGACTTTTCGTTCGTTGCGCAGCGTCGACCGCGACGCTGAAGACATCGTATTTTGATATAGCCACTTCCCAAGTTAATCGTTGGTTCCCAGATCGACTCCAAATTGCATACACCACTTATTATTGAAGGCGTGGCAGGATAGAATGCAGATTGTGCAAACTTCTAAGCTTAGGGAGCGCGCGGCACGTGATCAACTTTCATGGTAGTACATCCGCAGGGCGACCGTGATCAACCGTACTCACAGAACACCCGGAAGGCGAGCATTAGTGCGTGTCAGCCTAACGATTCCCGCAAGTCATCTTGACAACTTTTATCCCTATACAGCGTAAGGCACTGTCTAGTCTCGCTGGTGCTTTAGTGC |
File diff suppressed because one or more lines are too long
@ -0,0 +1,974 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
"math/rand" |
||||||
|
"strings" |
||||||
|
"time" |
||||||
|
) |
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA2A
|
||||||
|
|
||||||
|
// Given a collection of strings Dna and an integer d,
|
||||||
|
// a k-mer is a (k,d)-motif if it appears in every
|
||||||
|
// string from Dna with at most d mismatches.
|
||||||
|
func FindMotifs(dna []string, k, d int) ([]string, error) { |
||||||
|
|
||||||
|
for _, input := range dna { |
||||||
|
if !CheckIsDNA(input) { |
||||||
|
msg := fmt.Sprintf("Error: input was not DNA: %s\n", input) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Pseudocode:
|
||||||
|
// for each dna string:
|
||||||
|
// get hamming neighbor histogram k,d
|
||||||
|
// KmerHistogramMismatches(input,k,d)
|
||||||
|
// find intersection of all hamming neighbor histogram key sets
|
||||||
|
|
||||||
|
// start using GoDS -
|
||||||
|
// efficient data structures.
|
||||||
|
// learn from them and use them.
|
||||||
|
|
||||||
|
// For each dna string:
|
||||||
|
sets := make([]map[string]int, len(dna)) |
||||||
|
for i, input := range dna { |
||||||
|
|
||||||
|
// Get hamming neighbor histogram
|
||||||
|
hist, _ := KmerHistogramMismatches(input, k, d) |
||||||
|
|
||||||
|
// Add each Hamming neighbor to a hash set
|
||||||
|
sets[i] = hist |
||||||
|
} |
||||||
|
|
||||||
|
// Now we want the intersection of
|
||||||
|
// all of the key sets
|
||||||
|
intersect, err := KeySetIntersection(sets) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return intersect, nil |
||||||
|
} |
||||||
|
|
||||||
|
// KeySetIntersection finds the intersection of the key sets for a slice of
// string-to-integer maps: the keys that are present in every map of input.
//
// Only key presence matters; the integer values stored under the keys are
// ignored. The order of the returned keys is unspecified because it follows
// Go's map iteration order. When no key is shared, an empty (non-nil) slice
// is returned.
//
// An error is returned when input contains no maps, since the intersection
// of zero sets is not defined here.
func KeySetIntersection(input []map[string]int) ([]string, error) {

	if len(input) == 0 {
		return nil, errors.New("Error: KeySetIntersection() requires at least one map")
	}

	saves := []string{}
	for key := range input[0] {
		// Assume this key is in every map until one is found without it.
		inEveryone := true

		// Check the remaining maps. The comma-ok lookup tests presence of
		// the key itself, so a key stored with the value 0 still counts.
		for _, hist := range input[1:] {
			if _, ok := hist[key]; !ok {
				inEveryone = false
				break
			}
		}

		// If this key is in everyone's key set, save it
		if inEveryone {
			saves = append(saves, key)
		}
	}
	return saves, nil
}
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA2b
|
||||||
|
|
||||||
|
// Given a k-mer pattern
|
||||||
|
// and a longer string text,
|
||||||
|
// find the minimum distance
|
||||||
|
// from k-mer pattern to
|
||||||
|
// any possible k-mer in text.
|
||||||
|
func MinKmerDistance(pattern, text string) (int, error) { |
||||||
|
|
||||||
|
// Algorithm 1 (faster):
|
||||||
|
//
|
||||||
|
// Run a sliding window over the input string,
|
||||||
|
// and extract all k-mers of width window and
|
||||||
|
// add them to a window set.
|
||||||
|
//
|
||||||
|
// Once the set is assembled, compute the
|
||||||
|
// distance from k-mer pattern to k-mers
|
||||||
|
// in the window set, and add to distance map.
|
||||||
|
|
||||||
|
hist, err := KmerHistogram(text, len(pattern)) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: KmerHistogram(%s,%d) returned error", |
||||||
|
text, len(pattern)) |
||||||
|
return -1, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
min_dist := len(pattern) // max possible value
|
||||||
|
for kmer := range hist { |
||||||
|
d, err := HammingDistance(pattern, kmer) |
||||||
|
if err != nil { |
||||||
|
msg := "Error: HammingDistance() returned error" |
||||||
|
return -1, errors.New(msg) |
||||||
|
} |
||||||
|
if d < min_dist { |
||||||
|
min_dist = d |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// // Algorithm 2 (slower):
|
||||||
|
// //
|
||||||
|
// // Run a sliding window over the input string,
|
||||||
|
// // and compute the distance between the k-mer
|
||||||
|
// // pattern and the k-mer in the window.
|
||||||
|
// //
|
||||||
|
// // This is slow if we have small k and large
|
||||||
|
// // input string length, or many duplicate
|
||||||
|
// // distance calculations (e.g., 1M ATGATGATG).
|
||||||
|
// k := len(pattern)
|
||||||
|
// overlap := len(text) - k + 1
|
||||||
|
// min_dist := k // max possible value
|
||||||
|
// for i := 0; i < overlap; i++ {
|
||||||
|
// this_kmer := text[i : i+k]
|
||||||
|
// dist, err := HammingDistance(this_kmer, pattern)
|
||||||
|
// if err != nil {
|
||||||
|
// msg := "Error: HammingDistance() returned error"
|
||||||
|
// return -1, errors.New(msg)
|
||||||
|
// }
|
||||||
|
// if dist < min_dist {
|
||||||
|
// min_dist = dist
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
return min_dist, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Given a k-mer pattern
|
||||||
|
// and a set of strings,
|
||||||
|
// find the sum (L1 norm)
|
||||||
|
// of the shortest distances
|
||||||
|
// from k-mer pattern to
|
||||||
|
// each input string.
|
||||||
|
func MinKmerDistances(pattern string, inputs []string) (int, error) { |
||||||
|
s := 0 |
||||||
|
for _, text := range inputs { |
||||||
|
d, err := MinKmerDistance(pattern, text) |
||||||
|
s += d |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: MinKmerDistance(%s,%s) returned error", |
||||||
|
pattern, text) |
||||||
|
return -1, errors.New(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
return s, nil |
||||||
|
} |
||||||
|
|
||||||
|
func MedianString(dna []string, k int) ([]string, error) { |
||||||
|
|
||||||
|
// Algorithm:
|
||||||
|
|
||||||
|
// start with set of DNA strings dna_i
|
||||||
|
|
||||||
|
// turn each string into set of k-mers
|
||||||
|
// set_dna_i is set of k-mers from string dna_i
|
||||||
|
|
||||||
|
// for this_kmer in all_kmers:
|
||||||
|
//
|
||||||
|
// for set in set_dna_i:
|
||||||
|
//
|
||||||
|
// min_dist = k
|
||||||
|
// for that_kmer in set:
|
||||||
|
// dist = dist(this_kmer,that_kmer)
|
||||||
|
// min_dist = min(min_dist,dist)
|
||||||
|
|
||||||
|
// Turn each DNA string into a set of kmers
|
||||||
|
histograms := make([]map[string]int, len(dna)) |
||||||
|
for i, dna_i := range dna { |
||||||
|
h, err := KmerHistogram(dna_i, k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: KmerHistogram(%s, %d) returned an error", |
||||||
|
dna_i, k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
histograms[i] = h |
||||||
|
} |
||||||
|
|
||||||
|
// Total number of possible kmer
|
||||||
|
maxx := 1 |
||||||
|
for i := 0; i < k; i++ { |
||||||
|
maxx *= 4 |
||||||
|
} |
||||||
|
|
||||||
|
// Track min distance sum d(pattern,dna)
|
||||||
|
// for all possible kmer patterns
|
||||||
|
distances := make([]int, maxx) |
||||||
|
|
||||||
|
// Iterate over every possible kmer
|
||||||
|
for iK := 0; iK < maxx; iK++ { |
||||||
|
|
||||||
|
// Turn integer iK into kmer pattern
|
||||||
|
pattern, err := NumberToPattern(iK, k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: NumberToPattern(%d,%d) raised an error", |
||||||
|
iK, k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Accumulate a min distance sum \sigma d(pattern,dna)
|
||||||
|
sigma_min_d := 0 |
||||||
|
|
||||||
|
// Iterate over every possible DNA string('s histogram)
|
||||||
|
for _, histogram := range histograms { |
||||||
|
|
||||||
|
// Accumulate a min distance d(pattern,dna)
|
||||||
|
// for this kmer pattern
|
||||||
|
// and this DNA string
|
||||||
|
min_d := k |
||||||
|
|
||||||
|
// Iterate over kmers in this DNA string('s histogram)
|
||||||
|
// (k,v - map)
|
||||||
|
for this_kmer, _ := range histogram { |
||||||
|
d, err := HammingDistance(this_kmer, pattern) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: HammingDistance(%s,%s) returned error", |
||||||
|
this_kmer, pattern) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
if d < min_d { |
||||||
|
// New minimum
|
||||||
|
min_d = d |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Accumulate
|
||||||
|
sigma_min_d += min_d |
||||||
|
} |
||||||
|
|
||||||
|
distances[iK] = sigma_min_d |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// Find the kmer corresponding to the minimum distance
|
||||||
|
running_min := distances[0] |
||||||
|
var results_str []string |
||||||
|
for i, d := range distances { |
||||||
|
if d < running_min { |
||||||
|
p, err := NumberToPattern(i, k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: NumberToPattern(%d,%d) returned error", |
||||||
|
i, k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
// New running min, new min kmer
|
||||||
|
running_min = d |
||||||
|
results_str = []string{p} |
||||||
|
|
||||||
|
} else if d == running_min { |
||||||
|
p, err := NumberToPattern(i, k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: NumberToPattern(%d,%d) returned error", |
||||||
|
i, k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
// Another running min, another min kmer
|
||||||
|
results_str = append(results_str, p) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return results_str, nil |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA2c
|
||||||
|
|
||||||
|
// indexOfString reports the position of the first element of list that
// equals item, or -1 when item does not occur in list.
func indexOfString(list []string, item string) int {
	for pos, candidate := range list {
		if candidate == item {
			return pos
		}
	}
	return -1
}
||||||
|
|
||||||
|
// Given a profile matrix,
|
||||||
|
// and given a DNA input string,
|
||||||
|
// evaluate the probability of
|
||||||
|
// every kmer in the DNA string
|
||||||
|
// and find the most probable
|
||||||
|
// kmer in the text - the kmer that
|
||||||
|
// was most likely to have been
|
||||||
|
// generated by profile among all
|
||||||
|
// kmers in text.
|
||||||
|
//
|
||||||
|
// This particular method does not
|
||||||
|
// pay attention to order of occurrence
|
||||||
|
// of kmers.
|
||||||
|
func ProfileMostProbableKmers(dna string, k int, profile [][]float32) ([]string, error) { |
||||||
|
|
||||||
|
nucleotides := []string{"A", "C", "G", "T"} |
||||||
|
|
||||||
|
// Make sure we have well-formed inputs
|
||||||
|
if k < 1 { |
||||||
|
msg := "Error: specified kmer length k was < 1\n" |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
if !CheckIsDNA(dna) { |
||||||
|
msg := fmt.Sprintf("Error: input was not DNA: %s\n", dna) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
if len(profile) != len(nucleotides) { |
||||||
|
msg := fmt.Sprintf("Error: incorrect number of rows (%d) in profile, need 4, one for each nucleotide\n", len(profile)) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Extract all k-mers occurring
|
||||||
|
// in the DNA string.
|
||||||
|
// We use the keys of this map for
|
||||||
|
// iterating over all kmers in the
|
||||||
|
// DNA string.
|
||||||
|
// Keys are not ordered!
|
||||||
|
hist, err := KmerHistogram(dna, k) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
// Compute the probability of each kmer
|
||||||
|
// by doing pairwise multiplication of
|
||||||
|
// probability of the nucleotide that
|
||||||
|
// occurs at the corresponding position.
|
||||||
|
//
|
||||||
|
// Keep track of the running maximum
|
||||||
|
// and the corresponding kmer(s).
|
||||||
|
var max_prob_kmer []string |
||||||
|
max_prob := float32(-1.0) |
||||||
|
for kmer := range hist { |
||||||
|
probability := float32(1.0) |
||||||
|
for j := 0; j < len(kmer); j++ { |
||||||
|
ix := indexOfString(nucleotides, string(kmer[j])) |
||||||
|
probability *= profile[ix][j] |
||||||
|
} |
||||||
|
if probability > max_prob { |
||||||
|
max_prob = probability |
||||||
|
max_prob_kmer = []string{kmer} |
||||||
|
} else if probability == max_prob { |
||||||
|
max_prob_kmer = append(max_prob_kmer, kmer) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return max_prob_kmer, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Only return the _most_ probable kmer.
|
||||||
|
func ProfileMostProbableKmer(dna string, k int, profile [][]float32) (string, error) { |
||||||
|
results, err := ProfileMostProbableKmers(dna, k, profile) |
||||||
|
return results[0], err |
||||||
|
} |
||||||
|
|
||||||
|
// KmerInOrderList returns the distinct kmers of length k that occur in dna,
// in the order of their first occurrence. Duplicates are dropped.
//
// An error is returned when k < 1 (a negative k would otherwise panic in
// the slice expression) or when dna is shorter than k.
func KmerInOrderList(dna string, k int) ([]string, error) {

	// Ensure we have well-formed inputs
	if k < 1 {
		return nil, errors.New("Error: KmerInOrderList called with kmer size k < 1")
	}
	if len(dna) < k {
		msg := "Error: KmerInOrderList called with a DNA string smaller than specified kmer size"
		return nil, errors.New(msg)
	}

	// Number of length-k windows in dna.
	windows := len(dna) - k + 1

	// seen gives constant-time duplicate detection; kmers keeps the
	// first-occurrence order that callers rely on.
	seen := make(map[string]bool, windows)
	kmers := make([]string, 0, windows)
	for i := 0; i < windows; i++ {
		kmer := dna[i : i+k]
		if seen[kmer] {
			continue
		}
		seen[kmer] = true
		kmers = append(kmers, kmer)
	}
	return kmers, nil
}
||||||
|
|
||||||
|
// This uses a probility matrix and evaluates
|
||||||
|
// all possible kmers in a DNA string to determine
|
||||||
|
// which kmers in the DNA string match the profile
|
||||||
|
// most closely.
|
||||||
|
//
|
||||||
|
// The greedy version maintains the order in which
|
||||||
|
// kmers occur in the original DNA string, and stops
|
||||||
|
// as soon as the first match is found.
|
||||||
|
func ProfileMostProbableKmersGreedy(dna string, k int, profile [][]float32) (string, error) { |
||||||
|
|
||||||
|
nucleotides := []string{"A", "C", "G", "T"} |
||||||
|
|
||||||
|
// Make sure we have well-formed inputs
|
||||||
|
if k < 1 { |
||||||
|
msg := "Error: specified kmer length k was < 1\n" |
||||||
|
return "", errors.New(msg) |
||||||
|
} |
||||||
|
if !CheckIsDNA(dna) { |
||||||
|
msg := fmt.Sprintf("Error: input was not DNA: %s\n", dna) |
||||||
|
return "", errors.New(msg) |
||||||
|
} |
||||||
|
if len(profile) != len(nucleotides) { |
||||||
|
msg := fmt.Sprintf("Error: incorrect number of rows (%d) in profile, need 4, one for each nucleotide\n", len(profile)) |
||||||
|
return "", errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
kmers_inorder, err := KmerInOrderList(dna, k) |
||||||
|
if err != nil || len(kmers_inorder) == 0 { |
||||||
|
msg := fmt.Sprintf("Error: call to KmerInOrderList() failed: dna = %s, k = %d", |
||||||
|
dna, k) |
||||||
|
return "", errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
var max_prob_kmer string |
||||||
|
max_prob := float32(-1.0) |
||||||
|
for _, kmer := range kmers_inorder { |
||||||
|
probability := float32(1.0) |
||||||
|
for j := 0; j < len(kmer); j++ { |
||||||
|
ix := indexOfString(nucleotides, string(kmer[j])) |
||||||
|
probability *= profile[ix][j] |
||||||
|
} |
||||||
|
if probability > max_prob { |
||||||
|
max_prob = probability |
||||||
|
max_prob_kmer = kmer |
||||||
|
} else if probability == max_prob { |
||||||
|
// do nothing, be greedy
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return max_prob_kmer, nil |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA2D
|
||||||
|
//
|
||||||
|
// This problem makes about as much sense
|
||||||
|
// as a camel in a jacuzzi.
|
||||||
|
//
|
||||||
|
// After much searching, and re-reading,
|
||||||
|
// found this great explanation:
|
||||||
|
//
|
||||||
|
// http://www.mrgraeme.co.uk/greedy-motif-search/
|
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// Scored Motif Matrix struct
|
||||||
|
|
||||||
|
// ScoredMotifMatrix holds a set of motifs (kmers) and their associated
// score. Many of these candidate motif sets are assembled and compared
// in order to find a set of motifs with a minimum score.
//
// motifs is the list of kmers in the set; score is the value last computed
// by UpdateScore(). The score is not updated dynamically when motifs are
// added or replaced: call UpdateScore() to recompute it.
type ScoredMotifMatrix struct {
	motifs []string
	score  int
}
||||||
|
|
||||||
|
// Constructor
|
||||||
|
func NewScoredMotifMatrix() ScoredMotifMatrix { |
||||||
|
var s ScoredMotifMatrix |
||||||
|
s.motifs = []string{} |
||||||
|
s.score = -1 |
||||||
|
return s |
||||||
|
} |
||||||
|
|
||||||
|
// Add a motif to the motif matrix
|
||||||
|
func (s *ScoredMotifMatrix) AddMotif(motif string) error { |
||||||
|
if len(s.motifs) > 0 { |
||||||
|
if len(motif) != len(s.motifs[0]) { |
||||||
|
msg := fmt.Sprintf("Error: could not add motif %s: length %d does not match existing motif length %d", |
||||||
|
motif, len(motif), len(s.motifs[0])) |
||||||
|
return errors.New(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
s.motifs = append(s.motifs, motif) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// Update the value of the score of a ScoredMotifMatrix.
|
||||||
|
// This assembles a kmer composed of the most common
|
||||||
|
// nucleotide per position, then computes the sum of
|
||||||
|
// the Hamming distances from that kmer for all motifs.
|
||||||
|
func (s *ScoredMotifMatrix) UpdateScore() error { |
||||||
|
|
||||||
|
if len(s.motifs) == 0 { |
||||||
|
msg := fmt.Sprintf("Error: call to scored matrix motif UpdateScore() method failed: there are no motifs!") |
||||||
|
return errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Params
|
||||||
|
t := len(s.motifs) |
||||||
|
k := len(s.motifs[0]) |
||||||
|
|
||||||
|
// Start by assembling a "most common"
|
||||||
|
// mer - the kmer containing the most
|
||||||
|
// probable nucleotide at each position.
|
||||||
|
most_common_kmer := make([]string, k) |
||||||
|
|
||||||
|
// Loop over every nucleotide
|
||||||
|
for ik := 0; ik < k; ik++ { |
||||||
|
|
||||||
|
// Determine most common nucleotide
|
||||||
|
// using a map to count frequencies
|
||||||
|
frequency := make(map[string]int) |
||||||
|
|
||||||
|
// Loop over every DNA string,
|
||||||
|
// count nucleotide frequencies
|
||||||
|
for it := 0; it < t; it++ { |
||||||
|
bp := string(s.motifs[it][ik]) |
||||||
|
frequency[bp] += 1 |
||||||
|
} |
||||||
|
|
||||||
|
// Determine most frequent nucleotide
|
||||||
|
var max_bp string |
||||||
|
var max_freq int |
||||||
|
max_freq = 0 |
||||||
|
for ibp, ibp_freq := range frequency { |
||||||
|
if ibp_freq > max_freq { |
||||||
|
// Set new maximum occurring base pair
|
||||||
|
max_freq = ibp_freq |
||||||
|
max_bp = ibp |
||||||
|
} |
||||||
|
} |
||||||
|
most_common_kmer[ik] = max_bp |
||||||
|
} |
||||||
|
|
||||||
|
commonkmer := strings.Join(most_common_kmer, "") |
||||||
|
|
||||||
|
// Now that we have the common kmer,
|
||||||
|
// we can compute the score of each motif,
|
||||||
|
// and sum their scores to get the total score.
|
||||||
|
s.score = 0 |
||||||
|
for it := 0; it < t; it++ { |
||||||
|
d, _ := HammingDistance(commonkmer, s.motifs[it]) |
||||||
|
s.score += d |
||||||
|
} |
||||||
|
|
||||||
|
// Done
|
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (s *ScoredMotifMatrix) MakeProfile(pseudocounts bool) ([][]float32, error) { |
||||||
|
// Params
|
||||||
|
t := len(s.motifs) |
||||||
|
k := len(s.motifs[0]) |
||||||
|
nucleotides := []string{"A", "C", "G", "T"} |
||||||
|
|
||||||
|
// Profile is a 4 x k matrix of float32s
|
||||||
|
profile := make([][]float32, 4) |
||||||
|
for jj := 0; jj < 4; jj++ { |
||||||
|
profile[jj] = make([]float32, k) |
||||||
|
} |
||||||
|
|
||||||
|
// For each column, i.e. kmer nucleotide location,
|
||||||
|
// compute the probability
|
||||||
|
// of each of the four nucleotides
|
||||||
|
//
|
||||||
|
// P_i = N_i / sum_j N_j
|
||||||
|
//
|
||||||
|
for ik := 0; ik < k; ik++ { |
||||||
|
counts := map[string]int{ |
||||||
|
"A": 0, |
||||||
|
"C": 0, |
||||||
|
"G": 0, |
||||||
|
"T": 0, |
||||||
|
} |
||||||
|
|
||||||
|
// Populate counts
|
||||||
|
for it := 0; it < t; it++ { |
||||||
|
nucleotide := string(s.motifs[it][ik]) |
||||||
|
counts[nucleotide] += 1 |
||||||
|
} |
||||||
|
|
||||||
|
if pseudocounts { |
||||||
|
found_zero := false |
||||||
|
for _, nuc := range nucleotides { |
||||||
|
count := counts[nuc] |
||||||
|
if count == 0 { |
||||||
|
found_zero = true |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
if found_zero { |
||||||
|
for _, nuc := range nucleotides { |
||||||
|
counts[nuc] += 1 |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Sum all values
|
||||||
|
summ := 0 |
||||||
|
for _, nuc := range nucleotides { |
||||||
|
summ += counts[nuc] |
||||||
|
} |
||||||
|
|
||||||
|
// Populate p_i
|
||||||
|
for inuc, nuc := range nucleotides { |
||||||
|
val := float32(counts[nuc]) |
||||||
|
val /= float32(summ) |
||||||
|
profile[inuc][ik] = val |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return profile, nil |
||||||
|
} |
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// BA2D and BA2E functions
|
||||||
|
//
|
||||||
|
// Note: the function below is for
|
||||||
|
// BA2D and BA2E, depending on the
|
||||||
|
// value of the pseudocounts boolean.
|
||||||
|
|
||||||
|
// Given an integer k (kmer size) and t (len(dna)),
|
||||||
|
// return a collection of kmer strings
|
||||||
|
// that have the lowest score (highest similarity).
|
||||||
|
// If at any step you find more than one
|
||||||
|
// Profile-most probable k-mer in a given
|
||||||
|
// DNA string, use the one occurring first.
|
||||||
|
// Boolean pseudocounts turns on/off pseudocounts.
|
||||||
|
func GreedyMotifSearch(dna []string, k, t int, pseudocounts bool) ([]string, error) { |
||||||
|
|
||||||
|
var best_smm ScoredMotifMatrix |
||||||
|
|
||||||
|
// bestmotifs is initially an empty list with score 0
|
||||||
|
best_smm = NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// One way of getting kmer motifs
|
||||||
|
// is to create a hash table of all
|
||||||
|
// motifs that occur in the DNA string.
|
||||||
|
// This does not maintain the original
|
||||||
|
// order in which the motifs occur.
|
||||||
|
// To be *greedy* we should pay attention
|
||||||
|
// to which one comes first.
|
||||||
|
motifs, err := KmerInOrderList(dna[0], k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: call to KmerInOrderList() failed with params:\n\tdna = %s\n\tk = %d", |
||||||
|
dna[0], k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
for _, kmer_motif := range motifs { |
||||||
|
|
||||||
|
// Create a new scored motif group
|
||||||
|
this_smm := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// Add our motif, which we chose from dna[0]
|
||||||
|
// This motif kicks off the new motif group
|
||||||
|
this_smm.AddMotif(kmer_motif) |
||||||
|
|
||||||
|
// Loop over all remaining dna strings
|
||||||
|
for i := 1; i < len(dna); i++ { |
||||||
|
|
||||||
|
idna := dna[i] |
||||||
|
|
||||||
|
// Form a profile matrix from
|
||||||
|
// all the motifs from dna strings
|
||||||
|
// up to, but not including, this one
|
||||||
|
profile, _ := this_smm.MakeProfile(pseudocounts) |
||||||
|
|
||||||
|
// Use the profile to find the profile-most
|
||||||
|
// probable kmer in this string of dna, idna
|
||||||
|
result, _ := ProfileMostProbableKmersGreedy(idna, k, profile) |
||||||
|
|
||||||
|
// Add the profile-most probable kmer
|
||||||
|
// to the list of motifs
|
||||||
|
if len(result) > 0 { |
||||||
|
this_smm.AddMotif(result) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
err := this_smm.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
if this_smm.score < best_smm.score || best_smm.score < 0 { |
||||||
|
best_smm = this_smm |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return best_smm.motifs, nil |
||||||
|
} |
||||||
|
|
||||||
|
// ----------------------------
// BA2D functions

// GreedyMotifSearchNoPseudocounts runs a greedy motif search using regular
// counts: it calls GreedyMotifSearch with dna, k and t passed through
// unchanged and pseudocounts set to false, and returns its result as is.
func GreedyMotifSearchNoPseudocounts(dna []string, k, t int) ([]string, error) {
	return GreedyMotifSearch(dna, k, t, false)
}
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// BA2E functions
|
||||||
|
|
||||||
|
// GreedyMotifSearchPseudocounts runs a greedy motif search using
// pseudocounts: it calls GreedyMotifSearch with dna, k and t passed through
// unchanged and pseudocounts set to true, and returns its result as is.
func GreedyMotifSearchPseudocounts(dna []string, k, t int) ([]string, error) {
	return GreedyMotifSearch(dna, k, t, true)
}
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// BA2F functions
|
||||||
|
|
||||||
|
// Run a random motif search with pseudocounts.
|
||||||
|
func RandomMotifSearchPseudocounts(dna []string, k, t int) ([]string, int, error) { |
||||||
|
|
||||||
|
s := rand.NewSource(time.Now().UnixNano()) |
||||||
|
r := rand.New(s) |
||||||
|
|
||||||
|
pseudocounts := true |
||||||
|
|
||||||
|
var result_motifs []string |
||||||
|
var result_score int |
||||||
|
|
||||||
|
// ---------------------------------
|
||||||
|
// Fencepost algorithm:
|
||||||
|
// Create a set of random motifs and calculate
|
||||||
|
// their score, once, before we go into the loop.
|
||||||
|
|
||||||
|
// Create a new scored motif group to create the first profile
|
||||||
|
this_smm := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// Pick a random kmer motif from each DNA string
|
||||||
|
var ri int |
||||||
|
for i := 0; i < len(dna); i++ { |
||||||
|
// Non-inclusive [0,n)
|
||||||
|
overlap := len(dna[i]) - k + 1 |
||||||
|
ri = r.Intn(overlap) |
||||||
|
result := dna[i][ri : ri+k] |
||||||
|
this_smm.AddMotif(result) |
||||||
|
} |
||||||
|
|
||||||
|
// Update the first (currently best) score
|
||||||
|
this_smm.UpdateScore() |
||||||
|
best_score := this_smm.score |
||||||
|
|
||||||
|
// ---------------------------------
|
||||||
|
// Main loop:
|
||||||
|
// Get the profile from our current scored
|
||||||
|
// motif matrix, and use it to choose the
|
||||||
|
// profile-most probable kmers for the next
|
||||||
|
// round.
|
||||||
|
stop_loop := false |
||||||
|
for stop_loop == false { |
||||||
|
|
||||||
|
//fmt.Printf("----------------------------\n")
|
||||||
|
//fmt.Printf("Current motifs = %s\n", strings.Join(this_smm.motifs, " "))
|
||||||
|
//fmt.Printf("Current best score = %d\n", best_score)
|
||||||
|
|
||||||
|
// Get profile from this_smm first
|
||||||
|
profile, _ := this_smm.MakeProfile(pseudocounts) |
||||||
|
//fmt.Printf("Current profile = \n%v\n", profile)
|
||||||
|
|
||||||
|
// Make a new scored motif matrix
|
||||||
|
next_smm := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// Loop over all dna strings
|
||||||
|
for i := 0; i < len(dna); i++ { |
||||||
|
|
||||||
|
// Use the profile to find the profile-most
|
||||||
|
// probable kmer in this string of dna, idna
|
||||||
|
result, _ := ProfileMostProbableKmersGreedy(dna[i], k, profile) |
||||||
|
|
||||||
|
// Add the profile-most probable kmer
|
||||||
|
// to the list of motifs
|
||||||
|
if len(result) > 0 { |
||||||
|
next_smm.AddMotif(result) |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
next_smm.UpdateScore() |
||||||
|
next_score := next_smm.score |
||||||
|
|
||||||
|
//fmt.Printf("Next motifs = %s\n", strings.Join(next_smm.motifs, " "))
|
||||||
|
//fmt.Printf("Next score = %d\n", next_score)
|
||||||
|
|
||||||
|
if next_score < best_score { |
||||||
|
best_score = next_score |
||||||
|
this_smm = next_smm |
||||||
|
//fmt.Printf(" +++ Next motifs are better... continuing... new score = %d\n", best_score)
|
||||||
|
} else { |
||||||
|
// This score does not improve the best score,
|
||||||
|
// so stop now and return prior result.
|
||||||
|
result_motifs = this_smm.motifs |
||||||
|
result_score = this_smm.score |
||||||
|
stop_loop = true |
||||||
|
//fmt.Printf(" --- Next motifs are not better... ending... old score = %d\n", best_score)
|
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
return result_motifs, result_score, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Driver function to run multiple random motif searches
|
||||||
|
// and keep the best of all runs.
|
||||||
|
func ManyRandomMotifSearches(dna []string, k, t, n int) ([]string, error) { |
||||||
|
// Initial best motifs
|
||||||
|
min_bm, min_bm_score, _ := RandomMotifSearchPseudocounts(dna, k, t) |
||||||
|
|
||||||
|
// Run algorithm n times,
|
||||||
|
// look for lowest score
|
||||||
|
for i := 0; i < n; i += 1 { |
||||||
|
bm, bm_score, _ := RandomMotifSearchPseudocounts(dna, k, t) |
||||||
|
if bm_score < min_bm_score { |
||||||
|
min_bm_score = bm_score |
||||||
|
min_bm = bm |
||||||
|
} |
||||||
|
} |
||||||
|
return min_bm, nil |
||||||
|
} |
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// BA2G functions
|
||||||
|
|
||||||
|
// Implement a Gibbs sampler with pseudocounts.
|
||||||
|
// The Gibbs sampler starts with random kmers,
|
||||||
|
// and samples kmers randomly generated from a
|
||||||
|
// Profile matrix. Better sampling makes the
|
||||||
|
// algorithm faster.
|
||||||
|
func GibbsSampler(dna []string, k, t, n int) ([]string, int, error) { |
||||||
|
|
||||||
|
s := rand.NewSource(time.Now().UnixNano()) |
||||||
|
r := rand.New(s) |
||||||
|
|
||||||
|
pseudocounts := true |
||||||
|
|
||||||
|
var best_motifs []string |
||||||
|
var best_score int |
||||||
|
var ri int |
||||||
|
|
||||||
|
// Create a new scored motif group to create the first profile
|
||||||
|
this_smm := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// Pick a random kmer motif from each DNA string
|
||||||
|
for i := 0; i < len(dna); i++ { |
||||||
|
// Non-inclusive [0,n)
|
||||||
|
overlap := len(dna[i]) - k + 1 |
||||||
|
ri = r.Intn(overlap) |
||||||
|
result := dna[i][ri : ri+k] |
||||||
|
this_smm.AddMotif(result) |
||||||
|
} |
||||||
|
|
||||||
|
// Update the first (currently best) score
|
||||||
|
this_smm.UpdateScore() |
||||||
|
best_score = this_smm.score |
||||||
|
|
||||||
|
// ---------------------------------
|
||||||
|
// Main loop:
|
||||||
|
// Pick out a random motif, and hold it out
|
||||||
|
// while we calculate a profile and a resulting
|
||||||
|
// profile-most-probable from all remaining
|
||||||
|
// motifs. If the motif score improves,
|
||||||
|
// keep the new motifs, otherwise toss 'em.
|
||||||
|
for j := 0; j < n; j++ { |
||||||
|
|
||||||
|
// Non-inclusive [0,n)
|
||||||
|
ri = r.Intn(t) |
||||||
|
|
||||||
|
// Now, we hold out a random motif from
|
||||||
|
// the current scored motif matrix.
|
||||||
|
|
||||||
|
// Make a new scored motif matrix that
|
||||||
|
// holds out that motif, and assemble it
|
||||||
|
// from all motifs except the holdout
|
||||||
|
holdout_smm := NewScoredMotifMatrix() |
||||||
|
for i := 0; i < len(this_smm.motifs); i++ { |
||||||
|
if i != ri { |
||||||
|
holdout_smm.AddMotif(this_smm.motifs[i]) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Make profile with holdout motif missing
|
||||||
|
profile, err := holdout_smm.MakeProfile(pseudocounts) |
||||||
|
if err != nil { |
||||||
|
return nil, 0, err |
||||||
|
} |
||||||
|
|
||||||
|
// Use the profile to pick the
|
||||||
|
// profile-most-probable kmer
|
||||||
|
// (from DNA string ri)
|
||||||
|
// as our candidate motif
|
||||||
|
var candidate_motif string |
||||||
|
candidate_motif, err = ProfileMostProbableKmer(dna[ri], k, profile) |
||||||
|
if err != nil { |
||||||
|
return nil, 0, err |
||||||
|
} |
||||||
|
|
||||||
|
// Add candidate motif to holdout motif matrix
|
||||||
|
holdout_smm.AddMotif(candidate_motif) |
||||||
|
|
||||||
|
// Update the score and use it to determine if
|
||||||
|
// we keep the candidate motif or the old motif
|
||||||
|
err = holdout_smm.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
return nil, 0, err |
||||||
|
} |
||||||
|
|
||||||
|
// Update current scored motif matrix with
|
||||||
|
// the candidate motif
|
||||||
|
this_smm.motifs[ri] = candidate_motif |
||||||
|
err = this_smm.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
return nil, 0, err |
||||||
|
} |
||||||
|
|
||||||
|
// If candidate motif leads to a better motif matrix
|
||||||
|
// (if holdout score > this score), replace old random
|
||||||
|
// motif with candidate motif.
|
||||||
|
// Otherwise, candidate motif did not improve
|
||||||
|
// the score, so keep the old motif.
|
||||||
|
if this_smm.score < best_score { |
||||||
|
// Clear best_motifs and copy in this_smm.motifs
|
||||||
|
best_motifs = []string{} |
||||||
|
for cc := 0; cc < len(this_smm.motifs); cc++ { |
||||||
|
best_motifs = append(best_motifs, this_smm.motifs[cc]) |
||||||
|
} |
||||||
|
best_score = this_smm.score |
||||||
|
} |
||||||
|
} |
||||||
|
return best_motifs, best_score, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Driver function to run multiple random motif searches
|
||||||
|
// and keep the best of all runs.
|
||||||
|
// n is the number of inner loops in one run of the Gibbs Sampler.
|
||||||
|
// n_starts is the number of times the Gibbs Sampler is run.
|
||||||
|
func ManyGibbsSamplers(dna []string, k, t, n, n_starts int) ([]string, error) { |
||||||
|
|
||||||
|
// Initial best motifs
|
||||||
|
min_bm, min_bm_score, err := GibbsSampler(dna, k, t, n) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
// Run algorithm n times
|
||||||
|
for i := 0; i < n_starts-1; i += 1 { |
||||||
|
// Get a new motifs and score
|
||||||
|
bm, bm_score, err := GibbsSampler(dna, k, t, n) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
// If we did better, save it
|
||||||
|
if bm_score < min_bm_score { |
||||||
|
min_bm_score = bm_score |
||||||
|
min_bm = bm |
||||||
|
} |
||||||
|
} |
||||||
|
return min_bm, nil |
||||||
|
} |
@ -0,0 +1,882 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"math" |
||||||
|
"sort" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA2a Test
|
||||||
|
|
||||||
|
func TestKeySetIntersection(t *testing.T) { |
||||||
|
gold := []string{"AAA", "BBB"} |
||||||
|
m1 := map[string]int{ |
||||||
|
"AAA": 1, |
||||||
|
"BBB": 2, |
||||||
|
"CCC": 2, |
||||||
|
"DDD": 2, |
||||||
|
} |
||||||
|
m2 := map[string]int{ |
||||||
|
"AAA": 2, |
||||||
|
"BBB": 3, |
||||||
|
"EEE": 3, |
||||||
|
"FFF": 3, |
||||||
|
} |
||||||
|
m3 := map[string]int{ |
||||||
|
"AAA": 3, |
||||||
|
"BBB": 4, |
||||||
|
"GGG": 4, |
||||||
|
"HHH": 4, |
||||||
|
} |
||||||
|
mslice := make([]map[string]int, 3) |
||||||
|
mslice[0] = m1 |
||||||
|
mslice[1] = m2 |
||||||
|
mslice[2] = m3 |
||||||
|
results, err := KeySetIntersection(mslice) |
||||||
|
if err != nil { |
||||||
|
t.Error(fmt.Sprintf("Error: KeySetIntersection() returned error: %v", err)) |
||||||
|
} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, gold) { |
||||||
|
msg := fmt.Sprintf("Error testing KeySetIntersection()\ncomputed = %v\ngold = %v", |
||||||
|
results, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test the FindMotifs function using a single problem.
|
||||||
|
func TestFindMotifs(t *testing.T) { |
||||||
|
k := 3 |
||||||
|
d := 1 |
||||||
|
dna := []string{"ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"} |
||||||
|
|
||||||
|
results, err := FindMotifs(dna, k, d) |
||||||
|
if err != nil { |
||||||
|
t.Error(fmt.Sprintf("Error: FindMotifs() returned error: %v", err)) |
||||||
|
} |
||||||
|
gold := []string{"ATA", "ATT", "GTT", "TTT"} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, gold) { |
||||||
|
msg := fmt.Sprintf("Error testing FindMotifs():\ncomputed = %v\ngold = %v", |
||||||
|
results, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test the FindMotifs function using a test matrix
|
||||||
|
// of debug cases.
|
||||||
|
func TestMatrixFindMotifs(t *testing.T) { |
||||||
|
var tests = []struct { |
||||||
|
k int |
||||||
|
d int |
||||||
|
dna []string |
||||||
|
gold []string |
||||||
|
}{ |
||||||
|
{3, 1, |
||||||
|
[]string{"ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"}, |
||||||
|
[]string{"ATA", "ATT", "GTT", "TTT"}, |
||||||
|
}, |
||||||
|
{3, 0, |
||||||
|
[]string{"ACGT", "ACGT", "ACGT"}, |
||||||
|
[]string{"ACG", "CGT"}, |
||||||
|
}, |
||||||
|
{3, 1, |
||||||
|
[]string{"AAAAA", "AAAAA", "AAAAA"}, |
||||||
|
[]string{"AAA", "AAC", "AAG", "AAT", "ACA", "AGA", "ATA", "CAA", "GAA", "TAA"}, |
||||||
|
}, |
||||||
|
{3, 3, |
||||||
|
[]string{"AAAAA", "AAAAA", "AAAAA"}, |
||||||
|
[]string{"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"}, |
||||||
|
}, |
||||||
|
{3, 0, |
||||||
|
[]string{"AAAAA", "AAAAA", "AACAA"}, |
||||||
|
[]string{}, |
||||||
|
}, |
||||||
|
{3, 0, |
||||||
|
[]string{"AACAA", "AAAAA", "AAAAA"}, |
||||||
|
[]string{}, |
||||||
|
}, |
||||||
|
} |
||||||
|
for _, test := range tests { |
||||||
|
|
||||||
|
// Money shot
|
||||||
|
results, err := FindMotifs(test.dna, test.k, test.d) |
||||||
|
|
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(test.gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, test.gold) { |
||||||
|
msg := fmt.Sprintf("Error testing FindMotifs()\nk = %d, d = %d, len(dna) = %d\ncomputed = %v\ngold = %v", |
||||||
|
test.k, test.d, len(test.dna), |
||||||
|
results, test.gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test the FindMotifs function using a large
|
||||||
|
// test case loaded from a file.
|
||||||
|
func TestFindMotifsFile(t *testing.T) { |
||||||
|
|
||||||
|
filename := "data/motif_enumeration.txt" |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
// lines[0]: Input
|
||||||
|
params := strings.Split(lines[1], " ") |
||||||
|
k, _ := strconv.Atoi(params[0]) |
||||||
|
d, _ := strconv.Atoi(params[1]) |
||||||
|
|
||||||
|
// lines[-2]: Output
|
||||||
|
// lines[-1]: gold standard
|
||||||
|
gold := strings.Split(lines[len(lines)-1], " ") |
||||||
|
|
||||||
|
// This requires some trickery.
|
||||||
|
|
||||||
|
// 4 lines in the input file are for
|
||||||
|
// input/parameters/output/gold standard.
|
||||||
|
// The rest of the lines are DNA strings.
|
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
dna := make([]string, len(lines)-4) |
||||||
|
iLstart := 2 |
||||||
|
iLend := len(lines) - 2 |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
dna[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
// Money shot
|
||||||
|
results, err := FindMotifs(dna, k, d) |
||||||
|
|
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, gold) { |
||||||
|
msg := fmt.Sprintf("Error testing FindMotifs()\ncomputed = %v\ngold = %v", |
||||||
|
results, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA2b Test
|
||||||
|
|
||||||
|
// Test the MinKmerDistance function.
|
||||||
|
func TestMatrixMinKmerDistance(t *testing.T) { |
||||||
|
var tests = []struct { |
||||||
|
pattern string |
||||||
|
text string |
||||||
|
d int |
||||||
|
}{ |
||||||
|
{"ATA", "AAATTGACGCAT", 1}, |
||||||
|
{"AAA", "AAAAAAAAAAA", 0}, |
||||||
|
{"AAA", "CCCCCCCCC", 3}, |
||||||
|
{"AAA", "GAAGAAGAAGAA", 1}, |
||||||
|
{"AAAA", "GAAG", 2}, |
||||||
|
{"AAAA", "GAAGAA", 1}, |
||||||
|
} |
||||||
|
for _, test := range tests { |
||||||
|
|
||||||
|
// Money shot
|
||||||
|
c, err := MinKmerDistance(test.pattern, test.text) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
if c != test.d { |
||||||
|
msg := fmt.Sprintf("Error testing MinKmerDistance()\npattern = %s, text = %s\ncomputed = %d\ngold = %d", |
||||||
|
test.pattern, test.text, |
||||||
|
c, test.d) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test the MinKmerDistances function.
|
||||||
|
func TestMatrixMinKmerDistances(t *testing.T) { |
||||||
|
var tests = []struct { |
||||||
|
pattern string |
||||||
|
inputs []string |
||||||
|
d int |
||||||
|
}{ |
||||||
|
{ |
||||||
|
"AAA", |
||||||
|
[]string{"AAAA", "CCCC", "GGGG", "TTTT"}, |
||||||
|
9}, |
||||||
|
{ |
||||||
|
"AAA", |
||||||
|
[]string{"GAAG", "CAAC", "TAAG", "TAAC"}, |
||||||
|
4}, |
||||||
|
} |
||||||
|
for _, test := range tests { |
||||||
|
|
||||||
|
// Money shot
|
||||||
|
c, err := MinKmerDistances(test.pattern, test.inputs) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
if c != test.d { |
||||||
|
msg := fmt.Sprintf("Error testing MinKmerDistance()\npattern = %s, inputs = %v\ncomputed = %d\ngold = %d", |
||||||
|
test.pattern, test.inputs, |
||||||
|
c, test.d) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test MedianString
|
||||||
|
func TestMedianString(t *testing.T) { |
||||||
|
k := 3 |
||||||
|
dna := []string{ |
||||||
|
"AAATTGACGCAT", |
||||||
|
"GACGACCACGTT", |
||||||
|
"CGTCAGCGCCTG", |
||||||
|
"GCTGAGCACCGG", |
||||||
|
"AGTACGGGACAG", |
||||||
|
} |
||||||
|
result, _ := MedianString(dna, k) |
||||||
|
|
||||||
|
gold := "GAC" |
||||||
|
|
||||||
|
// Since they only report one kmer, and we report all,
|
||||||
|
// we should check if their kmer is in our slice.
|
||||||
|
var passed_test bool |
||||||
|
for _, r := range result { |
||||||
|
if r == gold { |
||||||
|
passed_test = true |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
if !passed_test { |
||||||
|
// Uh oh, their kmer is not in our slice.
|
||||||
|
msg := fmt.Sprintf("Error testing MostFrequentKmers using test case from file: most frequent kmers in gold not in results.\ncomputed = %q\ngold = %q\n", |
||||||
|
result, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA2c Test
|
||||||
|
|
||||||
|
func TestProfileMostProbableKmers(t *testing.T) { |
||||||
|
gold := "CCGAG" |
||||||
|
dna := "ACCTGTTTATTGCCTAAGTTCCGAACAAACCCAATATAGCCCGAGGGCCT" |
||||||
|
k := 5 |
||||||
|
prof := [][]float32{ |
||||||
|
[]float32{0.2, 0.2, 0.3, 0.2, 0.3}, |
||||||
|
[]float32{0.4, 0.3, 0.1, 0.5, 0.1}, |
||||||
|
[]float32{0.3, 0.3, 0.5, 0.2, 0.4}, |
||||||
|
[]float32{0.1, 0.2, 0.1, 0.1, 0.2}, |
||||||
|
} |
||||||
|
result, _ := ProfileMostProbableKmers(dna, k, prof) |
||||||
|
|
||||||
|
// Check if gold answer is in our results slice
|
||||||
|
var passed_test bool |
||||||
|
for _, r := range result { |
||||||
|
if r == gold { |
||||||
|
passed_test = true |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
// The correct kmer was not found in our result
|
||||||
|
msg := fmt.Sprintf("Error testing ProfileMostProbableKmer(): found incorrect most probable kmer:\n Gold: %s\n Computed: %s\n", |
||||||
|
gold, strings.Join(result, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestProfileMostProbableKmers2(t *testing.T) { |
||||||
|
gold := "TGTCGC" |
||||||
|
dna := "TGCCCGAGCTATCTTATGCGCATCGCATGCGGACCCTTCCCTAGGCTTGTCGCAAGCCATTATCCTGGGCGCTAGTTGCGCGAGTATTGTCAGACCTGATGACGCTGTAAGCTAGCGTGTTCAGCGGCGCGCAATGAGCGGTTTAGATCACAGAATCCTTTGGCGTATTCCTATCCGTTACATCACCTTCCTCACCCCTA" |
||||||
|
k := 6 |
||||||
|
prof := [][]float32{ |
||||||
|
[]float32{0.364, 0.333, 0.303, 0.212, 0.121, 0.242}, |
||||||
|
[]float32{0.182, 0.182, 0.212, 0.303, 0.182, 0.303}, |
||||||
|
[]float32{0.121, 0.303, 0.182, 0.273, 0.333, 0.303}, |
||||||
|
[]float32{0.333, 0.182, 0.303, 0.212, 0.364, 0.152}, |
||||||
|
} |
||||||
|
result, err := ProfileMostProbableKmers(dna, k, prof) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Check if gold answer is in our results slice
|
||||||
|
var passed_test bool |
||||||
|
for _, r := range result { |
||||||
|
if r == gold { |
||||||
|
passed_test = true |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
// The correct kmer was not found in our result
|
||||||
|
msg := fmt.Sprintf("Error testing ProfileMostProbableKmer(): found incorrect most probable kmer:\n Gold: %s\n Computed: %s\n", |
||||||
|
gold, strings.Join(result, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA2D Test
|
||||||
|
|
||||||
|
// Test our ScoredMotifMatrix structure
|
||||||
|
func TestScoredMotifMatrix(t *testing.T) { |
||||||
|
|
||||||
|
s := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
s.AddMotif("AAAAA") |
||||||
|
|
||||||
|
err := s.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
msg := "Error: UpdateScore() failed with 9 identical kmers" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
if s.score != 0 { |
||||||
|
msg := fmt.Sprintf("Error: computed incorrect score (computed %d, should be %d)", |
||||||
|
s.score, 0) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
s.AddMotif("AAAAA") |
||||||
|
|
||||||
|
err = s.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
msg := "Error: UpdateScore() failed with 9 identical kmers" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
if s.score != 0 { |
||||||
|
msg := fmt.Sprintf("Error: computed incorrect score (computed %d, should be %d)", |
||||||
|
s.score, 0) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
s.AddMotif("CCCCC") |
||||||
|
|
||||||
|
err = s.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
msg := "Error: UpdateScore() failed with 9 identical kmers and 1 different kmer" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
if s.score != 5 { |
||||||
|
msg := fmt.Sprintf("Error: computed incorrect score (computed %d, should be %d)", |
||||||
|
s.score, 5) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
s.AddMotif("TAAAA") |
||||||
|
|
||||||
|
err = s.UpdateScore() |
||||||
|
if err != nil { |
||||||
|
msg := "Error: UpdateScore() failed with 9 identical kmers and 1 different kmer" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
if s.score != 6 { |
||||||
|
msg := fmt.Sprintf("Error: computed incorrect score (computed %d, should be %d)", |
||||||
|
s.score, 6) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test the construction of a profile
|
||||||
|
// from a ScoredMotifMatrix
|
||||||
|
func TestProfileConstruction(t *testing.T) { |
||||||
|
|
||||||
|
// To create a test case for a motif matrix
|
||||||
|
// being turned into a profile, we use the
|
||||||
|
// following calculation from the textbook
|
||||||
|
// (page 74):
|
||||||
|
//
|
||||||
|
// TCGGGGGTTTTT
|
||||||
|
// CCGGTGACTTAC
|
||||||
|
// ACGGGGATTTTC
|
||||||
|
// TTGGGGACTTTT
|
||||||
|
// AAGGGGACTTCC
|
||||||
|
// TTGGGGACTTCC
|
||||||
|
// TCGGGGATTCAT
|
||||||
|
// TCGGGGATTCCT
|
||||||
|
// TAGGGGAACTAC
|
||||||
|
// TCGGGTATAACC
|
||||||
|
//
|
||||||
|
// which results in the following profile:
|
||||||
|
//
|
||||||
|
// .2 .2 0 0 0 0 .9 .1 .1 .1 .3 0
|
||||||
|
// .1 .6 0 0 0 0 0 .4 .1 .2 .4 .6
|
||||||
|
// 0 0 1 1 .9 .9 .1 0 0 0 0 0
|
||||||
|
// .7 .2 0 0 .1 .1 0 .5 .8 .7 .3 .4
|
||||||
|
|
||||||
|
motifs := []string{ |
||||||
|
"TCGGGGGTTTTT", |
||||||
|
"CCGGTGACTTAC", |
||||||
|
"ACGGGGATTTTC", |
||||||
|
"TTGGGGACTTTT", |
||||||
|
"AAGGGGACTTCC", |
||||||
|
"TTGGGGACTTCC", |
||||||
|
"TCGGGGATTCAT", |
||||||
|
"TCGGGGATTCCT", |
||||||
|
"TAGGGGAACTAC", |
||||||
|
"TCGGGTATAACC", |
||||||
|
} |
||||||
|
gold := [][]float32{ |
||||||
|
[]float32{.2, .2, 0, 0, 0, 0, .9, .1, .1, .1, .3, 0}, |
||||||
|
[]float32{.1, .6, 0, 0, 0, 0, 0, .4, .1, .2, .4, .6}, |
||||||
|
[]float32{0, 0, 1, 1, .9, .9, .1, 0, 0, 0, 0, 0}, |
||||||
|
[]float32{.7, .2, 0, 0, .1, .1, 0, .5, .8, .7, .3, .4}, |
||||||
|
} |
||||||
|
|
||||||
|
smg := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
for _, motif := range motifs { |
||||||
|
smg.AddMotif(motif) |
||||||
|
} |
||||||
|
|
||||||
|
result, err := smg.MakeProfile(false) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
var passed_test bool |
||||||
|
passed_test = true |
||||||
|
if len(gold) == len(result) { |
||||||
|
if len(gold[0]) == len(result[0]) { |
||||||
|
// Dimensions match,
|
||||||
|
// so now we compare element-wise.
|
||||||
|
for i := 0; i < len(gold); i++ { |
||||||
|
for j := 0; j < len(gold[0]); j++ { |
||||||
|
// Comparing floats,
|
||||||
|
// so don't use !=
|
||||||
|
if !TheseFloatsAreEqual(gold[i][j], result[i][j]) { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing MakeProfile() (no pseudocounts) for Scored Motif Matrix: found incorrect motifs\n Gold: %v\n Computed: %v\n", |
||||||
|
gold, result) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestProfilePseudocountsConstruction(t *testing.T) { |
||||||
|
// Test case:
|
||||||
|
//
|
||||||
|
// TAAC
|
||||||
|
// GTCT
|
||||||
|
// ACTA
|
||||||
|
// AGGT
|
||||||
|
//
|
||||||
|
// Results in the profile:
|
||||||
|
//
|
||||||
|
// 0.375 0.250 0.250 0.250
|
||||||
|
// 0.125 0.250 0.250 0.250
|
||||||
|
// 0.250 0.250 0.250 0.125
|
||||||
|
// 0.250 0.250 0.250 0.375
|
||||||
|
|
||||||
|
motifs := []string{ |
||||||
|
"TAAC", |
||||||
|
"GTCT", |
||||||
|
"ACTA", |
||||||
|
"AGGT", |
||||||
|
} |
||||||
|
gold := [][]float32{ |
||||||
|
[]float32{0.375, 0.250, 0.250, 0.250}, |
||||||
|
[]float32{0.125, 0.250, 0.250, 0.250}, |
||||||
|
[]float32{0.250, 0.250, 0.250, 0.125}, |
||||||
|
[]float32{0.250, 0.250, 0.250, 0.375}, |
||||||
|
} |
||||||
|
|
||||||
|
smg := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
for _, motif := range motifs { |
||||||
|
smg.AddMotif(motif) |
||||||
|
} |
||||||
|
|
||||||
|
result, err := smg.MakeProfile(true) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
var passed_test bool |
||||||
|
passed_test = true |
||||||
|
if len(gold) == len(result) { |
||||||
|
if len(gold[0]) == len(result[0]) { |
||||||
|
// Dimensions match,
|
||||||
|
// so now we compare element-wise.
|
||||||
|
for i := 0; i < len(gold); i++ { |
||||||
|
for j := 0; j < len(gold[0]); j++ { |
||||||
|
// Comparing floats,
|
||||||
|
// so don't use !=
|
||||||
|
if !TheseFloatsAreEqual(gold[i][j], result[i][j]) { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing MakeProfile() (with pseudocounts) for Scored Motif Matrix: found incorrect motifs\n Gold: %v\n Computed: %v\n", |
||||||
|
gold, result) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test a single iteration of the inner loop for the greedy motif algorithm.
|
||||||
|
// This makes sure that the ProfileMostProbableKmersGreedy() function is
|
||||||
|
// returning the right kmer. If the probability of all kmers are 0.0, it should
|
||||||
|
// return the first kmer, which is the case that this test targets.
|
||||||
|
func TestGreedyMotifFirstInnerIteration(t *testing.T) { |
||||||
|
// This motif is the first motif we see in the original DNA string
|
||||||
|
// of the BA2D example.
|
||||||
|
motif := "GGC" |
||||||
|
|
||||||
|
// Define kmer motif length
|
||||||
|
k := len(motif) |
||||||
|
|
||||||
|
// This is the profile-most probable kmer that should be found
|
||||||
|
gold1 := "AAG" |
||||||
|
|
||||||
|
// These are the motifs that should be in the ScoredMotifMatrix
|
||||||
|
gold_motifs1 := []string{"GGC", "AAG"} |
||||||
|
|
||||||
|
// This DNA string is the second DNA string, so the first one
|
||||||
|
// that we extract possible motifs from in the inner iteration
|
||||||
|
// of the greedy motif finding function.
|
||||||
|
dna1 := "AAGAATCAGTCA" |
||||||
|
|
||||||
|
// Create a ScoredMotifMatrix to create a profile matrix
|
||||||
|
s := NewScoredMotifMatrix() |
||||||
|
|
||||||
|
// Add the original motif
|
||||||
|
s.AddMotif(motif) |
||||||
|
|
||||||
|
// Create a profile matrix
|
||||||
|
profile, err := s.MakeProfile(false) |
||||||
|
if err != nil { |
||||||
|
msg := "Error: MakeProfile(false) call failed" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Use the profile and the input DNA string to find the
|
||||||
|
// most probable kmer, greedy style.
|
||||||
|
result, err := ProfileMostProbableKmersGreedy(dna1, k, profile) |
||||||
|
|
||||||
|
// Add the most probable kmer to the motifs
|
||||||
|
s.AddMotif(result) |
||||||
|
|
||||||
|
// First, check that we found the correct
|
||||||
|
// profile-most probable kmers
|
||||||
|
if result != gold1 { |
||||||
|
msg := fmt.Sprintf("Error: ProfileMostProbableKmers failed:\n Computed profile-most probable kmer: %s\n Gold profile-most probable kmer: %s\n DNA string: %s\n k: %d\n profile: %v\n\n", |
||||||
|
result, gold1, dna1, k, profile) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Second, check the ScoredMotifMatrix motifs
|
||||||
|
var passed_test bool |
||||||
|
passed_test = true |
||||||
|
if len(s.motifs) == len(gold_motifs1) { |
||||||
|
for i := 0; i < len(s.motifs); i++ { |
||||||
|
if s.motifs[i] != gold_motifs1[i] { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing greedy motif first inner iteration: the ScoredMotifMatrix motifs array was not correct.\n Computed: %s\n Gold: %s", |
||||||
|
strings.Join(s.motifs, " "), |
||||||
|
strings.Join(gold_motifs1, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// One more
|
||||||
|
dna2 := "CAAGGAGTTCGC" |
||||||
|
|
||||||
|
// This is the profile-most probable kmer that should be found
|
||||||
|
gold2 := "AAG" |
||||||
|
|
||||||
|
// These are the motifs that should be in the ScoredMotifMatrix
|
||||||
|
gold_motifs2 := []string{"GGC", "AAG", "AAG"} |
||||||
|
|
||||||
|
// Create a profile matrix
|
||||||
|
profile, err = s.MakeProfile(false) |
||||||
|
if err != nil { |
||||||
|
msg := "Error: MakeProfile(false) call failed" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Use the profile and the input DNA string to find the
|
||||||
|
// most probable kmer, greedy style.
|
||||||
|
result, err = ProfileMostProbableKmersGreedy(dna2, k, profile) |
||||||
|
if err != nil { |
||||||
|
msg := "Error: ProfileMostProbableKmersGreedy() call failed" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Add the most probable kmer to the motifs
|
||||||
|
s.AddMotif(result) |
||||||
|
|
||||||
|
// First, check that we found the correct
|
||||||
|
// profile-most probable kmers
|
||||||
|
if result != gold2 { |
||||||
|
msg := fmt.Sprintf("Error: ProfileMostProbableKmers failed:\n Computed profile-most probable kmer: %s\n Gold profile-most probable kmer: %s\n DNA string: %s\n k: %d\n profile: %v\n\n", |
||||||
|
result, gold2, dna2, k, profile) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Second, check the ScoredMotifMatrix motifs
|
||||||
|
passed_test = true |
||||||
|
if len(s.motifs) == len(gold_motifs2) { |
||||||
|
for i := 0; i < len(s.motifs); i++ { |
||||||
|
if s.motifs[i] != gold_motifs2[i] { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing greedy motif first inner iteration: the ScoredMotifMatrix motifs array was not correct.\n Computed: %s\n Gold: %s", |
||||||
|
strings.Join(s.motifs, " "), |
||||||
|
strings.Join(gold_motifs2, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test out the greedy motif search with regular counts.
|
||||||
|
func TestGreedyMotifSearch(t *testing.T) { |
||||||
|
gold := []string{"CAG", "CAG", "CAA", "CAA", "CAA"} |
||||||
|
k_in := 3 |
||||||
|
t_in := 5 |
||||||
|
dna := []string{ |
||||||
|
"GGCGTTCAGGCA", |
||||||
|
"AAGAATCAGTCA", |
||||||
|
"CAAGGAGTTCGC", |
||||||
|
"CACGTCAATCAC", |
||||||
|
"CAATAATATTCG", |
||||||
|
} |
||||||
|
|
||||||
|
result, err := GreedyMotifSearchNoPseudocounts(dna, k_in, t_in) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Element-wise comparison of gold and computed result
|
||||||
|
var passed_test bool |
||||||
|
passed_test = true |
||||||
|
if len(gold) == len(result) { |
||||||
|
for i := 0; i < len(result); i++ { |
||||||
|
if result[i] != gold[i] { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing GreedyMotifSearch(): found incorrect motifs\n Gold: %s\n Computed: %s\n", |
||||||
|
strings.Join(gold, " "), |
||||||
|
strings.Join(result, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test out the greedy motif search with pseudocounts
|
||||||
|
func TestGreedyMotifSearchPseudocounts(t *testing.T) { |
||||||
|
gold := []string{"TTC", "ATC", "TTC", "ATC", "TTC"} |
||||||
|
k_in := 3 |
||||||
|
t_in := 5 |
||||||
|
dna := []string{ |
||||||
|
"GGCGTTCAGGCA", |
||||||
|
"AAGAATCAGTCA", |
||||||
|
"CAAGGAGTTCGC", |
||||||
|
"CACGTCAATCAC", |
||||||
|
"CAATAATATTCG", |
||||||
|
} |
||||||
|
|
||||||
|
result, err := GreedyMotifSearchPseudocounts(dna, k_in, t_in) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Element-wise comparison of gold and computed result
|
||||||
|
var passed_test bool |
||||||
|
passed_test = true |
||||||
|
if len(gold) == len(result) { |
||||||
|
for i := 0; i < len(result); i++ { |
||||||
|
if result[i] != gold[i] { |
||||||
|
passed_test = false |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing GreedyMotifSearchPseudocounts(): found incorrect motifs\n Gold: %s\n Computed: %s\n", |
||||||
|
strings.Join(gold, " "), |
||||||
|
strings.Join(result, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test out the random motif search with pseudocounts
|
||||||
|
func TestRandomMotifSearchPseudocounts(t *testing.T) { |
||||||
|
gold := []string{"TCTCGGGG", "CCAAGGTG", "TACAGGCG", "TTCAGGTG", "TCCACGTG"} |
||||||
|
k_in := 8 |
||||||
|
t_in := 5 |
||||||
|
dna := []string{ |
||||||
|
"CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA", |
||||||
|
"GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG", |
||||||
|
"TAGTACCGAGACCGAAAGAAGTATACAGGCGT", |
||||||
|
"TAGATCAAGTTTCAGGTGCACGTCGGTGAACC", |
||||||
|
"AATCCACCAGCTCCACGTGCAATGTTGGCCTA", |
||||||
|
} |
||||||
|
|
||||||
|
n := 100 |
||||||
|
motifs, err := ManyRandomMotifSearches(dna, k_in, t_in, n) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
gold_smm := NewScoredMotifMatrix() |
||||||
|
for _, gold_motif := range gold { |
||||||
|
gold_smm.AddMotif(gold_motif) |
||||||
|
} |
||||||
|
gold_smm.UpdateScore() |
||||||
|
gold_score := gold_smm.score |
||||||
|
|
||||||
|
lead_smm := NewScoredMotifMatrix() |
||||||
|
for _, lead_motif := range motifs { |
||||||
|
lead_smm.AddMotif(lead_motif) |
||||||
|
} |
||||||
|
lead_smm.UpdateScore() |
||||||
|
lead_score := lead_smm.score |
||||||
|
|
||||||
|
var passed_test bool |
||||||
|
pct_err := math.Abs(float64(gold_score-lead_score) / float64(gold_score)) |
||||||
|
if pct_err < 0.40 { |
||||||
|
passed_test = true |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing RandomMotifSearchPseudocounts(): found incorrect motifs\n Gold: %s\n Computed: %s\n", |
||||||
|
strings.Join(gold_smm.motifs, " "), |
||||||
|
strings.Join(lead_smm.motifs, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Test out the gibbs sampler
|
||||||
|
func TestGibbsSampler(t *testing.T) { |
||||||
|
gold := []string{"TCTCGGGG", "CCAAGGTG", "TACAGGCG", "TTCAGGTG", "TCCACGTG"} |
||||||
|
k_in := 8 |
||||||
|
t_in := 5 |
||||||
|
n_in := 100 |
||||||
|
dna := []string{ |
||||||
|
"CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA", |
||||||
|
"GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG", |
||||||
|
"TAGTACCGAGACCGAAAGAAGTATACAGGCGT", |
||||||
|
"TAGATCAAGTTTCAGGTGCACGTCGGTGAACC", |
||||||
|
"AATCCACCAGCTCCACGTGCAATGTTGGCCTA", |
||||||
|
} |
||||||
|
|
||||||
|
n_starts := 20 |
||||||
|
motifs, err := ManyGibbsSamplers(dna, k_in, t_in, n_in, n_starts) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
gold_smm := NewScoredMotifMatrix() |
||||||
|
for _, gold_motif := range gold { |
||||||
|
gold_smm.AddMotif(gold_motif) |
||||||
|
} |
||||||
|
gold_smm.UpdateScore() |
||||||
|
gold_score := gold_smm.score |
||||||
|
|
||||||
|
lead_smm := NewScoredMotifMatrix() |
||||||
|
for _, lead_motif := range motifs { |
||||||
|
lead_smm.AddMotif(lead_motif) |
||||||
|
} |
||||||
|
lead_smm.UpdateScore() |
||||||
|
lead_score := lead_smm.score |
||||||
|
|
||||||
|
var passed_test bool |
||||||
|
pct_err := math.Abs(float64(gold_score-lead_score) / float64(gold_score)) |
||||||
|
if pct_err < 0.40 { |
||||||
|
passed_test = true |
||||||
|
} else { |
||||||
|
passed_test = false |
||||||
|
} |
||||||
|
|
||||||
|
if !passed_test { |
||||||
|
msg := fmt.Sprintf("Error testing GibbsSampler(): found incorrect motifs\n Gold: %s\n Computed: %s\n", |
||||||
|
strings.Join(gold_smm.motifs, " "), |
||||||
|
strings.Join(lead_smm.motifs, " ")) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,151 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA3a
|
||||||
|
|
||||||
|
// Given an input DNA string, generate a set of all
|
||||||
|
// k-mers of length k in the input string.
|
||||||
|
func KmerComposition(input string, k int) ([]string, error) { |
||||||
|
// Get a histogram of all kmers in this string
|
||||||
|
hist, err := KmerHistogram(input, k) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: Function KmerHistogram(%s,%d) returned an error\n", |
||||||
|
input, k) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Populate the string slice of kmers
|
||||||
|
result := make([]string, len(hist)) |
||||||
|
i := 0 |
||||||
|
for k, _ := range hist { |
||||||
|
result[i] = k |
||||||
|
i++ |
||||||
|
} |
||||||
|
|
||||||
|
// Return the string slice
|
||||||
|
return result, nil |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA3b
|
||||||
|
|
||||||
|
// ReconstructGenomeFromPath assembles a genome path into a single string.
//
// contigs is an ordered list of k-mers in which each entry overlaps the
// next one by some unknown number of characters, at least 1 and at most
// k-1 (a proper suffix of the left-hand piece matching a prefix of the
// right-hand piece). For every adjacent pair the longest such overlap is
// used.
//
// Edge cases:
//   - no contigs yields "";
//   - a single contig is returned unchanged;
//   - identical neighbours overlap by k-1, so {"AAA", "AAA"} gives "AAAA";
//   - pieces of different lengths are handled without slicing out of range.
//
// An error is returned when two neighbouring pieces share no overlap.
func ReconstructGenomeFromPath(contigs []string) (string, error) {
	if len(contigs) == 0 {
		return "", nil
	}

	// The genome starts with the whole first piece. Each later piece adds
	// only the characters that extend past its overlap with the piece
	// before it.
	pieces := make([]string, 0, len(contigs))
	pieces = append(pieces, contigs[0])

	for i := 1; i < len(contigs); i++ {
		left, right := contigs[i-1], contigs[i]

		// The overlap is a proper suffix of left (at most len(left)-1
		// characters) and cannot be longer than right.
		maxOverlap := len(left) - 1
		if len(right) < maxOverlap {
			maxOverlap = len(right)
		}

		// Try the longest candidate first so the first hit is the
		// longest overlap.
		overlap := 0
		for n := maxOverlap; n > 0; n-- {
			if left[len(left)-n:] == right[:n] {
				overlap = n
				break
			}
		}

		if overlap == 0 {
			msg := fmt.Sprintf("Error: ReconstructGenomeFromPath(): No overlap detected between %s and %s\n",
				left, right)
			return "", errors.New(msg)
		}

		pieces = append(pieces, right[overlap:])
	}

	return strings.Join(pieces, ""), nil
}
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// BA3c
|
||||||
|
|
||||||
|
// Given a set of k-mers, construct an overlap graph
|
||||||
|
// where each k-mer is represented by a node, and each
|
||||||
|
// directed edge represents a pair of k-mers such that
|
||||||
|
// the suffix (k-1 chars) of the k-mer at the source of
|
||||||
|
// the edge overlaps with the prefix (k-1 chars) of the
|
||||||
|
// k-mer at the head of the edge.
|
||||||
|
func OverlapGraph(patterns []string) (DirGraph, error) { |
||||||
|
|
||||||
|
var g DirGraph |
||||||
|
|
||||||
|
// Add every k-mer as a node to the overlap graph
|
||||||
|
k := len(patterns[0]) |
||||||
|
for _, pattern := range patterns { |
||||||
|
n := Node{pattern} |
||||||
|
g.AddNode(&n) |
||||||
|
|
||||||
|
// Verify k-mers are all same length
|
||||||
|
if len(pattern) != k { |
||||||
|
msg := fmt.Sprintf("Error: kmer lengths do not match, k = %d but len(\"%s\") = %d\n", |
||||||
|
k, pattern, len(pattern)) |
||||||
|
return g, errors.New(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Iterate pairwise through the input patterns
|
||||||
|
// to determine which pairs should have edges
|
||||||
|
// and in which direction
|
||||||
|
for i, pattern1 := range patterns { |
||||||
|
for j, pattern2 := range patterns { |
||||||
|
if j > i { |
||||||
|
prefix1 := pattern1[:k-1] |
||||||
|
suffix1 := pattern1[1:] |
||||||
|
|
||||||
|
prefix2 := pattern2[:k-1] |
||||||
|
suffix2 := pattern2[1:] |
||||||
|
|
||||||
|
if suffix1 == prefix2 { |
||||||
|
// 1 -> 2
|
||||||
|
n1 := g.GetNode(pattern1) |
||||||
|
n2 := g.GetNode(pattern2) |
||||||
|
g.AddEdge(n1, n2) |
||||||
|
} else if suffix2 == prefix1 { |
||||||
|
// 2 -> 1
|
||||||
|
n2 := g.GetNode(pattern2) |
||||||
|
n1 := g.GetNode(pattern1) |
||||||
|
g.AddEdge(n2, n1) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return g, nil |
||||||
|
} |
@ -0,0 +1,270 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
"sort" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA3a Test
|
||||||
|
|
||||||
|
func TestKmerComposition(t *testing.T) { |
||||||
|
k := 5 |
||||||
|
input := "CAATCCAAC" |
||||||
|
gold := []string{"AATCC", "ATCCA", "CAATC", "CCAAC", "TCCAA"} |
||||||
|
|
||||||
|
results, err := KmerComposition(input, k) |
||||||
|
if err != nil { |
||||||
|
t.Error(fmt.Sprintf("Error: %v", err)) |
||||||
|
} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, gold) { |
||||||
|
msg := fmt.Sprintf("Error testing KmerComposition()\ncomputed = %v\ngold = %v", |
||||||
|
results, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestKmerCompositionFile(t *testing.T) { |
||||||
|
|
||||||
|
filename := "data/string_composition.txt" |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
// lines[0]: Input
|
||||||
|
k_str := lines[1] |
||||||
|
k, _ := strconv.Atoi(k_str) |
||||||
|
|
||||||
|
input := lines[2] |
||||||
|
|
||||||
|
// lines[3]: Output
|
||||||
|
// lines[4+]: gold standard answers
|
||||||
|
|
||||||
|
// Make space for DNA strings
|
||||||
|
iLstart := 4 |
||||||
|
iLend := len(lines) |
||||||
|
gold := make([]string, len(lines)-iLstart) |
||||||
|
|
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
gold[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
results, err := KmerComposition(input, k) |
||||||
|
if err != nil { |
||||||
|
t.Error(fmt.Sprintf("Error: %v", err)) |
||||||
|
} |
||||||
|
|
||||||
|
// Check that lengths are equal
|
||||||
|
if len(results) != len(gold) { |
||||||
|
msg := "Error testing KmerComposition(): length of computed kmer composition does not match gold standard:" |
||||||
|
msg += fmt.Sprintf("len(computed) = %d, len(gold) = %d\n", len(results), len(gold)) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Sort before comparing
|
||||||
|
sort.Strings(gold) |
||||||
|
sort.Strings(results) |
||||||
|
|
||||||
|
if !EqualStringSlices(results, gold) { |
||||||
|
msg := fmt.Sprintf("Error testing KmerComposition() from file %s:\ncomputed = %d\ngold = %d", |
||||||
|
filename, |
||||||
|
len(results), len(gold)) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA3b Test
|
||||||
|
|
||||||
|
func TestReconstructGenome(t *testing.T) { |
||||||
|
contigs := []string{"ACCGA", "CCGAA", "CGAAG", "GAAGC", "AAGCT"} |
||||||
|
gold := "ACCGAAGCT" |
||||||
|
|
||||||
|
results, err := ReconstructGenomeFromPath(contigs) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
if results != gold { |
||||||
|
msg := fmt.Sprintf("Error testing ReconstructGenomeFromPath():\ninputs = %s\ncomputed = %s\ngold = %s", |
||||||
|
strings.Join(contigs, " "), results, gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestReconstructGenomeFile(t *testing.T) { |
||||||
|
|
||||||
|
filename := "data/genome_path_string.txt" |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
// lines[0]: Input
|
||||||
|
|
||||||
|
// Make space for DNA fragments
|
||||||
|
contigs := make([]string, len(lines)-3) |
||||||
|
iLstart := 1 |
||||||
|
iLend := len(lines) - 2 |
||||||
|
// Two counters:
|
||||||
|
// one for the line index (iL),
|
||||||
|
// one for the array index (iA).
|
||||||
|
for iA, iL := 0, iLstart; iL < iLend; iA, iL = iA+1, iL+1 { |
||||||
|
contigs[iA] = lines[iL] |
||||||
|
} |
||||||
|
|
||||||
|
// lines[-2]: Output
|
||||||
|
gold := lines[len(lines)-1] |
||||||
|
gold = strings.Trim(gold, " ") |
||||||
|
|
||||||
|
results, err := ReconstructGenomeFromPath(contigs) |
||||||
|
if err != nil { |
||||||
|
msg := "Error: ReconstructGenomeFromPath(): function returned an error" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
if len(results) != len(gold) { |
||||||
|
msg := "Error testing ReconstructGenomeFromPath(): length of reconstructed genome does not match length of correct result\n" |
||||||
|
msg += fmt.Sprintf("len(computed) = %d, len(gold) = %d\n", len(results), len(gold)) |
||||||
|
t.Error(msg) |
||||||
|
|
||||||
|
} else if results != gold { |
||||||
|
msg := "Error testing ReconstructGenomeFromPath(): computed genome and correct genome do not match\n" |
||||||
|
for i := 0; i < len(results); i++ { |
||||||
|
if results[i] != gold[i] { |
||||||
|
msg += fmt.Sprintf("Difference at index i = %d: computed[%d] = %s, gold[%d] = %s\n", i, i, string(results[i]), i, string(gold[i])) |
||||||
|
} |
||||||
|
} |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// BA3c Test
|
||||||
|
|
||||||
|
func TestOverlapGraph(t *testing.T) { |
||||||
|
patterns := []string{"ATGCG", "GCATG", "CATGC", "AGGCA", "GGCAT"} |
||||||
|
|
||||||
|
g, err := OverlapGraph(patterns) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
s := g.String() |
||||||
|
gold := "AGGCA -> GGCAT\nCATGC -> ATGCG\nGCATG -> CATGC\nGGCAT -> GCATG" |
||||||
|
|
||||||
|
if s != gold { |
||||||
|
msg := "Error testing OverlapGraph(): string representation of graphs don't match" |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
func TestOverlapGraphFile(t *testing.T) { |
||||||
|
|
||||||
|
filename := "data/overlap_graph.txt" |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("ReadLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
// lines[0]: Input
|
||||||
|
|
||||||
|
// We have an unknown number of fragments
|
||||||
|
// and an unknown number of edges,
|
||||||
|
// but they are split by a line with
|
||||||
|
// "Output:"
|
||||||
|
|
||||||
|
contigs := []string{} |
||||||
|
gold_edges := []string{} |
||||||
|
var stop bool |
||||||
|
|
||||||
|
// Loop over the first section of the file,
|
||||||
|
// containing overlapping kmers
|
||||||
|
stop = false |
||||||
|
iL := 1 |
||||||
|
for stop == false { |
||||||
|
|
||||||
|
// Abort if we prematurely reach the
|
||||||
|
// end of the file
|
||||||
|
if iL >= len(lines) { |
||||||
|
msg := "Error: could not properly parse file, no line with 'Output:' found." |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Get the line
|
||||||
|
line := lines[iL] |
||||||
|
|
||||||
|
// Break if we reached "Output:"
|
||||||
|
if "Output:" == strings.Trim(line, " ") { |
||||||
|
// step over this line
|
||||||
|
iL++ |
||||||
|
break |
||||||
|
} |
||||||
|
|
||||||
|
// Add line to list of contigs
|
||||||
|
contigs = append(contigs, strings.Trim(line, " ")) |
||||||
|
|
||||||
|
iL++ |
||||||
|
} |
||||||
|
|
||||||
|
// Loop over the second section of the file,
|
||||||
|
// containing overlapping kmer edges
|
||||||
|
stop = false |
||||||
|
for stop == false { |
||||||
|
|
||||||
|
// Break if we reach the end of the file
|
||||||
|
if iL == len(lines) { |
||||||
|
break |
||||||
|
} |
||||||
|
|
||||||
|
// Get the line
|
||||||
|
line := lines[iL] |
||||||
|
|
||||||
|
// Add line to list of edges
|
||||||
|
gold_edges = append(gold_edges, strings.Trim(line, " ")) |
||||||
|
|
||||||
|
iL++ |
||||||
|
} |
||||||
|
|
||||||
|
// Construct the graph
|
||||||
|
g, err := OverlapGraph(contigs) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
|
||||||
|
// Get the edge list representation of the graph
|
||||||
|
computed_edges := strings.Split(g.String(), "\n") |
||||||
|
|
||||||
|
if !EqualStringSlices(computed_edges, gold_edges) { |
||||||
|
msg := fmt.Sprintf("Error testing OverlapGraph() with file %s: edge lists do not match\n", filename) |
||||||
|
msg += fmt.Sprintf("len(gold_edges) = %d\nlen(computed_edges) = %d\n", len(gold_edges), len(computed_edges)) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
*/ |
@ -0,0 +1,88 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"sort" |
||||||
|
"strings" |
||||||
|
"sync" |
||||||
|
) |
||||||
|
|
||||||
|
// DirGraph is a directed graph: a list of nodes plus an adjacency map from
// each source node to the nodes it points at.
//
// lock guards nodes and edges. The zero value is an empty graph ready for
// use. Because the struct contains a sync.RWMutex, share a graph by pointer
// rather than copying it once it is in use.
type DirGraph struct {
	nodes []*Node
	edges map[Node][]*Node
	lock  sync.RWMutex
}

// Node is a graph vertex identified by its name.
type Node struct {
	name string
}

// String returns the node's name.
func (n *Node) String() string {
	return n.name
}

// AddNode appends n to the graph's node list. No de-duplication is done.
func (g *DirGraph) AddNode(n *Node) {
	g.lock.Lock()
	defer g.lock.Unlock()
	g.nodes = append(g.nodes, n)
}

// AddEdge records a directed edge n1 -> n2, creating the edge map on first
// use. Edges are keyed by the value of n1, so nodes with equal names share
// one adjacency list.
//
// Both nodes must be non-nil: n1 is dereferenced here and n2 is dereferenced
// when the graph is printed.
func (g *DirGraph) AddEdge(n1, n2 *Node) {
	g.lock.Lock()
	defer g.lock.Unlock()
	if g.edges == nil {
		g.edges = make(map[Node][]*Node)
	}
	g.edges[*n1] = append(g.edges[*n1], n2)
}

// EdgeCount returns the total number of edges in the graph.
func (g *DirGraph) EdgeCount() int {
	g.lock.RLock()
	defer g.lock.RUnlock()
	return g.countEdges()
}

// countEdges sums the number of targets of every source node.
// The caller must hold g.lock.
func (g *DirGraph) countEdges() int {
	total := 0
	for _, targets := range g.edges {
		total += len(targets)
	}
	return total
}

// GetNode returns the first added node whose name equals label, or nil when
// no such node exists.
func (g *DirGraph) GetNode(label string) *Node {
	g.lock.RLock()
	defer g.lock.RUnlock()
	for _, n := range g.nodes {
		if n.name == label {
			return n
		}
	}
	return nil
}

// String returns the graph as a sorted edge list, one "src -> dst" entry per
// line, with no trailing newline. An empty graph yields "".
func (g *DirGraph) String() string {
	g.lock.RLock()
	defer g.lock.RUnlock()

	// Map iteration order is random, so collect every edge first and sort
	// the strings afterwards. countEdges is used instead of EdgeCount to
	// avoid taking the read lock a second time.
	edgeStrings := make([]string, 0, g.countEdges())
	for src, targets := range g.edges {
		for _, dst := range targets {
			edgeStrings = append(edgeStrings, fmt.Sprintf("%s -> %s", src.name, dst.name))
		}
	}
	sort.Strings(edgeStrings)
	return strings.Join(edgeStrings, "\n")
}
@ -0,0 +1,53 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
func fillGraph() DirGraph { |
||||||
|
|
||||||
|
var g DirGraph |
||||||
|
|
||||||
|
n1a := Node{"AGGCA"} |
||||||
|
n1b := Node{"GGCAT"} |
||||||
|
|
||||||
|
g.AddNode(&n1a) |
||||||
|
g.AddNode(&n1b) |
||||||
|
g.AddEdge(&n1a, &n1b) |
||||||
|
|
||||||
|
n2a := Node{"CATGC"} |
||||||
|
n2b := Node{"ATGCG"} |
||||||
|
|
||||||
|
g.AddNode(&n2a) |
||||||
|
g.AddNode(&n2b) |
||||||
|
g.AddEdge(&n2a, &n2b) |
||||||
|
|
||||||
|
n3a := Node{"GCATG"} |
||||||
|
n3b := Node{"CATGC"} |
||||||
|
|
||||||
|
g.AddNode(&n3a) |
||||||
|
g.AddNode(&n3b) |
||||||
|
g.AddEdge(&n3a, &n3b) |
||||||
|
|
||||||
|
n4a := Node{"GGCAT"} |
||||||
|
n4b := Node{"GCATG"} |
||||||
|
|
||||||
|
g.AddNode(&n4a) |
||||||
|
g.AddNode(&n4b) |
||||||
|
g.AddEdge(&n4a, &n4b) |
||||||
|
|
||||||
|
return g |
||||||
|
} |
||||||
|
|
||||||
|
func TestDatastructureDirGraph(t *testing.T) { |
||||||
|
g := fillGraph() |
||||||
|
s := g.String() |
||||||
|
gold := "AGGCA -> GGCAT\nCATGC -> ATGCG\nGCATG -> CATGC\nGGCAT -> GCATG" |
||||||
|
if s != gold { |
||||||
|
msg := "Error: DirGraph data structure did not print properly\n" |
||||||
|
msg += fmt.Sprintf("computed:\n%v\n\n", s) |
||||||
|
msg += fmt.Sprintf("gold:\n%v\n\n", gold) |
||||||
|
t.Error(msg) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,64 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
) |
||||||
|
|
||||||
|
// Count the number of each type of nucleotide ACGT.
|
||||||
|
func CountNucleotides(dna string) (map[string]int, error) { |
||||||
|
|
||||||
|
if !CheckIsDNA(dna) { |
||||||
|
msg := fmt.Sprintf("Error: input string was not DNA: %s", dna) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Map to store counts for each nucleotide
|
||||||
|
result := make(map[string]int) |
||||||
|
|
||||||
|
// Get bitmask representations
|
||||||
|
bms, err := DNA2Bitmasks(dna) |
||||||
|
|
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: DNA2Bitmasks() threw an error for input %s", |
||||||
|
dna) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
|
||||||
|
// Iterate over every possible nucleotide
|
||||||
|
bases := []string{"A", "C", "G", "T"} |
||||||
|
for _, base := range bases { |
||||||
|
|
||||||
|
// Bitmap for this nucleotide
|
||||||
|
bm := bms[base] |
||||||
|
|
||||||
|
// Frequency for this nucleotide
|
||||||
|
sum := 0 |
||||||
|
for j := 0; j < len(bm); j++ { |
||||||
|
if bm[j] { |
||||||
|
sum++ |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Store the result
|
||||||
|
result[base] = sum |
||||||
|
} |
||||||
|
|
||||||
|
return result, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Count the number of each type of nucleotide ACGT
|
||||||
|
// and return as an array in order A, C, G, T.
|
||||||
|
func CountNucleotidesArray(dna string) ([]int, error) { |
||||||
|
result := make([]int, 4) |
||||||
|
mresult, err := CountNucleotides(dna) |
||||||
|
if err != nil { |
||||||
|
msg := fmt.Sprintf("Error: CountNucleotides() returned an error: %v", err) |
||||||
|
return nil, errors.New(msg) |
||||||
|
} |
||||||
|
result[0] = mresult["A"] |
||||||
|
result[1] = mresult["C"] |
||||||
|
result[2] = mresult["G"] |
||||||
|
result[3] = mresult["T"] |
||||||
|
return result, nil |
||||||
|
} |
@ -0,0 +1,21 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
func TestCountNucleotides(t *testing.T) { |
||||||
|
input := "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" |
||||||
|
results, err := CountNucleotidesArray(input) |
||||||
|
if err != nil { |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
gold := []int{20, 12, 17, 21} |
||||||
|
|
||||||
|
if !EqualIntSlices(results, gold) { |
||||||
|
err := fmt.Sprintf("Error testing CountNucleotides(): input = %s\ncomputed = %v\ngold = %v\n", |
||||||
|
input, results, gold) |
||||||
|
t.Error(err) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,195 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import ( |
||||||
|
"bufio" |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
"math" |
||||||
|
"os" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
// ReadLines reads the whole file at path into memory and returns its lines
// with the line terminators removed.
//
// Lines are split with a bufio.Scanner. The scanner starts with a 64 KiB
// buffer and may grow it up to maxLineBytes, so very long single-line inputs
// such as whole sequences are read instead of failing with
// bufio.ErrTooLong. If scanning stops with an error, the lines read so far
// are returned together with that error.
func ReadLines(path string) ([]string, error) {
	const (
		initialBufBytes = 64 << 10 // starting scanner buffer
		maxLineBytes    = 64 << 20 // longest single line accepted
	)

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialBufBytes), maxLineBytes)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, scanner.Err()
}
||||||
|
|
||||||
|
// WriteLines creates (or truncates) the file at path and writes each element
// of lines to it followed by a newline.
//
// Errors from creating, writing, flushing and closing the file are all
// reported; a failed Close matters here because buffered data may only reach
// the disk at that point.
func WriteLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the first error; only surface the Close error if nothing
		// failed earlier.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, err := fmt.Fprintln(w, line); err != nil {
			return err
		}
	}
	return w.Flush()
}
||||||
|
|
||||||
|
// ReadMatrix32 parses lines as a matrix of floating point values, one row
// per line and k whitespace-separated values per row. The result has
// len(lines) rows of k float32 values each, i.e. result[row][col].
//
// Values may be separated by any run of spaces or tabs, and leading or
// trailing whitespace on a line is ignored. An error is returned when a line
// does not hold exactly k values or when a value cannot be parsed as a
// 32-bit float.
func ReadMatrix32(lines []string, k int) ([][]float32, error) {
	result := make([][]float32, len(lines))
	for i, line := range lines {
		tokens := strings.Fields(line)

		// Check the width before allocating the row, so a bad k is reported
		// as an error rather than reaching make with a negative size.
		if len(tokens) != k {
			msg := fmt.Sprintf("Error: length of line %d was %d, should be %d", i+1, len(tokens), k)
			return nil, errors.New(msg)
		}

		row := make([]float32, k)
		for j, token := range tokens {
			// bitSize 32 makes ParseFloat round to a value that converts to
			// float32 without changing.
			f, err := strconv.ParseFloat(token, 32)
			if err != nil {
				return nil, err
			}
			row[j] = float32(f)
		}
		result[i] = row
	}
	return result, nil
}
||||||
|
|
||||||
|
// EqualStringSlices reports whether a and b have the same length and hold
// equal strings at every index. A nil slice and an empty slice compare equal.
func EqualStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, s := range a {
		if s != b[i] {
			return false
		}
	}
	return true
}
||||||
|
|
||||||
|
// EqualBoolSlices reports whether a and b have the same length and hold
// equal booleans at every index. A nil slice and an empty slice compare equal.
func EqualBoolSlices(a, b []bool) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
||||||
|
|
||||||
|
// EqualIntSlices reports whether a and b have the same length and hold equal
// integers at every index. A nil slice and an empty slice compare equal.
func EqualIntSlices(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
||||||
|
|
||||||
|
// TheseFloatsAreEqual reports whether a and b differ by less than 1.0e-6
// in absolute value.
func TheseFloatsAreEqual(a, b float32) bool {
	return math.Abs(float64(b-a)) < 1.0e-6
}
||||||
|
|
||||||
|
// Factorial returns n! as an int. Any n below 2, including negative values,
// yields 1.
func Factorial(n int) int {
	product := 1
	for factor := n; factor >= 2; factor-- {
		product *= factor
	}
	return product
}
||||||
|
|
||||||
|
// Binomial returns the binomial coefficient C(n, k), the number of ways to
// choose k items out of n.
//
// When k is outside the range 0..n there is no way to make the choice, so
// the result is 0.
func Binomial(n, k int) int {
	if k < 0 || k > n {
		return 0
	}

	// C(n, k) == C(n, n-k); use the smaller of the two to shorten the loop.
	if k > n-k {
		k = n - k
	}

	// Build up
	//   ( n * (n-1) * ... * (n-k+1) )
	//   -----------------------------
	//   ( k * (k-1) * ... * 1 )
	// one factor at a time. After step i the running value equals
	// C(n, i+1), so each division is exact.
	result := 1
	for i := 0; i < k; i++ {
		result *= n - i
		result /= i + 1
	}
	return result
}
||||||
|
|
||||||
|
// minint returns the smaller of two ints. It exists so callers do not have
// to round-trip through float64 to use math.Min.
func minint(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
||||||
|
|
||||||
|
// maxint returns the larger of two ints.
func maxint(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
@ -0,0 +1,39 @@ |
|||||||
|
package rosalind |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestEqualStringSlices(t *testing.T) { |
||||||
|
a := []string{"peanut", "butter", "jelly", "time"} |
||||||
|
b := make([]string, 4) |
||||||
|
b[0] = "peanut" |
||||||
|
b[1] = "butter" |
||||||
|
b[2] = "jelly" |
||||||
|
b[3] = "time" |
||||||
|
if !EqualStringSlices(a, b) { |
||||||
|
msg := "Error: EqualStringSlices() is broken!" |
||||||
|
t.Fatal(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestEqualBoolSlices(t *testing.T) { |
||||||
|
a := []bool{true, true, true, false, false, false, true, true, true} |
||||||
|
b := make([]bool, 9) |
||||||
|
b[0], b[1], b[2] = true, true, true |
||||||
|
b[3], b[4], b[5] = false, false, false |
||||||
|
b[6], b[7], b[8] = true, true, true |
||||||
|
if !EqualBoolSlices(a, b) { |
||||||
|
msg := "Error: EqualBoolSlices() is broken!" |
||||||
|
t.Fatal(msg) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestEqualIntSlices(t *testing.T) { |
||||||
|
a := []int{3, 1, 4, 1, 5, 9} |
||||||
|
b := make([]int, 6) |
||||||
|
b[0], b[1], b[2] = 3, 1, 4 |
||||||
|
b[3], b[4], b[5] = 1, 5, 9 |
||||||
|
if !EqualIntSlices(a, b) { |
||||||
|
msg := "Error: EqualIntSlices() is broken!" |
||||||
|
t.Fatal(msg) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,38 @@ |
|||||||
|
#!/bin/bash
#
# Generate static HTML API docs for the package: start a local godoc server,
# mirror its /pkg and /lib pages with wget, and move the result to $DOC_DIR.
set -u
set -x

DOC_DIR=godoc
PKG=github.com/charlesreid1/go-rosalind

# How long to wait for godoc to start answering, in seconds.
MAX_WAIT=60

# Run a godoc server which we will scrape. Clobber the GOPATH to include
# only our dependencies.
GOPATH="$(pwd):$(pwd)/vendor" godoc -http=localhost:6060 &
DOC_PID=$!

# Make sure the background server never outlives this script, whatever
# path we exit through.
trap 'kill "$DOC_PID" 2>/dev/null' EXIT

# Wait for the server to init. curl exits 0 once it can connect.
waited=0
until curl -s "http://localhost:6060/pkg/$PKG" > /dev/null
do
    # Give up early if godoc itself has died.
    if ! kill -0 "$DOC_PID" 2>/dev/null
    then
        echo "godoc exited before it started serving" >&2
        exit 1
    fi

    if [ "$waited" -ge "$MAX_WAIT" ]
    then
        echo "godoc did not start serving within ${MAX_WAIT}s" >&2
        exit 1
    fi

    sleep 1
    waited=$((waited + 1))
done

# Scrape the pkg directory for the API docs. Scrape lib for the CSS/JS. Ignore everything else.
# The output is dumped to the directory "localhost:6060".
wget -r -m -k -E -p -erobots=off --include-directories="/pkg,/lib" --exclude-directories="*" "http://localhost:6060/pkg/$PKG/"

# Stop the godoc server
kill "$DOC_PID"
wait "$DOC_PID" 2>/dev/null

# Delete the old directory or else mv will put the localhost dir into
# the DOC_DIR if it already exists.
rm -rf "$DOC_DIR"
mv "localhost:6060" "$DOC_DIR"

echo "Docs can be found in $DOC_DIR"
echo "Replace /lib and /pkg in the gh-pages branch to update gh-pages"
@ -0,0 +1,50 @@ |
|||||||
|
package rosalindstronghold |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// DNADescription prints the banner for Rosalind problem DNA (Counting DNA
// Nucleotides) to standard output: title, one-line summary and problem URL.
func DNADescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem DNA:",
		"Counting DNA Nucleotides",
		"",
		"Given a DNA string, return a count of each base pair as an array, in the order A, C, G, T",
		"",
		"URL: http://rosalind.info/problems/dna/",
		"",
	}
	for i := range description {
		fmt.Println(description[i])
	}
}
||||||
|
|
||||||
|
// Run the problem
|
||||||
|
func DNA(filename string) { |
||||||
|
|
||||||
|
DNADescription() |
||||||
|
|
||||||
|
// Read the contents of the input file
|
||||||
|
// into a single string
|
||||||
|
lines, err := rosa.ReadLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("readLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Input file contents
|
||||||
|
input := lines[0] |
||||||
|
result, _ := rosa.CountNucleotidesArray(input) |
||||||
|
|
||||||
|
fmt.Println("") |
||||||
|
fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
for _, r := range result { |
||||||
|
fmt.Printf("%d ", r) |
||||||
|
} |
||||||
|
fmt.Printf("\n\n") |
||||||
|
} |
@ -0,0 +1 @@ |
|||||||
|
AGCAGGTAGAAGGCGTGTTAGAGGCATCCCTCATGCCTAAATCTATGTAAGAGTCGTGCTGGTCCCCCGTGTGCACCATGTGTGCACACTAAGAGGTCCTACACAACTTAGGATGGACAGATCAGCGTAGAAAGGGGGGGTTACTCGATGGCTTAATCCGAGCTCTACATGTGCGTTACTCCCCCGACTGGTCGCTGCGAACCGGACACACGACAGGTGCGCCGGTACGGACGGGTTAACTTTGTTTATTCACAGCAACGCGCGACAGTCCTACCTATCTTTTCAAGGAAGTGACAGGGACTAGTTCGGCAGGACACAAACCAGAAAACCTGATCAGACGACATGTCACAGGCGATAAGATGGTGTCCGTGGCGATATCCATTAATACCAAAGTATCAGCGGCCGACGCATCTAGCGCTAAGCCGATCGAGCGCAACCATTACCAATATACTGCGGCGGGCTGGTATGTAGTGCAGACATTGTGGGTTCCACGGCTCATGTATCTCGAAACCCTTACTGTAAAATGTAGGTAATTGAGCGCAGATCCATACACGCGAGTGTCTCCGTCACAAATACTGATGGCTCTATTGGCGTCGGATATTATTAGTTTGTATATCCCGCCTTACCTATTCTACCTAAGGCGAGCATGATTGTGCTCGGCCCTGGAGTTAAGTCTGTGAGTTCCCGTGGATGACAACTGGACAGCGTCACGTCATTGTTGAACCGTCTATTCTGCTTTACGTCATGCAGGTTGACGGGGCCAGAGCTTCTCTGCCGCACCAGCTCTGTTCTATATGATTATTTTTTATGGCTAAGACCGATCTACCATTATTGTTGTATGCGACGCCAGTTCGCACATGCCGGGCCTATACTTCGCATAACTCGAGAGGGCATGACTTGCGAGCGGCGGACTCG |
@ -0,0 +1,34 @@ |
|||||||
|
import jinja2 |
||||||
|
import os |
||||||
|
|
||||||
|
def main():
    """Render template.go.j2 once per problem and write <id>.go.

    Templates are loaded from the current directory. A problem whose output
    file already exists is left untouched.
    """
    # Jinja env
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    problems = [
        {
            'id': 'DNA',
            'title': 'Counting DNA Nucleotides',
            'description': 'Given a DNA string, return a count of each base pair as an array, in the order A, C, G, T',
            'url': 'http://rosalind.info/problems/dna/'
        },
    ]

    print("Writing problem boilerplate code")

    template_name = 'template.go.j2'
    for problem in problems:
        rendered = env.get_template(template_name).render(**problem)
        target = problem['id'].lower() + '.go'

        # Never overwrite a file that may already hold a hand-written solution.
        if os.path.exists(target):
            print("File %s already exists, skipping..." % (target))
            continue

        print("Writing to file %s..." % (target))
        with open(target, 'w') as out:
            out.write(rendered)

    print("Done")
||||||
|
|
||||||
|
if __name__=="__main__": |
||||||
|
main() |
@ -0,0 +1,7 @@ |
|||||||
|
package rosalindstronghold |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestDNA(t *testing.T) { |
||||||
|
DNA("for_real/rosalind_dna.txt") |
||||||
|
} |
@ -0,0 +1,49 @@ |
|||||||
|
package rosalindstronghold |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"log" |
||||||
|
|
||||||
|
rosa "github.com/charlesreid1/go-rosalind/rosalind" |
||||||
|
) |
||||||
|
|
||||||
|
// Print problem description for Rosalind.info |
||||||
|
// Problem {{id}}: {{title}} |
||||||
|
func {{id}}Description() { |
||||||
|
description := []string{ |
||||||
|
"-----------------------------------------", |
||||||
|
"Rosalind: Problem {{id}}:", |
||||||
|
"{{title}}", |
||||||
|
"", |
||||||
|
"{{description}}", |
||||||
|
"", |
||||||
|
"URL: {{url}}", |
||||||
|
"", |
||||||
|
} |
||||||
|
for _, line := range description { |
||||||
|
fmt.Println(line) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Run the problem |
||||||
|
func {{id}}(filename string) { |
||||||
|
|
||||||
|
{{id}}Description() |
||||||
|
|
||||||
|
// Read the contents of the input file |
||||||
|
// into a single string |
||||||
|
lines, err := readLines(filename) |
||||||
|
if err != nil { |
||||||
|
log.Fatalf("readLines: %v", err) |
||||||
|
} |
||||||
|
|
||||||
|
//// Input file contents |
||||||
|
//input := lines[0] |
||||||
|
//params := lines[1] |
||||||
|
//result := rosa.PatternCount(input, pattern) |
||||||
|
// |
||||||
|
//fmt.Println("") |
||||||
|
//fmt.Printf("Computed result from input file: %s\n", filename) |
||||||
|
//fmt.Println(result) |
||||||
|
} |
||||||
|
|
Loading…
Reference in new issue