Compare commits

...

31 Commits

Author SHA1 Message Date
Charles Reid 9092f98bf6
Add solution to BA2F (#17) 6 years ago
Charles Reid 2a580cbdc7
add a maxint and minint utility function (#16) 6 years ago
Charles Reid 4810449c86
Fix BA2e solution (#14) 6 years ago
Charles Reid 216872f46b
Add solution to BA2E (#12) 6 years ago
Charles Reid 49d7385a4c
add materials for BA2D (in progress) (#11) 6 years ago
Charles Reid 10b2a14f54
Add BA2c solution (#10) 6 years ago
Charles Reid a0c9f211bb
add chapter3 module, and rosalind functions for BA3a thru BA3c (#9) 6 years ago
Charles Reid 87eac56bc4
Add Stronghold module (#8) 6 years ago
Charles Reid 8543defb86
Continue work on Chapter 2 problems (#7) 6 years ago
Charles Reid 2a64f89e35
Add code for Chapter 2 (#6) 6 years ago
Charles Reid 8992e3e3ce Merge branch 'chapter1cleanup' 6 years ago
Charles Reid 61b538675d Merge branch 'master' of github.com:charlesreid1/go-rosalind into chapter1cleanup 6 years ago
Charles Reid e7b887b94a shield links 6 years ago
Charles Reid 385bd4ac58 add language shield. shields up. 6 years ago
Charles Reid cf22c98e2b
clean up chapter 1, consolidate tests (#5) 6 years ago
Charles Reid ef2172d04c add MIT license 6 years ago
Charles Reid b971d2d1f9 add license shield 6 years ago
Charles Reid 257bdd3f5b fix (remove) custom travis go import path 6 years ago
Charles Reid f70724b137 add CheckIsDNA to BA1A 6 years ago
Charles Reid ef412f12f0 tests for chapter01 working 6 years ago
Charles Reid 3a2b8d65af update readme to reflect chapter 1 cleanup 6 years ago
Charles Reid d8efc0df41 add chapter01 tests to travis file 6 years ago
Charles Reid 42f5d6f5fd clean up chapter 1, consolidate tests 6 years ago
Charles Reid 6a5d827185 yikes, spaces 6 years ago
Charles Reid d3782ad84d fix tab 6 years ago
Charles Reid d4cf42824e fix import statements in readme example 6 years ago
Charles Reid 8f24f5f75c fix typo in readme example 6 years ago
Charles Reid 60abfd5288 fix typo in .travis.yml 6 years ago
Charles Reid 1695a36aa4
Make this go get friendly (#3) 6 years ago
Charles Reid 7adf5b5418
Chapter1 part2 (#2) 6 years ago
Charles Reid e65a3d6726
Add solutions for Chapter 1 (#1) 6 years ago
  1. 19
      .gitignore
  2. 14
      .travis.yml
  3. 19
      LICENSE
  4. 126
      Readme.md
  5. 38
      ba1a.go
  6. 69
      chapter1/Readme.md
  7. 55
      chapter1/ba1a.go
  8. 59
      chapter1/ba1b.go
  9. 51
      chapter1/ba1c.go
  10. 61
      chapter1/ba1d.go
  11. 59
      chapter1/ba1e.go
  12. 61
      chapter1/ba1f.go
  13. 53
      chapter1/ba1g.go
  14. 66
      chapter1/ba1h.go
  15. 70
      chapter1/ba1i.go
  16. 71
      chapter1/ba1j.go
  17. 62
      chapter1/ba1k.go
  18. 51
      chapter1/ba1lima.go
  19. 62
      chapter1/ba1m.go
  20. 60
      chapter1/ba1n.go
  21. 20
      chapter1/chapter1_test.go
  22. 2
      chapter1/for_real/rosalind_ba1a.txt
  23. 2
      chapter1/for_real/rosalind_ba1b.txt
  24. 1
      chapter1/for_real/rosalind_ba1c.txt
  25. 2
      chapter1/for_real/rosalind_ba1d.txt
  26. 2
      chapter1/for_real/rosalind_ba1e.txt
  27. 1
      chapter1/for_real/rosalind_ba1f.txt
  28. 2
      chapter1/for_real/rosalind_ba1g.txt
  29. 3
      chapter1/for_real/rosalind_ba1h.txt
  30. 2
      chapter1/for_real/rosalind_ba1i.txt
  31. 2
      chapter1/for_real/rosalind_ba1j.txt
  32. 2
      chapter1/for_real/rosalind_ba1k.txt
  33. 1
      chapter1/for_real/rosalind_ba1l.txt
  34. 2
      chapter1/for_real/rosalind_ba1m.txt
  35. 2
      chapter1/for_real/rosalind_ba1n.txt
  36. 1
      chapter1/utils.go
  37. 69
      chapter2/Readme.md
  38. 67
      chapter2/ba2a.go
  39. 61
      chapter2/ba2b.go
  40. 54
      chapter2/ba2c.go
  41. 67
      chapter2/ba2d.go
  42. 67
      chapter2/ba2e.go
  43. 64
      chapter2/ba2f.go
  44. 65
      chapter2/ba2g.go
  45. 13
      chapter2/chapter2_test.go
  46. 11
      chapter2/for_real/rosalind_ba2a.txt
  47. 11
      chapter2/for_real/rosalind_ba2b.txt
  48. 6
      chapter2/for_real/rosalind_ba2c.txt
  49. 26
      chapter2/for_real/rosalind_ba2d.txt
  50. 26
      chapter2/for_real/rosalind_ba2e.txt
  51. 21
      chapter2/for_real/rosalind_ba2f.txt
  52. 21
      chapter2/for_real/rosalind_ba2g.txt
  53. 77
      chapter2/populate_templates.py
  54. 49
      chapter2/template.go.j2
  55. 60
      chapter3/ba3a.go
  56. 54
      chapter3/ba3b.go
  57. 54
      chapter3/ba3c.go
  58. 9
      chapter3/chapter3_test.go
  59. 2
      chapter3/for_real/rosalind_ba3a.txt
  60. 4976
      chapter3/for_real/rosalind_ba3b.txt
  61. 981
      chapter3/for_real/rosalind_ba3c.txt
  62. 49
      chapter3/populate_templates.py
  63. 49
      chapter3/template.go.j2
  64. 4
      rosalind/Readme.md
  65. 5
      rosalind/data/clump_finding.txt
  66. 5
      rosalind/data/frequent_words.txt
  67. 5
      rosalind/data/frequent_words_mismatch.txt
  68. 5
      rosalind/data/frequent_words_mismatch_complements.txt
  69. 4979
      rosalind/data/genome_path_string.txt
  70. 5
      rosalind/data/hamming_distance.txt
  71. 4
      rosalind/data/minimum_skew.txt
  72. 10
      rosalind/data/motif_enumeration.txt
  73. 2624
      rosalind/data/neighbors.txt
  74. 5
      rosalind/data/number_to_pattern.txt
  75. 19953
      rosalind/data/overlap_graph.txt
  76. 5
      rosalind/data/pattern_count.txt
  77. 5
      rosalind/data/pattern_matching.txt
  78. 4
      rosalind/data/pattern_to_number.txt
  79. 4
      rosalind/data/reverse_complement.txt
  80. 9517
      rosalind/data/string_composition.txt
  81. 1020
      rosalind/rosalind_ba1.go
  82. 1121
      rosalind/rosalind_ba1_test.go
  83. 974
      rosalind/rosalind_ba2.go
  84. 882
      rosalind/rosalind_ba2_test.go
  85. 151
      rosalind/rosalind_ba3.go
  86. 270
      rosalind/rosalind_ba3_test.go
  87. 88
      rosalind/rosalind_datastructures.go
  88. 53
      rosalind/rosalind_datastructures_test.go
  89. 64
      rosalind/rosalind_stronghold.go
  90. 21
      rosalind/rosalind_stronghold_test.go
  91. 195
      rosalind/utils.go
  92. 39
      rosalind/utils_test.go
  93. 1
      scripts/.gitignore
  94. 38
      scripts/gendoc.sh
  95. 50
      stronghold/dna.go
  96. 1
      stronghold/for_real/rosalind_dna.txt
  97. 34
      stronghold/populate_templates.py
  98. 7
      stronghold/stronghold_test.go
  99. 49
      stronghold/template.go.j2

19
.gitignore vendored

@ -0,0 +1,19 @@ @@ -0,0 +1,19 @@
golibby
queens
chapter01/chapter01
# golang:
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out

14
.travis.yml

@ -0,0 +1,14 @@ @@ -0,0 +1,14 @@
# https://docs.travis-ci.com/user/languages/go/
language: go
go:
- 1.10.x
- 1.11.x
- tip
install: true
script:
- go test -v ./rosalind/...
- go test -v ./chapter1/...
- go test -v ./chapter2/...
- go test -v ./chapter3/...

19
LICENSE

@ -0,0 +1,19 @@ @@ -0,0 +1,19 @@
Copyright 2019 Charles Reid
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

126
Readme.md

@ -0,0 +1,126 @@ @@ -0,0 +1,126 @@
# go-rosalind
`rosalind` is a Go (golang) package for solving bioinformatics problems.
[![travis](https://img.shields.io/travis/charlesreid1/go-rosalind.svg)](https://travis-ci.org/charlesreid1/go-rosalind)
[![golang](https://img.shields.io/badge/language-golang-00ADD8.svg)](https://golang.org)
[![license](https://img.shields.io/github/license/charlesreid1/go-rosalind.svg)](https://github.com/charlesreid1/go-rosalind/blob/master/LICENSE)
[![godoc](https://godoc.org/github.com/charlesreid1/go-rosalind?status.svg)](http://godoc.org/github.com/charlesreid1/go-rosalind)
## Summary
This repo contains a Go (golang) library, `rosalind`, that implements
functionality for solving bioinformatics problems. This is mainly
useful for problems on Rosalind.info but is for general use as well.
Rosalind problems are grouped by chapter. Each problem has its own
function and is implemented in a library called `chapter1`, `chapter2`,
etc.
For example, Chapter 1 question A is implemented in package
`chapter1` as the function `BA1a( <input-file-name> )`.
This (specific) functionality wraps the (general purpose)
`rosalind` library.
## Quick Start
### Rosalind
The `rosalind` library can be installed using `go get`:
```
go get github.com/charlesreid1/go-rosalind/rosalind
```
The library can now be imported and its functions called directly.
Here is a brief example:
```
package main
import (
"fmt"
"github.com/charlesreid1/go-rosalind/rosalind"
)
func main() {
input := "AAAATGCGCTAGTAAAAGTCACTGAAAA"
k := 4
result, _ := rosalind.MostFrequentKmers(input, k)
fmt.Println(result)
}
```
### Problem Sets
Each set of problems is grouped into its own package. These
packages import the `rosalind` package, so it should be
available.
You can install the Chapter 1 problem set, for example, like so:
```
go get github.com/charlesreid1/go-rosalind/chapter1
```
This can now be imported and used in any Go program.
Try creating a `main.go` file in a temporary directory,
and run it with `go run main.go`:
```
package main
import (
rch1 "github.com/charlesreid1/go-rosalind/chapter1"
)
func main() {
filename := "rosalind_ba1a.txt"
rch1.BA1a(filename)
}
```
Assuming an input file `rosalind_ba1a.txt` is available,
you should see a problem description and the output of
the problem, which can be copied and pasted into
Rosalind.info:
```
$ go run main.go
-----------------------------------------
Rosalind: Problem BA1a:
Most Frequest k-mers
Given an input string and a length k,
report the k-mer or k-mers that occur
most frequently.
URL: http://rosalind.info/problems/ba1a/
Computed result from input file: rosalind_ba1a.txt
39
```
## Command Line Interface
TBA
## Organization
The repo contains the following directories:
* `rosalind/` - code and functions for the Rosalind library
* `chapter1/` - solutions to chapter 1 questions (utilizes `rosalind` library)
* `chapter2/` - solutions to chapter 2 questions
* `chapter3/` - solutions to chapter 3 questions
* `stronghold/` - solutions to questions from the stronghold section of Rosalind.info
See the Readme file in each respective directory for more info.

38
ba1a.go

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
package main
import "fmt"
// Rosalind: Problem BA1A
//
// To run:
//
// $ go run ba1a.go
// pattern_count reports how many times pattern occurs in input.
// Overlapping occurrences are each counted.
func pattern_count(input string, pattern string) int {
	width := len(pattern)
	hits := 0
	// Slide a window of len(pattern) bytes across input, one position at a time.
	for start := 0; start+width <= len(input); start++ {
		if input[start:start+width] == pattern {
			hits++
		}
	}
	return hits
}
// main demonstrates pattern_count on a small fixed example and prints
// the resulting count.
func main() {
	const text, motif = "GCGCG", "GCG"
	fmt.Println("Number of occurrences of GCG in GCGCG:")
	fmt.Println(pattern_count(text, motif))
}

69
chapter1/Readme.md

@ -0,0 +1,69 @@ @@ -0,0 +1,69 @@
# Rosalind Chapter 1
This folder contains the `chapter1` module, which
provides functions for each of the problems from
Chapter 1 of Rosalind.info's Bioinformatics Textbook
track.
## How to run
* Each problem has its own function (example: `BA1a(...)`)
* Each problem expects an input file
(example input files in `for_real` directory,
or provide the input file downloaded
from Rosalind.info)
* Pass the input file name to the function, like this:
`BA1a("rosalind_ba1a.txt")`
## Quick Start
To use the functions in this package, start by installing it:
```
go get github.com/charlesreid1/go-rosalind/chapter1
```
Once you have installed the `chapter1` package,
you can import it, then call the function for whichever
Rosalind.info problem you want to solve from Chapter 1:
```
package main
import (
rch1 "github.com/charlesreid1/go-rosalind/chapter1"
)
func main() {
rch1.BA1a("rosalind_ba1a.txt")
}
```
## Examples
See `chapter1_test.go` for examples.
## Tests
To run tests of all Chapter 1 problems, run
`go test` from this directory:
```
go test -v
```
or, from the parent directory, the root of the
go-rosalind repository:
```
go test -v ./chapter1/...
```
Note that this solves every problem in
Chapter 1 and prints the solutions (so there
is a lot of spew). It does not check the
solutions (for that, see the tests in the
`rosalind` library.)

55
chapter1/ba1a.go

@ -0,0 +1,55 @@ @@ -0,0 +1,55 @@
package rosalindchapter1
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1a: Compute the Number of Times a Pattern Appears in a Text

// BA1aDescription prints a short description of problem BA1a
// to standard output, one line at a time.
func BA1aDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1a:",
		"Compute the Number of Times a Pattern Appears in a Text",
		"",
		"Given an input string and a pattern,",
		"report the number of times the pattern",
		"occurs in the input string.",
		"",
		"URL: http://rosalind.info/problems/ba1a/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1a prints the problem description, reads the lines of filename with
// rosa.ReadLines, and prints the value rosa.PatternCount returns for the
// first line (input) and the second line (pattern).
//
// The process exits via log.Fatalf if the file cannot be read or has
// fewer than two lines.
func BA1a(filename string) {
	BA1aDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input, pattern := lines[0], lines[1]

	result := rosa.PatternCount(input, pattern)

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(result)
}

59
chapter1/ba1b.go

@ -0,0 +1,59 @@ @@ -0,0 +1,59 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1b: Most Frequent k-mers

// BA1bDescription prints a short description of problem BA1b
// to standard output, one line at a time.
func BA1bDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1b:",
		"Most Frequent k-mers",
		"",
		"Given an input string and a length k,",
		"report the k-mer or k-mers that occur",
		"most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1b/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1b prints the problem description, reads the input string (line 1)
// and the k-mer length k (line 2) from filename, and prints the k-mers
// returned by rosa.MostFrequentKmers separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, k is not an integer, or the computation reports an error.
func BA1b(filename string) {
	BA1bDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	k, err := strconv.Atoi(lines[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion: %v", err)
	}

	mfks, err := rosa.MostFrequentKmers(input, k)
	if err != nil {
		log.Fatalf("Error: rosa.MostFrequentKmers: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(mfks, " "))
}

51
chapter1/ba1c.go

@ -0,0 +1,51 @@ @@ -0,0 +1,51 @@
package rosalindchapter1
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1c: Find the Reverse Complement of a String

// BA1cDescription writes the BA1c problem banner to standard output,
// one banner line per output line.
func BA1cDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1c:",
		"Find the Reverse Complement of a String",
		"",
		"Given a DNA input string,",
		"find the reverse complement",
		"of the DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba1c/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1c prints the problem description, reads the DNA string on the first
// line of filename, and prints the value returned by
// rosa.ReverseComplement for it.
//
// The process exits via log.Fatalf if the file cannot be read, is empty,
// or the computation reports an error.
func BA1c(filename string) {
	BA1cDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so an empty file fails cleanly instead of panicking.
	if len(lines) < 1 {
		log.Fatalf("Error: expected at least 1 line in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	result, err := rosa.ReverseComplement(input)
	if err != nil {
		log.Fatalf("Error: rosa.ReverseComplement: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(result)
}

61
chapter1/ba1d.go

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1d: Find all occurrences of pattern in string

// BA1dDescription writes the BA1d problem banner to standard output,
// one banner line per output line.
func BA1dDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1d:",
		"Find all occurrences of pattern in string",
		"",
		"Given a string input (genome) and a substring (pattern),",
		"return all starting positions in the genome where the",
		"pattern occurs in the genome.",
		"",
		"URL: http://rosalind.info/problems/ba1d/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1d prints the problem description, reads the pattern (line 1) and the
// genome (line 2) from filename, and prints the integer positions returned
// by rosa.FindOccurrences separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, or the computation reports an error.
func BA1d(filename string) {
	BA1dDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	pattern, genome := lines[0], lines[1]

	locs, err := rosa.FindOccurrences(pattern, genome)
	if err != nil {
		log.Fatalf("Error: rosa.FindOccurrences: %v", err)
	}

	// Convert to strings so the positions can be joined for printing.
	locStrs := make([]string, len(locs))
	for i, loc := range locs {
		locStrs[i] = strconv.Itoa(loc)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(locStrs, " "))
}

59
chapter1/ba1e.go

@ -0,0 +1,59 @@ @@ -0,0 +1,59 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1e: Find patterns forming clumps in a string

// BA1eDescription writes the BA1e problem banner to standard output,
// one banner line per output line.
func BA1eDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1e:",
		"Find patterns forming clumps in a string",
		"",
		"A clump is characterized by integers L and t",
		"if there is an interval in the genome of length L",
		"in which a given pattern occurs t or more times.",
		"",
		"URL: http://rosalind.info/problems/ba1e/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1e prints the problem description, reads the genome (line 1) and the
// whitespace-separated integers k, L and t (line 2) from filename, and
// prints the patterns returned by rosa.FindClumps separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, the second line has fewer than three fields or a field
// is not an integer, or the computation reports an error.
func BA1e(filename string) {
	BA1eDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	genome := lines[0]

	params := strings.Fields(lines[1])
	if len(params) < 3 {
		log.Fatalf("Error: expected 3 parameters (k L t) on second line, found %d", len(params))
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter k: %v", err)
	}
	L, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter L: %v", err)
	}
	t, err := strconv.Atoi(params[2])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter t: %v", err)
	}

	patterns, err := rosa.FindClumps(genome, k, L, t)
	if err != nil {
		log.Fatalf("Error: rosa.FindClumps: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(patterns, " "))
}

61
chapter1/ba1f.go

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1f: Find positions in a gene that minimize skew

// BA1fDescription prints a short description of problem BA1f
// to standard output, one line at a time.
func BA1fDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1f:",
		"Find positions in a gene that minimize skew",
		"",
		"The skew of a genome is defined as the difference",
		"between the number of C codons and the number of G",
		"codons. Given a DNA string, this function should",
		"compute the cumulative skew for each position in",
		"the genome, and report the indices where the skew",
		"value is minimized.",
		"",
		"URL: http://rosalind.info/problems/ba1f/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1f prints the problem description, reads the genome on the first line
// of filename, and prints the integer positions returned by
// rosa.MinSkewPositions separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, is empty,
// or the computation reports an error.
func BA1f(filename string) {
	BA1fDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so an empty file fails cleanly instead of panicking.
	if len(lines) < 1 {
		log.Fatalf("Error: expected at least 1 line in %s, found %d", filename, len(lines))
	}
	genome := lines[0]

	minskew, err := rosa.MinSkewPositions(genome)
	if err != nil {
		log.Fatalf("Error: rosa.MinSkewPositions: %v", err)
	}

	// Convert to strings so the positions can be joined for printing.
	minskewStrs := make([]string, len(minskew))
	for i, pos := range minskew {
		minskewStrs[i] = strconv.Itoa(pos)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(minskewStrs, " "))
}

53
chapter1/ba1g.go

@ -0,0 +1,53 @@ @@ -0,0 +1,53 @@
package rosalindchapter1
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1g: Find Hamming distance between two DNA strings

// BA1gDescription writes the BA1g problem banner to standard output,
// one banner line per output line.
func BA1gDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1g:",
		"Find Hamming distance between two DNA strings",
		"",
		"The Hamming distance between two strings HammingDistance(p,q)",
		"is the number of characters different between the two",
		"strands. This program computes the Hamming distance",
		"between two strings.",
		"",
		"URL: http://rosalind.info/problems/ba1g/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1g prints the problem description, reads the strings p (line 1) and
// q (line 2) from filename, and prints the value returned by
// rosa.HammingDistance for them.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, or the computation reports an error.
func BA1g(filename string) {
	BA1gDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	p, q := lines[0], lines[1]

	hamm, err := rosa.HammingDistance(p, q)
	if err != nil {
		log.Fatalf("Error: rosa.HammingDistance: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(hamm)
}

66
chapter1/ba1h.go

@ -0,0 +1,66 @@ @@ -0,0 +1,66 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1h: Find approximate occurrences of pattern in string

// BA1hDescription writes the BA1h problem banner to standard output,
// one banner line per output line.
func BA1hDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1h:",
		"Find approximate occurrences of pattern in string",
		"",
		"Given a string Text and a string Pattern, and a maximum",
		"Hamming distance d, return all locations in Text where",
		"there is an approximate match with Pattern (i.e., a pattern",
		"with a Hamming distance from Pattern of d or less).",
		"",
		"URL: http://rosalind.info/problems/ba1h/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1h prints the problem description, reads the pattern (line 1), the
// text (line 2) and the integer d (line 3) from filename, and prints the
// integer positions returned by rosa.FindApproximateOccurrences separated
// by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than three lines, d is not an integer, or the computation reports an
// error.
func BA1h(filename string) {
	BA1hDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 3 {
		log.Fatalf("Error: expected at least 3 lines in %s, found %d", filename, len(lines))
	}
	pattern, text := lines[0], lines[1]

	d, err := strconv.Atoi(lines[2])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter d: %v", err)
	}

	approx, err := rosa.FindApproximateOccurrences(pattern, text, d)
	if err != nil {
		log.Fatalf("Error: rosa.FindApproximateOccurrences: %v", err)
	}

	// strconv.Itoa cannot fail, so no error check is needed in this loop.
	approxStrs := make([]string, len(approx))
	for i, pos := range approx {
		approxStrs[i] = strconv.Itoa(pos)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(approxStrs, " "))
}

70
chapter1/ba1i.go

@ -0,0 +1,70 @@ @@ -0,0 +1,70 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1i: Most Frequent Words with Mismatches

// BA1iDescription writes the BA1i problem banner to standard output,
// one banner line per output line.
func BA1iDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1i:",
		"Most Frequent Words with Mismatches",
		"",
		"Given an input string and a maximum allowable",
		"Hamming distance d, report the most frequent",
		"kmer that either occurs or whose Hamming neighbors",
		"occur most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1i/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1i prints the problem description, reads the input string (line 1)
// and the whitespace-separated integers k and d (line 2) from filename,
// and prints the k-mers returned by rosa.MostFrequentKmersMismatches
// separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, the second line has fewer than two fields or a field is
// not an integer, or the computation reports an error.
func BA1i(filename string) {
	BA1iDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	// Both params[0] and params[1] are read below, so two tokens are required.
	params := strings.Fields(lines[1])
	if len(params) < 2 {
		log.Fatalf("Error splitting second line: only found 0-1 tokens")
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter k: %v", err)
	}
	d, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter d: %v", err)
	}

	mfksMis, err := rosa.MostFrequentKmersMismatches(input, k, d)
	if err != nil {
		log.Fatalf("Error: rosa.MostFrequentKmersMismatches: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(mfksMis, " "))
}

71
chapter1/ba1j.go

@ -0,0 +1,71 @@ @@ -0,0 +1,71 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1j: Most Frequent Words with Mismatches and Reverse Complements

// BA1jDescription writes the BA1j problem banner to standard output,
// one banner line per output line.
func BA1jDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1j:",
		"Most Frequent Words with Mismatches and Reverse Complements",
		"",
		"Given an input string and a maximum allowable",
		"Hamming distance d, report the most frequent",
		"kmer that either occurs or whose Hamming neighbors",
		"occur most frequently in the input string and in the",
		"reverse complement of the input string.",
		"",
		"URL: http://rosalind.info/problems/ba1j/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1j prints the problem description, reads the input string (line 1)
// and the whitespace-separated integers k and d (line 2) from filename,
// and prints the k-mers returned by
// rosa.MostFrequentKmersMismatchesRevComp separated by spaces.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, the second line has fewer than two fields or a field is
// not an integer, or the computation reports an error.
func BA1j(filename string) {
	BA1jDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	// Both params[0] and params[1] are read below, so two tokens are required.
	params := strings.Fields(lines[1])
	if len(params) < 2 {
		log.Fatalf("Error splitting second line: only found 0-1 tokens")
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter k: %v", err)
	}
	d, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter d: %v", err)
	}

	mfksMis, err := rosa.MostFrequentKmersMismatchesRevComp(input, k, d)
	if err != nil {
		log.Fatalf("Error: rosa.MostFrequentKmersMismatchesRevComp: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(mfksMis, " "))
}

62
chapter1/ba1k.go

@ -0,0 +1,62 @@ @@ -0,0 +1,62 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1k: Generate Frequency Array

// BA1kDescription writes the BA1k problem banner to standard output,
// one banner line per output line.
func BA1kDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1k:",
		"Generate Frequency Array",
		"",
		"Given an integer k, generate the frequency array of",
		"an input string. The frequency array is an array of",
		"counts with one count per index, and integers mapped",
		"to kmers.",
		"",
		"URL: http://rosalind.info/problems/ba1k/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1k prints the problem description, reads the input string (line 1)
// and the integer k (line 2) from filename, and prints each element
// returned by rosa.FrequencyArray followed by a single space.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, k is not an integer, or the computation reports an error.
func BA1k(filename string) {
	BA1kDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	k, err := strconv.Atoi(lines[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for parameter k: %v", err)
	}

	arr, err := rosa.FrequencyArray(input, k)
	if err != nil {
		log.Fatalf("Error: rosa.FrequencyArray: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	// Output format is kept as-is: space after every element, no final newline.
	for _, e := range arr {
		fmt.Print(e, " ")
	}
}

51
chapter1/ba1lima.go

@ -0,0 +1,51 @@ @@ -0,0 +1,51 @@
package rosalindchapter1
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1L: Pattern to Number

// BA1LDescription writes the BA1L problem banner to standard output,
// one banner line per output line.
func BA1LDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1L:",
		"Pattern to Number",
		"",
		"Given an input kmer of length k, convert it to",
		"an integer corresponding to its lexicographic",
		"order among kmers of length k.",
		"",
		"URL: http://rosalind.info/problems/ba1l/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1L prints the problem description, reads the k-mer on the first line
// of filename, and prints the value returned by rosa.PatternToNumber
// for it.
//
// The process exits via log.Fatalf if the file cannot be read, is empty,
// or the computation reports an error.
func BA1L(filename string) {
	BA1LDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so an empty file fails cleanly instead of panicking.
	if len(lines) < 1 {
		log.Fatalf("Error: expected at least 1 line in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	number, err := rosa.PatternToNumber(input)
	if err != nil {
		log.Fatalf("Error: rosa.PatternToNumber: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(number)
}

62
chapter1/ba1m.go

@ -0,0 +1,62 @@ @@ -0,0 +1,62 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1m: Number to Pattern

// BA1mDescription writes the BA1m problem banner to standard output,
// one banner line per output line.
func BA1mDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1m:",
		"Number to Pattern",
		"",
		"Given an integer and a kmer length k, convert",
		"the integer to its corresponding kmer.",
		"",
		"URL: http://rosalind.info/problems/ba1m/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1m prints the problem description, reads the integer number (line 1)
// and the integer k (line 2) from filename, and prints the value returned
// by rosa.NumberToPattern for them.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, either line is not an integer, or the computation
// reports an error.
func BA1m(filename string) {
	BA1mDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}

	number, err := strconv.Atoi(lines[0])
	if err != nil {
		log.Fatalf("Error: string to int conversion for number: %v", err)
	}
	k, err := strconv.Atoi(lines[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for k: %v", err)
	}

	result, err := rosa.NumberToPattern(number, k)
	if err != nil {
		log.Fatalf("Error: rosa.NumberToPattern: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(result)
}

60
chapter1/ba1n.go

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
package rosalindchapter1
import (
"fmt"
"log"
"strconv"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// Rosalind: Problem BA1n: Calculating d-Neighborhood of String

// BA1nDescription writes the BA1n problem banner to standard output,
// one banner line per output line.
func BA1nDescription() {
	for _, text := range []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1n:",
		"Calculating d-Neighborhood of String",
		"",
		"Given an input string of DNA and a Hamming",
		"distance d, compute all DNA strings that",
		"are a Hamming distance of up to d away.",
		"",
		"URL: http://rosalind.info/problems/ba1n/",
		"",
	} {
		fmt.Println(text)
	}
}
// BA1n prints the problem description, reads the input string (line 1)
// and the integer d (line 2) from filename, and prints each value ranged
// over from the result of rosa.VisitHammingNeighbors on its own line.
//
// The process exits via log.Fatalf if the file cannot be read, has fewer
// than two lines, d is not an integer, or the computation reports an error.
func BA1n(filename string) {
	BA1nDescription()

	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("Error: rosa.ReadLines: %v", err)
	}
	// Guard the indexing below so a short file fails cleanly instead of panicking.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d", filename, len(lines))
	}
	input := lines[0]

	d, err := strconv.Atoi(lines[1])
	if err != nil {
		log.Fatalf("Error: string to int conversion for d: %v", err)
	}

	result, err := rosa.VisitHammingNeighbors(input, d)
	if err != nil {
		log.Fatalf("Error: rosa.VisitHammingNeighbors: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	for _, item := range result {
		fmt.Println(item)
	}
}

20
chapter1/chapter1_test.go

@ -0,0 +1,20 @@ @@ -0,0 +1,20 @@
package rosalindchapter1
import "testing"
// TestChapter01 runs each Chapter 1 solution, in problem order, against its
// input file in the for_real directory. It makes no assertions about the
// printed answers; it only exercises every entry point once.
func TestChapter01(t *testing.T) {
	problems := []struct {
		solve func(string)
		input string
	}{
		{BA1a, "for_real/rosalind_ba1a.txt"},
		{BA1b, "for_real/rosalind_ba1b.txt"},
		{BA1c, "for_real/rosalind_ba1c.txt"},
		{BA1d, "for_real/rosalind_ba1d.txt"},
		{BA1e, "for_real/rosalind_ba1e.txt"},
		{BA1f, "for_real/rosalind_ba1f.txt"},
		{BA1g, "for_real/rosalind_ba1g.txt"},
		{BA1h, "for_real/rosalind_ba1h.txt"},
		{BA1i, "for_real/rosalind_ba1i.txt"},
		{BA1j, "for_real/rosalind_ba1j.txt"},
		{BA1k, "for_real/rosalind_ba1k.txt"},
		{BA1L, "for_real/rosalind_ba1l.txt"},
		{BA1m, "for_real/rosalind_ba1m.txt"},
		{BA1n, "for_real/rosalind_ba1n.txt"},
	}
	for _, p := range problems {
		p.solve(p.input)
	}
}

2
chapter1/for_real/rosalind_ba1a.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
GTCCGGGGTCCGGGGTCCCGGGGTTACCCGGGGTCCCGGGGTCCGGGGTTACCCGGGGTAACCGGGGTCACCGGGGTCTCCGCCGGGGTGGCCGGGGTACCCGGGGTCCCGGGGTCCGGGGTCCGGGGTGCACGGGCCGGGGTGTACCGGGGTCCGGGGTCGGCGTCCGGGGTCCGGGGTCTGGAAGTTGAACTGACACCGGGGTTCCGGGGTCCGGGGTCCGGGGTCCGGGGTGGAACCCGGGGTGTCCCGGGGTCCGGGGTCCGGGGTCCGGGGTTCCGGGGTAGGCCGGGGTCCCGGGGTAGTGTGTGCCGGGGTCCTCGCCGGGGTAGCGCAAAACCGGGGTGCGGTAACTACCGGGGTCCGGGGTGCCGGGGTCCGGGGTTCCGGGGTCCGGGGTTCTCCGGGGTCCCGGGGTAGCCCGGGGTATCCGGGGTCCGGGGTCCGGGGTCCGGGGTACCGGGGTCAGGGCCGGGGTGACCGGGGTTCCGGGGTATCTGTTTATCCCGGGGTCCGGGGTCGGTAAACCGTCCGGGGTTTACCCCGGGGTCCGGGGTCTCGATCAAACCGGGGTTATGAGAATCCGGGGTCCGGGGTCCCGGGGTAGACCGGGGTACATCCCGGGGTGTCCGGGGTTACAAGCCGGGGTCCAAACGATTCCCGGGGTCCGGGGTTGCCCCGGGGTCCGGGGTGATGCACCGGGGTAAGCCGGGGTTGACGACCCCGGGGTCGCCGGGGTCTGCACTCCGGGGTTCCGGGGTAGCCGGGGTCAACCGGGGTAACCGGGGTTTGCCGGGGTCCCGGGGTTTGTCCGGGGTCCACCGGGGTCCGGGGTGCCGGGGTTCTACCGGGGTGCCGGGGTACACCGGGGTAGCCGGGGTATCCGGGGTACCGGGGTAAACCGGGGTGCCGGGGTCCGGGGTCCGGGGTTCCCGGGGTTTCTACCGGGGTGGGACCGGGGTCCGGGGTCCGGGGTATTAACCACCGGGGTGCGACCGGGGTGGCCGGGGTCCGGGGTATCCGGGGTACATCCGGGGTACGG
CCGGGGTCC

2
chapter1/for_real/rosalind_ba1b.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
AGTAGGTTCAGGGCGTTTAATAGCGAAAACAAATAATAGCAGTAGGTTGTACCACGTACCACTAATAGCGAAAACAAAAGTAGGTTCAGGGCGTTGTACCACGTACCACGAAAACAAATAATAGCTAATAGCGAAAACAAAGTACCACGAAAACAAATAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCAGTAGGTTCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACTAATAGCCAGGGCGTTAGTAGGTTGAAAACAAACAGGGCGTTAGTAGGTTCAGGGCGTTGTACCACTAATAGCTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCCAGGGCGTTGTACCACGAAAACAAACAGGGCGTTTAATAGCAGTAGGTTGAAAACAAATAATAGCCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTAGTAGGTTGTACCACGTACCACGAAAACAAAGTACCACGAAAACAAATAATAGCGTACCACGAAAACAAAAGTAGGTTTAATAGCGAAAACAAAAGTAGGTTAGTAGGTTCAGGGCGTTTAATAGCTAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCTAATAGCGTACCACCAGGGCGTTGTACCACGTACCACGAAAACAAAGAAAACAAAAGTAGGTTTAATAGCAGTAGGTTAGTAGGTTAGTAGGTTTAATAGCGAAAACAAACAGGGCGTTTAATAGCGTACCACGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAACAGGGCGTTGAAAACAAA
12

1
chapter1/for_real/rosalind_ba1c.txt

File diff suppressed because one or more lines are too long

2
chapter1/for_real/rosalind_ba1d.txt

File diff suppressed because one or more lines are too long

2
chapter1/for_real/rosalind_ba1e.txt

File diff suppressed because one or more lines are too long

1
chapter1/for_real/rosalind_ba1f.txt

File diff suppressed because one or more lines are too long

2
chapter1/for_real/rosalind_ba1g.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
ATGACTAGTTATGCGACACGTGTTCCTTAAACAAACCGCTGATTGCGGAGGGATCATGTTGAAACGCAGTCAGTTGGCGCTTTACAAGAATTTAAGTGTCCCTCGGAGATGCTCCACTACACGCCATGGCGAAACGGTTCAGTCTCTTAGAAGAAGAAAGATATAGGAGTTGCGCCACCGTGATATAAGCACCGCAGTATCTGAAGGGAGCACAACTTGCTGCGAACAGACTGGTACGGTTACGTCGGGGCTTCAGGCATCGTTGGCGAGGTAGGAATCCTTATGTTAATTTTAAATCGAAGCAAAACAGAACTGTTGATCACTCATGTGTCGTTAACCGGAAGACTGCGGGTGCTCAGCCCCAATCGACGGCTGTTAGGAATGGCACACTACTGTATTTGTGACGACTAACTTGACATTCGAAGGTATCTGCGGTTGTTAAACGCCGATAATCGCCACCGCAGTTCTGAAAGGCTATATGTATCACGGTGATTTACGGCATTGTAAGCCCACTCAGAGTGCGTCGTAGGTTACGCGTTCTGAGTTGAAATAATCCAGTCGAACACGGTTGGTATCATGAATTCAGACTACCGTTTCTTGACTCCCGTCCTATACGAGTCTAGAGCGAACTTCGGGGTAAGAAATCACAATTAATCTCTTCCTTGTGTGATCCGCAAGGAAGCTGAGCTCAATTTGCAAGTACAGGTAGGTGGCAATCGAGAGCTACTAACACTCTTGTGTCGTTCGTAATTCATAAATAAAAAGACACGCCCTTATGATTGAAGCCTGAACTGCGGCAACGGTAGGTTTCCAAAGAGGATCGAGTCAGCGATACCCCCTGTACGCAGACAGATTATTACCCCCACTCTGCAATGTAGAAGTCTTAAAAACGCACTCTAGGCCAGTAACCAACCAGCTGGGTGGTGCGTTACCTAGTGCTATACAACAGTACCACCAGATTAGAAGCATGCCAGGTGTCTCGACACCTCCAATTCGTCATTTGGTGTGAGAAAAAGATATACCGCCAAGTTGCCATACCTGCAC
ATGGACTATTTTGCTATACGATTACTAGGAATAAGTTGAACAACCCTTGCTTTTCTTTTTAACACAGCCAGAGGCTCGGGATGGAACGCGTCATCTCGCGGACTCAGAGATGCCAGATGGTAGGCCTCTTCCAACGAGTAACTTACGATAATTTGATAGATTCTTGAACGTAGTGTGTCGACCTCCGTACCGGAAAATTTTCTTATCTCTAGTGAACCGTCGAGCTGTACTTTAGACCCCTGTGCGACGATAGGTCTCCTGCGTTAGGTATTTTACATATTCCGCTGGGACCCAAATTTTTCCCGCGAACGGGATAGAGGTAGTATCTAACTTCGTTTACACAACGTAACATCCCGCCATGGTCGTTACGGGCGTACCCGCGCGGCGAAGGGCGCGGACCCGCGAATCATAAACTAAGAAAAGAGTATGTTGAAGCGCACCCGCCATGTCGCTCGACATCTGCCTGGCATGCTATAATACCTGCTGAGCAGTACATCCACGGCGTCTATGAGCGCCACGTCAATCGGATCAGCCGGAATGCTGATTCTGTTGTGTCGCCGTTATGAATTTGGAGGTGGCACGCAAGGTTCCAGCCCTGTATAGTGTGTTAAAGTCCACTTTTCATCATTGCTTAATGTTTAATCGGGTCCTCACCCGAAACTGTGATTGCGTTCTTATGTAAAGCTCTCGTTAGCAGACACCAATCTATGAAACTTCCGCCTCGGGCAACTTTCATGAGGCACTGTAACATTTGTTGCATAGAGCCGTACTATGGCCACCGTATTTTATATGGCTGACGTAAAGAGCCTGTTAATGTGTAATTCGAAGGTCCCTTTAGATGAGTCTCATGCCAGACCCAGAAGAGTGACGGCTGTCTCGGAGTGGGTATACGTTAGCCCCTGCCAATAGTAAAGCGTACACCTTGTCTTCAAGACTGTCACTGACACAAATTCCCCGACCCATATTCCGTTCCGGGTTGGTCTACCTTACGGCGGGAATCCAGAGGCCTAATGCGCTGGTTATATACCACCGGATCCCGATATA

3
chapter1/for_real/rosalind_ba1h.txt

File diff suppressed because one or more lines are too long

2
chapter1/for_real/rosalind_ba1i.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
CAGTGTAAGTAACGGATTGAGGACGTAACGGACTAGTATTCGAGGACAGTGTAATTGAGGACGTAACGGAGTAACGGATCGAGGACTAGTATCAGTGTAATTGAGGACGTAACGGAGTAACGGACAGTGTAACAGTGTAACTAGTATGTAACGGACAGTGTAAGTAACGGAGTAACGGAGTAACGGATCGAGGATTGAGGACCTAGTATCTAGTATTCGAGGATCGAGGATTGAGGACCTAGTATCTAGTATGTAACGGATTGAGGACTTGAGGACCTAGTATTCGAGGATCGAGGAGTAACGGACAGTGTAACAGTGTAATCGAGGATCGAGGACAGTGTAATTGAGGACTCGAGGACTAGTATTTGAGGACTCGAGGATTGAGGACGTAACGGAGTAACGGATCGAGGACTAGTATGTAACGGAGTAACGGACAGTGTAACTAGTATTTGAGGACCAGTGTAACAGTGTAACAGTGTAACAGTGTAACAGTGTAACTAGTATGTAACGGAGTAACGGATTGAGGACGTAACGGAGTAACGGATCGAGGATTGAGGACCTAGTATTTGAGGACGTAACGGATTGAGGACCTAGTATCTAGTATCAGTGTAACTAGTATGTAACGGATCGAGGATCGAGGACAGTGTAATTGAGGACTTGAGGACCAGTGTAATCGAGGATTGAGGACTTGAGGACTTGAGGACTCGAGGACAGTGTAAGTAACGGAGTAACGGATCGAGGACAGTGTAATTGAGGACCTAGTATTTGAGGACCTAGTATGTAACGGATTGAGGACCAGTGTAACTAGTATCTAGTATCTAGTATCAGTGTAATTGAGGACTCGAGGATTGAGGAC
6 2

2
chapter1/for_real/rosalind_ba1j.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
TTACTCGCTGGCAGGTTGACGGAGAAATATTGGTGACGGAGAAGACGGAGAATGGGCATATATTGGTTGGCAGGTTTGGGCATTTACTCGCGACGGAGAATTACTCGCTGGGCATTTACTCGCTGGGCATTTACTCGCTGGCAGGTTTGGCAGGTTATATTGGTATATTGGTATATTGGTTGGGCATTTACTCGCGACGGAGAATGGCAGGTTGACGGAGAAGACGGAGAAATATTGGTTTACTCGCATATTGGTGACGGAGAAATATTGGTTTACTCGCTTACTCGCTGGGCATTGGGCATTGGCAGGTTGACGGAGAAGACGGAGAATTACTCGCATATTGGTTTACTCGCGACGGAGAATTACTCGCATATTGGTGACGGAGAAGACGGAGAATTACTCGCTGGCAGGTTTGGGCATTGGGCATTTACTCGCTGGCAGGTTTGGGCATTGGCAGGTTGACGGAGAAGACGGAGAATGGCAGGTTTGGCAGGTTTGGCAGGTTTGGCAGGTTTGGGCATGACGGAGAATTACTCGCTGGCAGGTTTTACTCGCTGGCAGGTTTTACTCGCATATTGGTTGGCAGGTTTTACTCGCTTACTCGCTTACTCGCGACGGAGAAGACGGAGAAATATTGGTATATTGGTATATTGGTTGGCAGGTTTGGCAGGTTTGGCAGGTTATATTGGTTTACTCGCTTACTCGCATATTGGTTGGCAGGTTTGGGCATTGGCAGGTTTGGCAGGTTGACGGAGAATGGCAGGTTGACGGAGAAGACGGAGAATGGGCATTGGGCATGACGGAGAATGGCAGGTT
5 3

2
chapter1/for_real/rosalind_ba1k.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
CAATGAGTGATATTGTTTGGTAGCAATCCATAGTTGAGGCCCTACGGAAGTTGCATCCGGGGCCCGTAGGACTCGCGGGCAAAAGATTGCTAAGCATTCTTGGTCACCATCGCAGTATTGCTCGTAGTCGGGTGGGTTTGCCGAACTGATAATGTGCCAGTCCCCGCGGAACCGGAATCAGGGCAACGGCTAGAGATACTCTCCGTGGGTCCTAAGTAGGAGGCTTGGGGCTGAGTGAGCAACCACTTACTCGAGTGTGTTGTTTTCTGTGCGTCCCCCGGGCGGTGTTCATTTAAGGATGACCGGGTGAGTAACCGAACAATTTTGTTGCCATGAAACGCGGCAATAACTCAATCTACCAGTACGGACAAATATAATGTTGGGCCCTTTTAGCTTAACGGACGTCGTCCCATTCTGACCTTAACTAAGACTATAAGGTAGGGGGTCAGATACGACACGGTCAGTAGGTGGATATACCGTGACAAATACCGGCACCTATGCTAATTGCGATTTGGAATGGAACGCGCCGAATACTTCGGATCATATCACCGTCCCTGTACTCGAAAGTTCTGCCACGAACAAGTCTCCTACTTGTGTCTTTTCTCACTGCGAAG
5

1
chapter1/for_real/rosalind_ba1l.txt

@ -0,0 +1 @@ @@ -0,0 +1 @@
TGCCGTATTGACGAACACCGAGCCCTAAT

2
chapter1/for_real/rosalind_ba1m.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
6003
9

2
chapter1/for_real/rosalind_ba1n.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
TGCCCTAG
3

1
chapter1/utils.go

@ -0,0 +1 @@ @@ -0,0 +1 @@
package rosalindchapter1

69
chapter2/Readme.md

@ -0,0 +1,69 @@ @@ -0,0 +1,69 @@
# Rosalind Chapter 2
This folder contains the `chapter2` module, which
provides functions for each of the problems from
Chapter 2 of Rosalind.info's Bioinformatics Textbook
track.
## How to run
* Each problem has its own function (example: `BA2a(...)`)
* Each problem expects an input file
(example input files in `for_real` directory,
or provide the input file downloaded
from Rosalind.info)
* Pass the input file name to the function, like this:
`BA2a("rosalind_ba2a.txt")`
## Quick Start
To use the functions in this package, start by installing it:
```
go get github.com/charlesreid1/go-rosalind/chapter2
```
Once you have installed the `chapter2` package,
you can import it, then call the function for whichever
Rosalind.info problem you want to solve from Chapter 2:
```
package main
import (
rch2 "github.com/charlesreid1/go-rosalind/chapter2"
)
func main() {
rch2.BA2a("rosalind_ba2a.txt")
}
```
## Examples
See `chapter2_test.go` for examples.
## Tests
To run tests of all Chapter 2 problems, run
`go test` from this directory:
```
go test -v
```
or, from the parent directory, the root of the
go-rosalind repository:
```
go test -v ./chapter2/...
```
Note that this solves every problem in
Chapter 2 and prints the solutions (so there
is a lot of spew). It does not check the
solutions (for that, see the tests in the
`rosalind` library.)

67
chapter2/ba2a.go

@ -0,0 +1,67 @@ @@ -0,0 +1,67 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2aDescription prints the banner for Rosalind problem BA2a
// (Implement Motif Enumeration) to standard output: a separator, the
// problem title, a one-sentence statement of the task, and the problem URL.
func BA2aDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2a:",
		"Implement Motif Enumeration",
		"",
		"Given a collection of strings of DNA, find all motifs (kmers of length k and Hamming distance d from all DNA strings).",
		"",
		"URL: http://rosalind.info/problems/ba2a/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
// BA2a prints the description of Rosalind problem BA2a, then solves the
// problem for the given input file and prints the results on one line,
// separated by single spaces.
//
// Input file layout: the first line holds two whitespace-separated integers,
// k and d; every following line is one DNA string. These are handed to
// rosa.FindMotifs.
//
// Any failure (unreadable file, empty file, malformed parameter line, or an
// error reported by rosa.FindMotifs) is reported with log.Fatalf, which
// terminates the program.
func BA2a(filename string) {
	BA2aDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected a parameter line followed by DNA strings", filename)
	}

	// Fields (rather than Split on a single space) tolerates repeated or
	// trailing whitespace on the parameter line.
	params := strings.Fields(lines[0])
	if len(params) < 2 {
		log.Fatalf("input file %s: expected \"k d\" on the first line, found %q", filename, lines[0])
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}
	d, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("string to int conversion for d: %v", err)
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	results, err := rosa.FindMotifs(dna, k, d)
	if err != nil {
		log.Fatalf("rosa.FindMotifs: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(results, " "))
}

61
chapter2/ba2b.go

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2bDescription prints the banner for Rosalind problem BA2b
// (Find a Median String) to standard output: a separator, the problem
// title, a one-sentence statement of the task, and the problem URL.
func BA2bDescription() {
	text := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA2b:",
		"Find a Median String",
		"",
		"Given a kmer length k and a set of strings of DNA, find the kmer(s) that minimize the L1 norm of the distance from it to all other DNA strings.",
		"",
		"URL: http://rosalind.info/problems/ba2b/",
		"",
	}
	for i := 0; i < len(text); i++ {
		fmt.Println(text[i])
	}
}
// BA2b prints the description of Rosalind problem BA2b, then solves the
// problem for the given input file and prints the result.
//
// Input file layout: the first line holds the integer k; every following
// line is one DNA string. These are handed to rosa.MedianString.
//
// Any failure (unreadable file, empty file, a non-integer k, or an error
// reported by rosa.MedianString) is reported with log.Fatalf, which
// terminates the program.
func BA2b(filename string) {
	BA2bDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected k followed by DNA strings", filename)
	}

	// A silently ignored parse error would run the search with k = 0.
	k, err := strconv.Atoi(lines[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	results, err := rosa.MedianString(dna, k)
	if err != nil {
		log.Fatalf("rosa.MedianString: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(results)
}

54
chapter2/ba2c.go

@ -0,0 +1,54 @@ @@ -0,0 +1,54 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2cDescription prints the banner for Rosalind problem BA2c
// (Find a Profile-most Probable k-mer in a String) to standard output: a
// separator, the problem title, a one-sentence statement of the task, and
// the problem URL.
func BA2cDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2c:",
		"Find a Profile-most Probable k-mer in a String",
		"",
		"Given a profile matrix, find the most probable k-mer to generate the given DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba2c/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
// BA2c prints the description of Rosalind problem BA2c, then solves the
// problem for the given input file and prints the results on one line,
// separated by single spaces.
//
// Input file layout: line 1 is the DNA string, line 2 is the integer k, and
// lines 3-6 are the four rows of the profile matrix. The matrix rows are
// parsed by rosa.ReadMatrix32 and the search is done by
// rosa.ProfileMostProbableKmers.
//
// Any failure (unreadable file, fewer than six lines, a non-integer k, or an
// error reported by either rosa call) is reported with log.Fatalf, which
// terminates the program.
func BA2c(filename string) {
	BA2cDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Guard lines[2:6] below: a short file would otherwise cause a
	// slice-bounds panic instead of a readable error message.
	if len(lines) < 6 {
		log.Fatalf("input file %s: expected at least 6 lines (DNA string, k, 4 profile rows), found %d", filename, len(lines))
	}

	dna := lines[0]
	k, err := strconv.Atoi(lines[1])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}

	// Parse the four profile rows into a matrix.
	profile, err := rosa.ReadMatrix32(lines[2:6], k)
	if err != nil {
		log.Fatalf("rosa.ReadMatrix32: %v", err)
	}

	// Find the most probable kmer(s) under the profile.
	result, err := rosa.ProfileMostProbableKmers(dna, k, profile)
	if err != nil {
		log.Fatalf("rosa.ProfileMostProbableKmers: %v", err)
	}

	fmt.Println(strings.Join(result, " "))
}

67
chapter2/ba2d.go

@ -0,0 +1,67 @@ @@ -0,0 +1,67 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2dDescription prints the banner for Rosalind problem BA2d
// (Implement GreedyMotifSearch) to standard output: a separator, the
// problem title, a short statement of the task, and the problem URL.
func BA2dDescription() {
	text := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA2d:",
		"Implement GreedyMotifSearch",
		"",
		"Find a collection of motif strings using a greedy motif search. Return first-occurring profile-most probable kmer.",
		"",
		"URL: http://rosalind.info/problems/ba2d/",
		"",
	}
	for i := 0; i < len(text); i++ {
		fmt.Println(text[i])
	}
}
// BA2d prints the description of Rosalind problem BA2d, then solves the
// problem for the given input file and prints the results on one line,
// separated by single spaces.
//
// Input file layout: the first line holds two whitespace-separated integers,
// k and t; every following line is one DNA string. These are handed to
// rosa.GreedyMotifSearchNoPseudocounts.
//
// Any failure (unreadable file, empty file, malformed parameter line, or an
// error reported by the rosa call) is reported with log.Fatalf, which
// terminates the program.
func BA2d(filename string) {
	BA2dDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected a parameter line followed by DNA strings", filename)
	}

	// Fields (rather than Split on a single space) tolerates repeated or
	// trailing whitespace on the parameter line.
	params := strings.Fields(lines[0])
	if len(params) < 2 {
		log.Fatalf("input file %s: expected \"k t\" on the first line, found %q", filename, lines[0])
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}
	t, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("string to int conversion for t: %v", err)
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	result, err := rosa.GreedyMotifSearchNoPseudocounts(dna, k, t)
	if err != nil {
		log.Fatalf("rosa.GreedyMotifSearchNoPseudocounts: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(result, " "))
}

67
chapter2/ba2e.go

@ -0,0 +1,67 @@ @@ -0,0 +1,67 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2eDescription prints the banner for Rosalind problem BA2e
// (Implement GreedyMotifSearch with Pseudocounts) to standard output: a
// separator, the problem title, a one-sentence statement of the task, and
// the problem URL.
func BA2eDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2e:",
		"Implement GreedyMotifSearch with Pseudocounts",
		"",
		"Re-implement problem BA2d (greedy motif search) using pseudocounts, which avoid setting probabilities to an absolute value of zero.",
		"",
		"URL: http://rosalind.info/problems/ba2e/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
// BA2e prints the description of Rosalind problem BA2e, then solves the
// problem for the given input file and prints the results on one line,
// separated by single spaces.
//
// Input file layout: the first line holds two whitespace-separated integers,
// k and t; every following line is one DNA string. These are handed to
// rosa.GreedyMotifSearchPseudocounts.
//
// Any failure (unreadable file, empty file, malformed parameter line, or an
// error reported by the rosa call) is reported with log.Fatalf, which
// terminates the program.
func BA2e(filename string) {
	BA2eDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected a parameter line followed by DNA strings", filename)
	}

	// Fields (rather than Split on a single space) tolerates repeated or
	// trailing whitespace on the parameter line.
	params := strings.Fields(lines[0])
	if len(params) < 2 {
		log.Fatalf("input file %s: expected \"k t\" on the first line, found %q", filename, lines[0])
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}
	t, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("string to int conversion for t: %v", err)
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	result, err := rosa.GreedyMotifSearchPseudocounts(dna, k, t)
	if err != nil {
		log.Fatalf("rosa.GreedyMotifSearchPseudocounts: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(result, " "))
}

64
chapter2/ba2f.go

@ -0,0 +1,64 @@ @@ -0,0 +1,64 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2fDescription prints the banner for Rosalind problem BA2f
// (Implement RandomizedMotifSearch with Pseudocounts) to standard output: a
// separator, the problem title, a one-sentence statement of the task, and
// the problem URL.
func BA2fDescription() {
	text := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA2f:",
		"Implement RandomizedMotifSearch with Pseudocounts",
		"",
		"Re-implement problem BA2e (greedy motif search with pseudocounts) but use a random, instead of greedy, algorithm to pick motif kmers from each DNA string.",
		"",
		"URL: http://rosalind.info/problems/ba2f/",
		"",
	}
	for i := 0; i < len(text); i++ {
		fmt.Println(text[i])
	}
}
// BA2f prints the description of Rosalind problem BA2f, then solves the
// problem for the given input file and prints each resulting string on its
// own line.
//
// Input file layout: the first line holds two whitespace-separated integers,
// k and t; every following line is one DNA string. These are handed to
// rosa.ManyRandomMotifSearches together with a fixed count of 100.
//
// Any failure (unreadable file, empty file, malformed parameter line, or an
// error reported by the rosa call) is reported with log.Fatalf, which
// terminates the program.
func BA2f(filename string) {
	BA2fDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected a parameter line followed by DNA strings", filename)
	}

	// Fields (rather than Split on a single space) tolerates repeated or
	// trailing whitespace on the parameter line.
	params := strings.Fields(lines[0])
	if len(params) < 2 {
		log.Fatalf("input file %s: expected \"k t\" on the first line, found %q", filename, lines[0])
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}
	t, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("string to int conversion for t: %v", err)
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	// NOTE(review): presumably the number of randomized searches to run;
	// confirm against rosa.ManyRandomMotifSearches.
	n := 100

	result, err := rosa.ManyRandomMotifSearches(dna, k, t, n)
	if err != nil {
		log.Fatalf("rosa.ManyRandomMotifSearches: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(result, "\n"))
}

65
chapter2/ba2g.go

@ -0,0 +1,65 @@ @@ -0,0 +1,65 @@
package rosalindchapter2
import (
"fmt"
"log"
"strconv"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA2gDescription prints the banner for Rosalind problem BA2g
// (Implement GibbsSampler) to standard output: a separator, the problem
// title, a short statement of the task, and the problem URL.
func BA2gDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA2g:",
		"Implement GibbsSampler",
		"",
		"Generate probabilities of each kmer in a DNA string using its profile. Use these to assemble a list of probabilities. GibbsSampler uses this random number generator to generate a random k-mer.",
		"",
		"URL: http://rosalind.info/problems/ba2g/",
		"",
	}
	for i := range banner {
		fmt.Println(banner[i])
	}
}
// BA2g prints the description of Rosalind problem BA2g, then solves the
// problem for the given input file and prints each resulting string on its
// own line.
//
// Input file layout: the first line holds whitespace-separated integers
// k, t and, optionally, N; every following line is one DNA string. When N is
// present it replaces the previously hard-coded value of 100 passed to
// rosa.ManyGibbsSamplers; when it is absent, 100 is still used. The number
// of starts is fixed at 20.
//
// Any failure (unreadable file, empty file, malformed parameter line, or an
// error reported by rosa.ManyGibbsSamplers) is reported with log.Fatalf,
// which terminates the program.
func BA2g(filename string) {
	BA2gDescription()

	// Read the input file as a slice of lines.
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}
	if len(lines) < 1 {
		log.Fatalf("input file %s is empty: expected a parameter line followed by DNA strings", filename)
	}

	// Fields (rather than Split on a single space) tolerates repeated or
	// trailing whitespace on the parameter line.
	params := strings.Fields(lines[0])
	if len(params) < 2 {
		log.Fatalf("input file %s: expected \"k t [N]\" on the first line, found %q", filename, lines[0])
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		log.Fatalf("string to int conversion for k: %v", err)
	}
	t, err := strconv.Atoi(params[1])
	if err != nil {
		log.Fatalf("string to int conversion for t: %v", err)
	}

	// Honor the third parameter when the file supplies one instead of
	// silently ignoring it.
	// NOTE(review): assumes the fourth argument of rosa.ManyGibbsSamplers is
	// the per-run iteration count N from the problem input; confirm against
	// the rosalind package.
	n := 100
	if len(params) >= 3 {
		n, err = strconv.Atoi(params[2])
		if err != nil {
			log.Fatalf("string to int conversion for N: %v", err)
		}
	}

	// Everything after the parameter line is a DNA string.
	dna := lines[1:]

	nStarts := 20
	result, err := rosa.ManyGibbsSamplers(dna, k, t, n, nStarts)
	if err != nil {
		log.Fatalf("rosa.ManyGibbsSamplers: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(result, "\n"))
}

13
chapter2/chapter2_test.go

@ -0,0 +1,13 @@ @@ -0,0 +1,13 @@
package rosalindchapter2
import "testing"
// TestChapter02 runs the enabled Chapter 2 solutions against their input
// files in the for_real directory. Only BA2g is currently enabled; the
// entries for BA2a through BA2f are kept, commented out, so they can be
// switched back on. No assertions are made about the printed answers.
func TestChapter02(t *testing.T) {
	problems := []struct {
		solve func(string)
		input string
	}{
		//{BA2a, "for_real/rosalind_ba2a.txt"},
		//{BA2b, "for_real/rosalind_ba2b.txt"},
		//{BA2c, "for_real/rosalind_ba2c.txt"},
		//{BA2d, "for_real/rosalind_ba2d.txt"},
		//{BA2e, "for_real/rosalind_ba2e.txt"},
		//{BA2f, "for_real/rosalind_ba2f.txt"},
		{BA2g, "for_real/rosalind_ba2g.txt"},
	}
	for _, p := range problems {
		p.solve(p.input)
	}
}

11
chapter2/for_real/rosalind_ba2a.txt

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
5 1
GATTTGGGCCAAAGTCTGCGGCGAA
GATGTGCGTCAACCAGTCGGAGTCC
TCACACCGGCTCGGAGATTTTTTTT
GATCTACAACGCGTGACTATATGCT
TAAGTGATTTTGTGGCCTTTACTCG
CCATCTACCCGATGTTCGACCGCGT
GAGCGCGCTGCCTACATTTGGATCT
TCCGGGTTAGGATGTTGAAACAAAA
ATGGAGCCATGATATGTACACTTAG
GCATGGATCTTACTCCGACGTTATC

11
chapter2/for_real/rosalind_ba2b.txt

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
6
CCCTAGTCTACCTGTTTGGAGCGGGGCCTGAATTTGACTGGC
GTCTTTACCGAGTTAGTCTGATGTAAGTACTGCTCCTCTACC
CCGACATTGCGCTCTACTCTGCGCACATAACTAAACGTTGCA
CCTCCGTCTACATAGAAGGAGTCTGCAACGCCCCCACTGAGG
ATCTTGCTCGTATCTACCGATAAGTAGCGAAAATCTAGCGTT
CGGGGTTACCTGGCAGTGTCTACTAGATCAGATTGCCCGGCT
TTAGTAAATGAATCTACGTCTCTGAGCGCGCGAATCAGGGTG
TGAGCACTCTGACTTAACTCTACTACTCTCCAATAAGCGCTC
TCACGTTCTACACTAGGTAAGTATGCATATTTGCATGAGTCT
TTTGAAGAAGGCTCTACAAATTTAAACCCAGACTCAGACACG

6
chapter2/for_real/rosalind_ba2c.txt

@ -0,0 +1,6 @@ @@ -0,0 +1,6 @@
GTCACAGCTGCATAACAAGTAAACTGAGAAATCCCCAGTTAGGCGGATTGACCATCGAACACACTTTCACTACTTGCGGATAAATCCTGTAGAACTAGACTTTATCTCGGCTGCGACAAGACAGGAGTTCATGCACCTGCTCTGTCCCTCGCAACAGTCTAGGGAGCAAGTAGGCGGCTTCTTAGCTAGTACCTGGGTAG
7
0.393 0.286 0.286 0.25 0.179 0.321 0.107
0.071 0.357 0.25 0.286 0.214 0.393 0.357
0.214 0.214 0.143 0.286 0.25 0.143 0.25
0.321 0.143 0.321 0.179 0.357 0.143 0.286

26
chapter2/for_real/rosalind_ba2d.txt

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@
12 25
AGATCCGGTTTTATTCAAGCGAATTAGTGGGAGTGCGAGCATGCGCCAGATTCGTCCGGGATTGTCGTTAGGACACTAAACAGAGTCAGGTGCAGTGAGGAACCGGTCCTCCTTGCTGTCCATCTTTGGCTATCAATCGCTTTGCGGGCGGCATGC
CGAGCATCCCTTTAACATAATTGCCCGTGGGTGTATTGCGTTTTTCCAACGCATAAGAGCATCTTATGTGTTTATGCGTGGAAGCCTATCACTTTGCATAGCGTTTGGCGATCACCTCCATGCCGCAAGGCCTAAGGCACACGGTTAATTGGGTCA
AACGAGGCGAACCCTGGAACAGGTACCATGCCTTTGCGATTCAGCTTCTATCCCCGTCTAATTAGACATCTCAGCGTTCCTCAAGCTAGCAGACTGCACAGGGCTTATCCCCGGATGGTCGCTACTTCTCTGTGCATATAGCACGTAATGCCACAT
CTTCCCGTCGAAATGCTACATAGACTGAGCGATACATGCGGTGCAGTTAGTTTGTTGACCTTATCCCACACTACAACGGCCTGTTACATTGCGCGTGTCTTATGCAAATCGATCGCTTTGTAACCGTAATCCACCATTTCTGGAAAGCATTTCCAG
ATTAAACATTCCAGCAACACGCGGGCGATCCTGAGGAATCACCGCAACTCACGTCTAGAGCCTGTCCGGCACTCGATTACTTTGTCTTTCGAACCCCGTTGGTTAGTGCACTCTGTCATATAGTGCTAGGCTGCCCTCTCAGACGCGCTCAGTCGT
TCGGTGTGTACACCTGGTAGAGGAGGAACCAATTAAACTTCGTGAACCCAAGGCGGCCCCCCATTCAGTTCGACTGGGACTCCGGCGCTTTTATGCGCGCGTAGAGGCAGTGACAAGGCTTCCGGTTAAGTCTTCTTTACTGACGCCATGCCTTTG
CGTATCTCTGTTTAGGCTCCCACCCCGATACCTTTGTTTCTCATATGAGCGCTTGTCTCGCCGCCAGATATCTGACTGGTCCGGTGATCAATGCTTAGGCGTTCAGGTTTACTACTGTCGCGACAAGACGGTCATACGCGCCAAAGGCTTCACAGC
AAGCGAAGTCCTTTGAATACTAAAACTCACCACTGGGCCGTCCCGACTATAAGTTGTCGCGAACAGAGTTTCTGTTACTTACCTCACTATCTTGCATCCATTCCTTTGGGTATTTGGGTTGTACACGCTATACGATCATGATTAGTCTCTATGCCT
TAACAACGATGCGGTTCCGTAATCGTAGTGAGAAAACCGGGTAGGAAGTAAGTGTGCATGAACGTTAGGCGCGTCTTGAAGCCAGATGGGTAGCTGGCTAATGTTTCTGCCATAGGACTGGATCACTTGTGCCCAACAGGAACAGCAATTCCTTTG
GCGATGACTTTGACGGCAGATCCGACCTCGGCTTAGTATGGTGGATGAACCTCCAAGTCACCGGGTCCTAGCATTATTTCGAATGGCCGAGGAGGCCATCATTAGGTAACGCCCAGAGTACATCCCCCGAACACCGAAGGTCGTTCGCGTCCGGCA
CCTAACGTACCATTTTTGACTGGAAGCCAAAGTTGACCGGCTTTTATAGCTTTTGACGGTCTCCTGTACTCAAGTAGATTTTTGTTAACAAACCTGGCATTGTCGTCATACAGTCAGGGAAGATACTTCCCTAGCTGCACCCACCCAATAGCTTTG
TGCTCTGACCAGACGATGGCTTTGCTGGAGGTTGAAGGCCATTTTTTTGTTCTAGTGCCCGACAGCTTCATGAGGGCGGTCGACTCTGAGGCTTGAGCAAAACCTAATATAAATGCTGAAGCTTAGCGCACGGCACGGAAATTGGGGGGAACTACT
CGGAAGCGTTTATGACGGCGACAGGAGTAACCATGAAGAGGAACAGGCGCGACGATGGAACCGCCTTACTACGTTCCGTCACGCCACCCGAGTGGAGTCGGTACCGTTAAGCTGACGGCGCGCTATTCTCTCCTGATTAGGTTACCTATGCCTTTG
GATGTAGCCATATAAATCATTCATCGTTATTGTGGGCTCTTGTCTACCGTATACACACACCCAATCCCTTTGGGCATTATTCGACTATCCCCTACCTCGCCTACTGCTGATACCACGTTTTAGGCTCCGTTTCATATATATCCCCCTAAACAAGGG
GATGGAGCGTTGGCGAACCGCTGAGCGAGCTATGAACAGCCTGTGAGACGCGGGGTAGGAGCCATCACTTTGGATCGTTCCCAGTCTTTCTATTATCAGTATCGATATGCGGCAACCAGTTTTCTTGCGCTCTGAACCATCCTATAGTAGAACTTC
TCCTATACGTAGCCTCGTCCGGCCTGACGTGTCCGGATTCATTTAGAGGCCATTACTTTGCTGTCAGTCGCTGCACTCATGTCGATTGTCGTGGTTGATTTAAAGACCCGCATAGCACAGTACCCTAACCCCAACTTCTCTCTGTTTAGACAGTGC
GAGCTTTGTATGGAGATTGCGCTTCCGATTGCTTTGAACATCGGACGCGCTTATAGAGACACTCGTGCTGGCAGACCGGTGCGCGATAAACGAATCTCGGCGTGCATTGGTGTTTGGGCTTCCGATGTCAAAGACCGCAGAACTGCGCCGGGGAAT
CGATCTTCAAAGGCTGGCTTGCATTAGGAGGACTGTGAAGAACACGCTTCTCTTATGACTGCACGGCGGTTGACTACGTCGCTTTGGGGCCACCCTTTCATTGCATGAACAATACCTTTGGTCTTTGACTGATCTTGAGGAGTCCACCGGATCACT
ACATTTCAAACACACTGTATGGGTTACCCTAATTCGCTGCGCATGCGCTGGGCCTCGAGCGAAGAATGTACGTGCTTTAGCTACTGTCAGTCTATCCAACGAAACTACGGCTTACGTGGTTACAGACCCCATGCTGGTTGGGAATCGATTTCTTTG
TATAAAGAAGTAGGTCCGTCAGATTCGAGGAATCCTCGATGTCCCTGGTACATGCAAAAGTTCAGAGCCGTAGAACTACTGTAGGCGATTGCTTTGCGCAAAAGGGATCAGTCGCCGTCGTAACTCAAATTTAGTCTTTTCACCAACGTGCAGGGA
TTTGAGTCATTATTAACGGTGTACGGAGTGACGCCCCCAATGCCTTTGTCCGGCTTGTACCGGATTATCCGCTTGAGTAACTTATTCTTATCTGAGATGTCGGTGGATATTGCCACTTAATCGAAACGATCGTACCTCGCCCGAGTCCTAGCAGCG
CGCACGTGAATGTAGGAGCCAATCCGGCCTCTTTAGTGCTCCAATCACTAAGGGTAGATTTGTCGCACCACCCGTATGTGATCCCTCAAAGCGAAATCATCTACACTCTCCATAGCTTTGAAATCCAATAGTACAACCTCGGCCGGGTAATCACCA
ACCATATCTTTGCGGACTTCCGAAAAGATCGAAAAAATAGCTTACTGACCCCCAACCTTGAGGTAAGAGCGGTCCCTCGGTCAGGCGGAACTTCCAGTGTCCGATTAGATCAGGCCGCATAGTGTGGGACTCCGATCAAGTGTATAATATGCAGGT
GCGGGGGGAGTTTGCTAGGACAGTCGGGCGGTAGTTTGTGTCTTAAGTAACTGCTCGAAGGCTAGAATGTGGGATCATAGCTTCAGCGGATTCCTAGCGATGGCTTTGAAACATGGACGAGTTACTTTTGGCGTTTTTGAGAGTTTATAAGGTGAG
CCAAACATGGTGGTCACTATTAATTGTCCTCCGCGTACCGAGATACGAGGGGAGTCCTCCCACAATTCGTCGCCGATTTCTTTGAGTCAGGGTATCATAGGGAGTGCTATTCCATAGCGATAACTGCTCCACAGAAGTTCATTAAGTATTTTTTCT

26
chapter2/for_real/rosalind_ba2e.txt

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@
12 25
GATGTGCGTCACAATCCCGCCCTCCAGCTGAACTAGCCAATACTTCCTCTTTCTGCTTCATGATTCACCCAATAGACACTAGGGCTTATACGGGGTGTGTACTTCCCACTGTGGGGCGAGCTGAGTCCATAGTCATGGGCCCGCCTCATCTAAACC
TCATAGCCGCCGCGCTGGTGGGTGCAAGTCTGCCGACTCCCACCTTATATGTAGCAGGTCACGTAGATAGGAGTGTGTTATATCCTCCGATAATCCCTAAATATAGGATGATACTGGTTCTGCCAGACTCTGTCTGTCTTGAAGTCGCTAGATGAA
GCGTATAGAAGGAATCCGACTTGGACGCCATTCAGATCAACTAACATAAACAGCAGTAAGCACTTAGCTAGTTGATACCGACGCATACAGAGCGGTCGCGAAAGTCGAGACCGTTCCTTGGTTCAGCAATTCGTGGCTGCCGTCCTCTGGTAAGCC
ATTGTATGGAACGGTGATGTGCTACTCGCTTAAACCTATAAGCGCACATTTGCAGTCCATGTGATACCTTCCAAGATGTTATTGGCGTGGAAATTAATAAGACGAGACTATTCGTCCTCTGGGTTAAGCTGGTATTTAGAAAGATTATCCAGCAAT
CGGGCCGCAATGGGCGCCATTACGTTTTTGGTTTAATCATTGCTGGATCTCCCAACCCACTCGCCTGCGGGACGTTCGCTCAATCTCCGCCCACGTCTGTGCTCTATCTCAACGATAGTCCTCGACTATAGCCTCATGTAAACCCGGGAGACCTTT
ACACAGCGCCTCGCAGGGGGTGTGAGGACCGACGTTGTAAAGGCCATACCGACTCTCAAGGCATTTTGGGAACCCCCACCTGTCTGTGCCATTGAACTCTACTAACCCTAATTCACCTATTGCCGAGCCTTCGGATTTGACGAATACGCATTGCGG
AGCTAGCCGCTTGCCAAACCGCTCTCAGAAGGAACGAGCCTAATTCTCCTCACGTAATCCGGCCAGTTCATAGGTGTGCAAAAACTTAACACACGTTCGGGTGGGGTTAGCAGCGCAGAATTGAACCCCTTCACGCCGACAGGGTGGCTAACTACA
ATTCATGATTGCTGAGTCATTGAAACACACCAGAAACGTCAGCGGCTAGACTATGTACCAGCGGAAGCTGGGATTCCTTTAGAAGCTGTTCCCATACTTGGTGGGTGATCCTATGATACTCTCGGTTAATCCGTGCGAATTTAGCTGTCCCTGAGT
TGGTTGTGATATTAACCAAGTGCGCTCCCCTAAACCTCGAACCTGGGCCATTTAGATGACTTAACGCCGCCTAGCAGCGCCGCCGTGGTCTTACAACTAGATAGAACTGCGAGGCATTCTGCGGGGCGTAAGTGTCGTCACAGCGATCATGGATTC
AACTTTCATGTCCACGCGCAACCTTCGGTTTCGTCCCTCTGCTAAACCAATTGGTCATTTCTATTGCTGGACCCACCCGCAGGCGGTCGAAAAGAAATACGCTGCCCCGAATACAGCTCCGATTCCTCTTGCCTCCCAGACACAGATGGTAACTTA
GGTCCGCGTTGTAGTACACGTAAAGCCTTATCGGGGGTCCTAAATCTAGTACCTGGCAGACTCACATAAACCGCTCTCCCCGCCTGGAATCTAATGTCTGTAACTGATCCTTTTCCTTCATATCAATACCTCTGTATCAGTGTCCAAAGGTCAGCT
AGATGAGGTGCAGTGGGGGGAGTGATGCTAAGTGGTCCCACTGAACTCATTGAACCCATACTCGGTGGGACTAACATCTACAGCCGCACGTCAGAACTACAAACAAGATCGGCACGAACGTTCGCGACTATTTACATCTTAGCGCTCTACTAATCC
CGCGTTGTTGCCCCCGGATTACTACTCATAGTCTGGACAACTGAGACGATAGCTCATCATCGCGGTAACATGCGTGATTACGGGGAAAATTATGGGGTGACCCAGATCCGCTTCGACGCGCTCACCTAAGCCAACACGCGGGTATCGGTCTAGTAT
TCTGACGCTTTCCCCACGAGAAGATCATCCCGGTTGTTACGATCCTTCCTCCAATAAACCTGCGCCCCATACTGAGCGGTACCCTTTCAAAGGTAGAGTGCTACCATGCGATGTATCTGAATACCATAAACGTGTGAGTAGGATTGTGGGGGTACA
GACCGAAAATCGATGGCCAGACCCATCGAAGTCCGCCCCAAGTTGGCTATGAGTTTAATGTGCGCGTTGCTGCTATGAGGATGTCAGGCTGGGCCGAGGGAATGAGAAAGTTTCTGACCTCGAATGAGAGTGAGTCCTCCCGCACTCCGCTAAGCC
TTAAGGTCTCTGACCGTTGTAAAAATTGTGCCCCCCCATCTTGTAATCGCTAACGAAAGGAAATCTTCTTGACAACCACAACGAGTAACTGCCGACTCGGTGGGTAGCCAGCGGTTGACGAACTGGACAATGCTCTAGTAAACCCCGGTCGACAGC
TAAGAAACCTTGTCGACTGGGAAACTCGCGTAAACCTTTGCGGAGGCTCCTTATCGTTACCTGACTTCCAGAGGGACTACCGTCGAGTCAAGGGGTCAGTTAGGTAAGCACACGATGCATGCAACCCTTGCTGACTTCTTCAATTTCCGTGACCGT
GACCATCAAGCCCGTAAATCGATGGTATCCTATGGTATCGTCACCACAGCCTCGTCACTTAAACCAGGCGTAAACCGAGTGAAACGACTCGGCTCGCAGATAGAACCCGTGTAGAAAACCATGTGAAACAAATGAATCACTTTACTCGGGTAAGCC
TCAGTGTTAGCCTCGGGTCAGAACGGTCTATTGAAGTTAATGACACTCGGGTAGTCGGCGCTCAGCTAATCCGCCGCTAAGCCACGAGCAGGCGACAAAGGAGAATCTGGCGACGGGTAGCACGATAGTTGGGGAGGCCTCGCCTAGGTTAGACAG
CTCGACTAATCCGAATTACACTGCACCTCCGAGTGGGTCTGGTTACTGCCGGTAGGGAGGCCAAATAGCTTGCCCAACTCGATAAGTCCTATGACGATCGGGCTGTCATAACAAGATTAATAGGGATGTCAAACCGTAGGGTGCACAGACAATTCC
ATCCAATTCTCGATGGGATTACACCCTAACCAGACTGGGGATCCATAAGTCTTTTTGCCTCTCGCGTAATCCGCGCATGACTGTATACACTTCCCCAACGGGGGGTCAGTGTTTTCTATTCCCGCAGCACGCCCCGTCTAGCCGACCCGAAGGTGC
GATGTCAGATTACACCATTTCTGGCGCGTTTTGAAAGATCGGACCTTCATATGGGTTCCTGCTCAGCGTGGACCAATGAGAATGGAGAGCCATGAATTAGCACTACGACTCTCCTAAACCATGATTCTGATTTTCTGATCTTCCCATCAGCCGTAC
GCTAATCCTCCTGAGTTACGAGCATCCATGCGATAAACAAGACCGATTCACATCCAAATTGGCCGTCTCTGTGATGCTGGGCCGGTGAAGTTGACTTCGTAGAGTTTATCTCCAGCCTGCAACCTGAAGGATCTCGACTAACCCTTAAGCGAGCTG
CCGCTCGAAGATCCCCTCTTGCAGCACGGTGCAGGTTCGGCAGGCTGAAGTCTACACCGCTTTGGTGACAAAGCGAATGACTCACTTAGGCCCGCGCATAGGGCAACCGTACATCACCGACAGAGTGTACAGCTCGGCTAAACCAGACATACGCTT
GGGGAGCGGTGTCGAAAGAGAAGGCATCTCTGAAGGAGTTAAAAACCACGATTTGAAAGTCCTCTGTATATGCTCGCGTAAGCCTTGCTTTTCCCACTGAGGCTACACAGGCGAGTCCAGCTAATGACGGCGTTCTCATCTCAATGTTGGCGACTG

21
chapter2/for_real/rosalind_ba2f.txt

@ -0,0 +1,21 @@ @@ -0,0 +1,21 @@
15 20
CCCCGAGTAATTCCCAGATATAACGTACTCAAATGTTGAAAACAAGTGACCACTGTATCCACGAGGGGTGTAACTCTTATCAATGGCATAAGGGCCACGAGGACTCTACCATAAGAGCACAGCGCCAGCTGACGAATGAGTATCTCTGACCAACCGATTGCGATCTGTTGTTGGCAGATAGGCCCGCAGGACCCCGAGTAATTCCC
AGATATAACGTACTCAAATGTTGAAAACAAGTGACCACTGTATCCACGAGGGGTGTAACTCTTATCAATGGCATAAGGGCCACGAGGACTCTACCATAAGAGCACAGCGCCAGCTGACGAATGAGAGATTTGACTACTAGTATCTCTGACCAACCGATTGCGATCTGTTGTTGGCAGATAGGCCCGCAGGACCCCGAGTAATTCCC
ATAGTGCGTACACCACAAGTGAGATATGATACTTCGACCCAGAGGTAAAGATAAGATCTAGTATTAACCCCGGAGCGAAGGGAGAATGGTACGATCTTGAACAGACTACTCATCGCCGATATGAGTCGAAGATAATGCTGTCATCAAAAGTGGCTTTGTTGAGGTTAACACTGTAGACTGGATGCAAGGCCGATGAATTATAAGTC
CGGGCTCGGAGAACAGACTAGGGGTACGAAAAGGTTCCGAAATTAGCACGCGCGCGTATAAAAAGATCGACGACCATGCCCGAGTTAGCTCACAGGAACAACTTTGGATAGTTAGATCCCAGCTGACAGTTCGAACTACGCAATCAGGGCTCCTCTGGATTCATACTCTAAGCATGAGAAGGCACAGAGCAAACAGCTACTTGGAT
CTTCAGTTAATGATGCCTCAGAGGTCGGCGTTGAACCGCGTAACAGACTACTATCTTTATGCGCAGTACAGTTGTAATATGACTAAGGCGCCCGCGAACCGTTCCAACGTGCCGAGAAGGTTGGCCTACAAGGAAGAAGCCGGTCATTCAGTCTTCAAGGCCAACGGTCCTGCACAGATGATTACGCACCGATCAGTATAATGTGT
CCATTGGGTGAGTTGATTCCATGATTCGTAGAAGCCACTACTAGGTGAGCTAGGCTCCTCTACAGTATAGAGAAGAGCCTTTAAGCCTATCCTGGAGCCTCTCACCCCACAATCGTAAGAACTTGGGTGCGTGAATGACTGAAGTATACATCACCTTAACTCATATTGTTGATCCGCTGTTGTCTGATTGGTAGGCTTGGTAGCGC
CGAGCGCTTTTTGCACACCGAACGTGTCAGTTCCACATGAGCGTGACAGAGTGCCCGCGCATGGGGTAATCCCGTATCAGAACAGTAAACTAGGTCATGTCCTCCATCGTCTTAGAAGGGGACAACCCCGCAGGGTATGCTAAGAAGTGGAGTAGAGAGTGTTGTGCTGAACACGCGTATCCGGCGGTTTCAAAGTCCAAGGTTGA
TGTCGTCCCTCTTCTTTTCACTCACATGTATGCCGCTAATACAGACCAACTAAAGAAGAACCAGCTACTAGTGCCATACCTCAAAAGCTAGAACTGTAATAACACACCGCTCGTTGTGGGCCGATTGTATATTAGTAAAGCAGCCAATATTTGTAACACGGCCGATGACCGTGCAAGTTTCCCTTGGAGGCAATGGCATCAAATTC
TGAGGTGATTGTTTATCCAGATTGGCTGTTTGTCTGAGAAGCTTGTAGCGAGATTCGACTACTAGCTATAGCGACTCAAAATGCTGCGCATTCCCAACTAAAGTAAAACGCAAGCTTAGATGCAGAATTGAGATCACTTGTCTGGATTCATTTTTAATGTGGCGCTACAGGTGCATGTCATGCCCGGTAGGTTGAGAGGCTCTCAA
ACGTGGAGCGATCCTACCGGTGTATGTGTACCATCCTGGCTGAAAGGCAGTCGACTGGCCACCGTTCGGGCGGCTGCGTTAGAGCTGACTACTCGCATAGGTCTAGAGCGATGACCCCTTGTTTGTCAGCGTATATCCTGGGTAATCGTTGTACCGCCTTTCAGGCCGATCCTAGGCAAGACTACTAGAACTAGGAGAACATTGCG
TAGACCGCCACCCAGGGTGCTTCCTTAGATCAATCCCGCGTGTAATTAAGCGTAGGGGAGACGCTCCTGAGAACAGACTCTCAGCTAAGCGTAGGAGGAGCATTTTTTTTGGATGAGGCCTCCTCTGTGGATACAAACGGCTCGGTCAACCAGCCACACGAATCAAGTGGAGGATATGTTGTAGTCCGCTCATTCCGAACTTTTAC
GATATTTACGCAGTCGGACACCCCTCTTCCTTAATGCTTCGTTAAAATATCGGTCCGTGCGAGTTCTGGTGGCCCGTAGGCCTCGCTACTTAGAGGTGGGCCCCCAAGGGCACGCATAATCGGCGCCTACCGCGGTAGATCAAAGTGAACGAAGATCTTGTAAAGATTCTATGCGGAGGCAGAACATCTTACTAGGTAAGAGATTC
GCTAAAGCTATCCAAGGATACCGGGCGGTGCTAAACTATTACAGTATAACATCAATCAATCACCGCTTCGCCTTCCTCGTAAGTCATACTGCATATGGGCTTCCACTATAGAGTAATCCCGTGAGTGGACGAGAAAGCACTACTAGACAACACGGACACCGTTACAGTCATAGTTGCGGCAGTGCTCATAAGTCCTTCACACAGGG
TCCCGCTATATCTGATCTTTGTGACTGCTGGGGTATAACTCAGCTGGCACAGAACAGACCCGTAGTCCAGAAAGACTTGAGTATAATGACGCCATGCTCGACCGGAGGTCAAGTACGGGAGAGGCTTACGCAGAATCCCCCAAGAGGCTCGTTAACTGACCGCAATCGATTTCAATTCCCTCCGAGCTCACCGAGCGCTGGTATAG
TTCCTGACTGCTTGAGCCAGCGCTATATTGCGCGCAAACCAGTCGCGTGACGTACCGTCATACCGAGTGATGTAAAGTTGATTATTGGGATCAGCTAATTCCTCGCGGTGTTAGTTCATCACTTTTGAGTCCGACAGACTACTAGTTCACTACTTCGAAGTTTGCTCTTACAAGCGAGGAACGGCCTGCCGAGTATACAGGGGCGT
CAGGCCTCGCGATTAACCTTATGCGGCCTCTCAGAGCCCCGTCTAGCGAAGGTAATATGAGAACAGACTACAGAGACTACGCTCTGCCCCCTAAGGACTGGGAATTTATGGCCCTATATCGCCCTTATTCGCCATAACTTCTAAGATTTGCTATTACCATTCTGAGGCAGTTTAACTAAATGGCTATCCAACCTATGAGGACTGAA
GATCCAGAACAACTTACTAGTCCGTAGGGCTGGACCTCTTCATGCCCGGGCGTCGGGCACATACGCGTATCAAAATGGGAGATGGCATTATCTACTTCTCCTGTGATTTTGAACGTAGTCACCCCACATCATACTTTTCACACCTCGTACTGGTGATTCCATTCTACCCAACGATACGTAATTGACCCCGCTTTTGATTGGAATTT
CAAGAGTCGTACGAGCCCTCCGTCATCAATGCTTGCGATTAGAGTTCACGGTAGAATCCACCAGAGCAGAAGAGAACGCCAGTAGCGACCAGAAAGCCTTTAGAAAAGGCAGACTACTAGAATGTTGTGTGTAAGTGTACCAAACATTGATTCGGACGGTTGTCGTGTTCGAACCAGGTGATTTGGTGAGGTTTCAGCGCCTAGTG
TTGTGATGATCTTCGTAAACATCCGGTGGGAGCCCCCCTCTCCTCGATGACTGTTTGACTATGATCCATTTACCTTGACTCGCAGGACGACAAGCCATTATTCATGCCGTGTGATAAGAACGCTCTACTAGGTTCTGTCCATGCCCAGCACAGATCAAGGGACCCGGCGGGCCCGGGTCAGAACTTTGGTCACCATTCGCAAATCA
AGAGGCACCTGGCGCCAAAGGCATTTAATGAACATGGCGAACTGCCAGACGAGCATGGTAGAACAGAAGCCTAGGACCATCCCGACATAACAACCACTATTTATAATTGAACTATCTTGGCACACACGCTATTGGCGTTGCACTGAGACCGTTCATCGCCTTCACTGTGACCATTCGCCTATAGACATATAACTAACTTGGCTTCA

21
chapter2/for_real/rosalind_ba2g.txt

@ -0,0 +1,21 @@ @@ -0,0 +1,21 @@
15 20 2000
AGTGGTTTAATCGGACGAGGCGTGTCCCTCAGCCCGATAACCATCCCGTCCTGTGTGCGACCGTTGAGCATCGTATTAGTTCCGTAGGATTTTGCGGTCGTCTATTTGATATAAAGTCAGGTATATATGGCCACAAGTTCGCGTGGACCGTTAGCGCACCAACACTGTAATATAACTGCCTTAAGGTAGCGACTCGCCAAGCGCAGGGGCAGCCCTGACAGTTTCCACGAAACTCAAGAGAGTATGTAGCGACAGTCCTTCGCAAGACAATCGTACGTGTCTACCGAAACTTAATTTCGTTAGTGGTTTAATCGGA
CGAGGCGTGTCCCTCAGCCCGATAACCATCCCGTCCTGTGTGCGACCGTTGAGCATCGTATTAGTTCCGTAGGATTTTGCGGTCGTCTATTTGATATAAAGTCAGGTATATATGGCCACAAGTTCGCGTGGACCGTTAGCGCACCAACACTGTAATATAACTGCCTTAAGGTAGCGACTCGCCAAGCGCAGGGGCATCGAAGAGTGTGCATGCCCTGACAGTTTCCACGAAACTCAAGAGAGTATGTAGCGACAGTCCTTCGCAAGACAATCGTACGTGTCTACCGAAACTTAATTTCGTTAGTGGTTTAATCGGA
TCGCTAAGTGGTGATACCGGCTGATAAGAAAGTAAGATTTCAGCATGACCCTGTTGATTCCACCCCTTCCTTTCATGGTGAGGCTTGTCTTTGCGGCGCCTCACGGTACCTGTGGACTGTACACACGAAGCACAACTTCCGAACTATTCGTTTGTAGACTATAAATCACCATGCTCATCAAGCTCAAAATTTCTCCTTACACCGACCGCGGTGGGAAAAAACGCAACGAAGCTCCAATTATCTCCAGTCTCTGCACGTGTAGAGATGGTGGAAAGCTAAGAGATGCCTTCGCCACATTAAGTCCCGCACAACGTTA
ATTGGCAAAACCGATAGGATCCCGCGACTATGACGTCGCTTTTCGCTAAGTGTGGGCTGACCCTCCTACAAATAAGTCTCGTTTTAACCCTGGCCATTGCTTACAACCGCCGAAGTCGCGCTTCAATCAAAGGTGCAGGGTTATAATAGACATACAATTAGGATGTTTGACCGACATGCCTTGTTAACTTTAATTGACGTTACAGATTGATTATGCGATCTCTTTATGTTCTCAATTTAATATACCTCCGCTGGTTCCTATTGGGAGCCTTCAACACATAATAAATCCTTGTACCTCTGATTGAGTCTCTTTGCCT
ATGTTCCTTAAGTAATTAATAGTACGTACACCGGTATTCGCTAGCCGTGCATCTTGACCCCCCCAAGGCGAACAGTTTGGATTTGCGAAGTCCCACGAAGGGGGCTTAAGGCTTGAGCCACATCCAGTTATGAAAGTATATCATCGGCACCCAGGAGGCTAGACAGGAGGTCAGAAAATTCCGCATTAGCGTCGTTGCGCAAGGCCGTCGCCGCCCGTGCTTCCAGGATTAGATCGCCTGCCAGACAGTCCGACTCCGTTGACAATAGAAGACAAGCTTATGCCCCGATTCACTCACCACCCAGACAGGCCCGCAT
TGCTCGGACTGATATCCGCGTATGCGTACGTAATGTCTAGCAGGCGGTCGAGCCATAGGCTTCAATAGGGGTGTTGCGACTAAGCGATTGGCACAGGAAGCATTGGAATTAACACCGCAGTCATCTAAGTGTGCATACGGGCATGTGGAGATTTTTCTACGAATGATGCGTCAACGACCCATGGAATGAGTTTTTAGTTGTTACCCATTTTTATAATAACGTGCGCGGTTTATCTTATCCCTTATAATGATCTCTAACATAGGCGTACCTGAAAAGAATGCATTAAGCCGCAAAGGAGCCCAATTCTCAGCCGTCG
AATCTAAGTTACTTCATGGTTCACGGTGCCACATCGACTGATCATCCATGCCTAGTGCTGTACTTAACCCATCATATTTCCTAAGTGCTTCGAACCCTTCGATCGGGGTGGTCATCTGTCCGTGACAAGGCTGCTAATAACCCACGCCGGTATAACACTGATTACGTTATACGCCTTATTCGGCAGTGACTGGCGTGCCACGTGCCAGGTAAAAAGAAATCTGGAACAGGGCTCCTCTTTCATAAGTGTGCATTTAAATAAGGGGAATAGTAAGGTCTCATTTGTAGTGCACGTGCCTTTCAATTATAGGCCCATA
CGCCATCCACGTTTTAGTAATGTACCCAGGCCAACTAACACATAGCAACCGTCAGTTTTCACAGTTGTCATCTTGCCGCCCGAATAAGCCCCGCTGACCAACGTCTGAGGACGTTCTCCGCGGAGATGAGGGTATAGCGTCGTCGTACCTGCATTACCGAACAACTCTCCATCTTTAGGGAATTACCCATCTTAGCTATAGGACACAAGAGTCGACAGTAATTGTGGACTGGCTTTTGCGGTCTCGGTTCAATCAAGGAAAACCCTCTTGCACTACAATCGCAGCGTGTGCATTCAGAGCCCTTATCATACCTCGA
AGGCGAGGTGAGGTCATGACCTGTCTAACCCCTTAGCGCCGGTGTAAATTCAATGCACGTAACGCTAGAGGCCTTAGGCCTCGATTCATCCTTGTGATCCATGTAATCGAACGACGCCTATCTAGTTCCGGAGCTTCGAACGAGGCCGATTAAAAACCCGTTGGCCGGTTGATCTGTGCTGCTCAAGATGTAACATCCCGAACTCAGCGCATCACGCCCCGCCAGAGCCTTTGGGAGCAGGGCCAGCGCTCGCTGGTTGTGCATGCGCTGCAATTCAAACACCTGTGGCACGAGTGCCCCAAAGGACCATCATCGA
TTGCAATACAGTGCCCCTTGTGCTGTTTGCTAGGCGAATACAGGCGACCGACACAAAGCCCGGCCCTATATCAGTACGAGGCAACATACTGCTCGCTAAACTAGCGTATAAATTTGGACACCATAATGCGCAATGCACGCGGTATAGGTGGTCTTGTGGTAAGAGGGATTTCTAAATATCGTATTCCCCAACGCAGGTATACGAAGCCCATGGTAATTTAAGCGTTTAAACAGCTAGAACTCGCTCGCTCTTTGTGCATGTACTAGGTCCTTGTGTAGAAGAGGCGCAATGCCTTGCTATAGACCTTTGTCCCGTC
GTGCGAAAATGCATAATTATAACTTTTCGCCTCGGGCGCGTCCACGGTATTACGAAGTCGAAGCGCGCATCCACTGACAATTCACAGACAGCAAAAATTGTTGCATTTAATCACGGGGACCCTATGGTGCAGTTGCGAGACCACATATGACCGGCTTTGTAGCACCGGCCCCAGTTTAATTCCCCTGACTAATGGTAGATACGACGACCGCCCCCCACAGACTCACTCACCTCCAGCACCAGCTCAACGGTGACCCCTTTCTGTTCTAATGCGGTATTCGCTAAGTGTCACTAAGGTTAGTGCGGCTTTTGCTGCA
GTAGTCCGTGGCATATGGAAGGGGAGCTTTACTCCCTGATCGGTGAGTGTGGAGACGTTTAGCGTACTGGTCACGGCAAGAGACGTTGTGAGTGTTGTATATGTTTCTTAGGAAAGCGGACGGCGTTACGCATGAGTAAGACGGCCTAAAGAATGAACCATGATTGATAATCTATTAATTGTTAGGTAAGGATAAGCAAAAAGGTGCTGCTGGGTCTTCAAGTAAGGGATATTCCCTCGCAGTGTGTGCATGACTCATATGGTTAACCCGTCTAAGCAAAAATCTCGACAGGAAGGTGAGTCGGCGCCATATGAGG
GGCGACAGCTGGTAACACGGCTATCGGGGCCGAATTGCCGACACTTGGCGCATCGCTGGAAGTGCTTCAGATAGTTATGACGGTGAAACACGCTCCGGCACAGCCTATAGTATGTTCGTAGCGTACAAAAGCTAGGCAGCCGTAATATCAGCGCTTAATGCTTTAATTGGCATGTGTCCTATTTGTGACGTTGATGTCGATGATAATCCGCACAGAACAACTCATGCATATTCGACGAGTGTGCATGATAGACACTGCGTTGTTGCCATGTATCTCCTGAAGCACGCACACAACGAAGGTCGCGTGCTTTTTCCGG
ATACTACGATCAAGCGAAAAGGGGACAATCGTTGGCAGGGTCACTAGGGCAGGGTCTTAGAGAATCAGTGCAACGTTTTAATTCGCTCACCTGTGCCGCTAAGTGTGCGATGTGTAATCTCCAATGGGAAATGAATCCTTCGGCTCGAGTAATGATTGCGTATGGTATTGGCCCAACGTAGGACTCAAGTCCCTGGTCGTAGCGCGATCTGTAATGTAAACTTACCATACGAGCAGGCTACGTTGAGGAGCGCTCGCGGAACGGTATAGAAAACGGTACGTCATATTGGCCCTTGTGACTCCTCCTCCGCTTGGAT
TTTTATTACATCTGTTCGCTAATCCTGCATGGTAAACAGTTACAGGAATTGCGATACTCCACTGGGCCACCACTACTTCACTAACTGGTAAAATGCCGGTCAGCTCATAGGTCCAAATAACGTTTATGGGTGTTAGCAATGTTAGCGATGCGATCTGTAAGATCCGAATCGTATCACAGCGCGCTCCTGCCAACGTCACCTGCTCCACCTAGCACTTGTCGATAACTCCCCCTCCAATCTACAAACACCAACTCGAAAATTGACACGCGCTTCTCGGCTGTTGGTGCACTCTGATGTTAATATGATGCCATGAGCC
TGCCAGATACCTACCCTCTATATTCCAAACGTGAGTGAACTAAGTTCGATTACGAACCCGTACGTGGCTGAGCACGGCTAGTACCGGCCCGATTGTACCGTTCATATTGATTAGAGTGACCGGAGCACACAATCATCGCCCCCTTAAGCTATAACTGTCCCCGGAGCCTGAGCCTTTGACAACTTCGATAGGTTAATAAAAGAGTCTCGCTAAGTAGCCATATGATGAGTGATAGGGAGGCCTAGACCTGGACAACCCCTCATTTACGTCCGAACTCGGAGTGAAGTGTAATGTGAGCTCTTAAAAGGAGCTGGAT
AGGCTCTCTGATTTAGCGGTGACCGCGTCCCGATTCTCACTCCTCAGAAGGTCTGGTAGCGCTACGGGGATGGGAACCCAGACACTCGAATCGAATCGGTATGACACTAGTACAAGGGGGCCTTTAACGCACGAGAATAACACAATTCCTTCGCATACATAGCTAACAGCAGACATAGGTCTTGATAAAAACTGTGCTGCTTCCTCAGAGTCGCTAAGAAAGCATGCCAGTGCACACTGGACATTACGCCGCAGTACAGCAATTCGCGTCTCAGATCAACCTGGGGAAATAAAACGTCTTTGCGTTAGCCCTTTGT
TGCCTTGATGTCCAGGCATAGGTCATGTACGGGCTACCGTCATGTCCATGTCAGAATCCGAATGATCCTCTGGATTCCGATCCCGGCAGAGGGTAACTGTGCGACCTCAGCTTCCTCATCCCGCTATCGCTCACGGCCGGTCCTAGTGCGGCATGGATAAGCTCATCCAGGATGATTTACCCAAACCCTTTCACGTGGTGGTGGGGCGCGACTGTCACGCAGGAGAGTCGCCCGAGCTGTGGGCAGGAATACTTTCCTAAAGTGTGCATGTTAGGAAGAGACGTTAACTGCGCCTCCCTATCCTATCTGAGTGGCG
ATAACGACCTGTGTGTTCATCGTATCTTCTCGAACACTTATGTAGATTCGCGTGGCTACGTTGTACATTCACTCCACTCAAGAGCGAAGGGTGACGTTTTCACTCCTCGCTGGAAAACCTAGAACGGGCTGTTTTTTACGATCAAAACAAAACCACTTGATAATTGTACTATTGTCTGGTAAGCTAAGTGTGCAATCAAGATCAACTCAATCCCCTGCCACCATAGTGTGGGCACCACGTAGAGAATTCGTCGAACAGATAACGCAAACTGACAGGGAGCTTAATGAACCATCAGCCGATCACCTTCGTGAGCATC
TCGCTACTGGTGCATCCTATCTATTGATATTGACAACCCGGGATTAGTGACAACCGATTTCAGACTAAACTAGTTAGTAAAGCATTTCTCTATCTCCGCCGAGTGGACGGTGATCTAAGCAAGTAGGTGCCAGGAGGCCCATAACCGCCAATGACTTTCATGATCTAATCGACGGTTCGTTTTGAGGTTGGGGTACGCTCATAACCTTTATGTTTTGGTACACGCCTGTCACCTGCGCCGTGGTATCTGAGACATTTGTCTCCTGGACTAGTTGATTCCAGTATTCACAGAACGCCGGGATACGTTTCCGTCAATA

77
chapter2/populate_templates.py

@ -0,0 +1,77 @@ @@ -0,0 +1,77 @@
import jinja2
import os
def main():
    """Generate Go boilerplate source files for the Chapter 2 problems.

    For each problem description below, the Jinja template ``template.go.j2``
    (loaded from the current directory) is rendered and written to
    ``ba<chapter><problem>.go``. A file that already exists is left untouched.
    """
    # Jinja environment that resolves templates relative to the current directory
    jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    template_name = 'template.go.j2'

    # One entry per problem; every key is passed to the template as a variable
    problems = [
        {
            'chapter': '2',
            'problem': 'a',
            'title': 'Implement Motif Enumeration',
            'description': 'Given a collection of strings of DNA, find all motifs (kmers of length k and Hamming distance d from all DNA strings).',
            'url': 'http://rosalind.info/problems/ba2a/'
        },
        {
            'chapter': '2',
            'problem': 'b',
            'title': 'Find a Median String',
            'description': 'Given a kmer length k and a set of strings of DNA, find the kmer(s) that minimize the L1 norm of the distance from it to all other DNA strings.',
            'url': 'http://rosalind.info/problems/ba2b/'
        },
        {
            'chapter': '2',
            'problem': 'c',
            'title': 'Find a Profile-most Probable k-mer in a String',
            'description': 'Given a profile matrix, find the most probable k-mer to generate the given DNA string.',
            'url': 'http://rosalind.info/problems/ba2c/'
        },
        {
            'chapter': '2',
            'problem': 'd',
            'title': 'Implement GreedyMotifSearch',
            'description': 'Find a collection of motif strings using a greedy motif search. Return first-occurring profile-most probable kmer.',
            'url': 'http://rosalind.info/problems/ba2d/'
        },
        {
            'chapter': '2',
            'problem': 'e',
            'title': 'Implement GreedyMotifSearch with Pseudocounts',
            'description': 'Re-implement problem BA2d (greedy motif search) using pseudocounts, which avoid setting probabilities to an absolute value of zero.',
            'url': 'http://rosalind.info/problems/ba2e/'
        },
        {
            'chapter': '2',
            'problem': 'f',
            'title': 'Implement RandomizedMotifSearch with Pseudocounts',
            'description': 'Re-implement problem BA2e (greedy motif search with pseudocounts) but use a random, instead of greedy, algorithm to pick motif kmers from each DNA string.',
            'url': 'http://rosalind.info/problems/ba2f/'
        },
        {
            'chapter': '2',
            'problem': 'g',
            'title': 'Implement GibbsSampler',
            'description': 'Generate probabilities of each kmer in a DNA string using its profile. Use these to assemble a list of probabilities. GibbsSampler uses this random number generator to generate a random k-mer.',
            'url': 'http://rosalind.info/problems/ba2g/'
        },
    ]

    print("Writing problem boilerplate code")

    for spec in problems:
        # Render first, so template errors surface even for files that
        # would be skipped below
        rendered = jinja_env.get_template(template_name).render(**spec)
        target = 'ba'+spec['chapter']+spec['problem']+'.go'

        # Never overwrite a file that is already present
        if os.path.exists(target):
            print("File %s already exists, skipping..."%(target))
            continue

        print("Writing to file %s..."%(target))
        with open(target,'w') as out:
            out.write(rendered)

    print("Done")


if __name__=="__main__":
    main()

49
chapter2/template.go.j2

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
package rosalindchapter{{chapter}}
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA{{chapter}}{{problem}}Description prints the banner for Rosalind problem
// BA{{chapter}}{{problem}} ({{title}}): a separator line, the problem name and
// title, a short summary, and the problem URL, one item per line on stdout.
func BA{{chapter}}{{problem}}Description() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA{{chapter}}{{problem}}:",
		"{{title}}",
		"",
		"{{description}}",
		"",
		"URL: {{url}}",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA{{chapter}}{{problem}} runs Rosalind problem BA{{chapter}}{{problem}} on the given input file.
//
// This is generated boilerplate: it prints the problem description and reads
// the input file with rosa.ReadLines, terminating via log.Fatalf if the read
// fails. The commented-out section at the bottom is a starting point for the
// actual solution.
func BA{{chapter}}{{problem}}(filename string) {

	BA{{chapter}}{{problem}}Description()

	// Read the contents of the input file as a list of lines
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Go refuses to compile a function with an unused local variable, so
	// reference lines here to keep the freshly generated file buildable.
	// Delete this once the solution below consumes lines.
	_ = lines

	//// Input file contents
	//input := lines[0]
	//pattern := lines[1]
	//result := rosa.PatternCount(input, pattern)
	//
	//fmt.Println("")
	//fmt.Printf("Computed result from input file: %s\n", filename)
	//fmt.Println(result)
}

60
chapter3/ba3a.go

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
package rosalindchapter3
import (
"fmt"
"log"
"strconv"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA3aDescription prints the banner for Rosalind problem BA3a
// (Generate k-mer Composition of a String): a separator line, the problem
// name and title, a short summary, and the problem URL, one item per line
// on stdout.
func BA3aDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3a:",
		"Generate k-mer Composition of a String",
		"",
		"Given an input string, generate a list of all kmers that are in the input string.",
		"",
		"URL: http://rosalind.info/problems/ba3a/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA3a runs Rosalind problem BA3a on the given input file.
//
// The first line of the file is parsed as the integer k and the second line
// is used as the input string; both are handed to rosa.KmerComposition and
// every returned k-mer is printed on its own line.
//
// Any failure (unreadable file, fewer than two lines, non-integer k, or an
// error from rosa.KmerComposition) is reported through log.Fatalf, which
// terminates the program.
func BA3a(filename string) {

	BA3aDescription()

	// Read the contents of the input file as a list of lines
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Both lines are indexed below; fail with a clear message instead of
	// panicking on a truncated or empty input file.
	if len(lines) < 2 {
		log.Fatalf("Error: expected at least 2 lines in %s, found %d",
			filename, len(lines))
	}

	// Input file contents: k on the first line, the string on the second
	kStr := lines[0]
	k, err := strconv.Atoi(kStr)
	if err != nil {
		// Pass the value as an argument rather than building the format
		// string from it, so a stray '%' in the input cannot garble the log.
		log.Fatalf("Error: string to int conversion failed for %s: %v", kStr, err)
	}
	input := lines[1]

	result, err := rosa.KmerComposition(input, k)
	if err != nil {
		log.Fatalf("rosa.KmerComposition: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	for _, kmer := range result {
		fmt.Printf("%s\n", kmer)
	}
	fmt.Printf("\n")
}

54
chapter3/ba3b.go

@ -0,0 +1,54 @@ @@ -0,0 +1,54 @@
package rosalindchapter3
import (
"fmt"
"log"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA3bDescription prints the banner for Rosalind problem BA3b
// (Reconstruct string from genome path): a separator line, the problem name
// and title, a short summary, and the problem URL, one item per line on
// stdout.
func BA3bDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3b:",
		"Reconstruct string from genome path",
		"",
		"Reconstruct a string from its genome path, i.e., sequential fragments of overlapping DNA.",
		"",
		"URL: http://rosalind.info/problems/ba3b/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA3b runs Rosalind problem BA3b on the given input file.
//
// Every line of the file is stripped of surrounding whitespace and the
// resulting list is passed to rosa.ReconstructGenomeFromPath; the value it
// returns is printed to stdout.
//
// A failure to read the file or to reconstruct the genome is reported through
// log.Fatalf, which terminates the program.
func BA3b(filename string) {

	BA3bDescription()

	// Read the contents of the input file as a list of lines
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Each trimmed line is one fragment of the genome path. TrimSpace also
	// removes tabs and carriage returns, not just spaces.
	for i, line := range lines {
		lines[i] = strings.TrimSpace(line)
	}

	genome, err := rosa.ReconstructGenomeFromPath(lines)
	if err != nil {
		// Include the underlying error so the cause is visible in the log
		log.Fatalf("rosa.ReconstructGenomeFromPath: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(genome)
}

54
chapter3/ba3c.go

@ -0,0 +1,54 @@ @@ -0,0 +1,54 @@
package rosalindchapter3
import (
"fmt"
"log"
"strings"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA3cDescription prints the banner for Rosalind problem BA3c
// (Construct the overlap graph of a set of k-mers): a separator line, the
// problem name and title, a short summary, and the problem URL, one item per
// line on stdout.
func BA3cDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA3c:",
		"Construct the overlap graph of a set of k-mers",
		"",
		"Given a set of overlapping k-mers, construct the overlap graph and print a sorted adjacency matrix",
		"",
		"URL: http://rosalind.info/problems/ba3c/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA3c runs Rosalind problem BA3c on the given input file.
//
// Every line of the file has surrounding spaces removed and the resulting
// list is passed to rosa.OverlapGraph; the String() form of the returned
// graph is printed to stdout.
//
// A failure to read the file or to build the graph is reported through
// log.Fatalf, which terminates the program.
func BA3c(filename string) {

	BA3cDescription()

	// Read the contents of the input file as a list of lines
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Each trimmed line is one k-mer of the overlap graph
	for i, line := range lines {
		lines[i] = strings.Trim(line, " ")
	}

	g, err := rosa.OverlapGraph(lines)
	if err != nil {
		// Name the function that actually failed and keep the cause
		log.Fatalf("rosa.OverlapGraph: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(g.String())
}

9
chapter3/chapter3_test.go

@ -0,0 +1,9 @@ @@ -0,0 +1,9 @@
package rosalindchapter3
import "testing"
// TestChapter03 runs each Chapter 3 solution, in order, against its input
// file under for_real/. Nothing is asserted on the printed output; the test
// only exercises the three entry points.
func TestChapter03(t *testing.T) {
	cases := []struct {
		solve func(string)
		input string
	}{
		{BA3a, "for_real/rosalind_ba3a.txt"},
		{BA3b, "for_real/rosalind_ba3b.txt"},
		{BA3c, "for_real/rosalind_ba3c.txt"},
	}
	for _, c := range cases {
		c.solve(c.input)
	}
}

2
chapter3/for_real/rosalind_ba3a.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
50
GGGGGAAACTTACGGAGTACAAGAAGACCCGGCACAAAGAGAAAACACGTTGCTCGTTAGCTTAAGTTAAGACGTATCGGATATCTATCGTATCCTCGTAGTATTGCTAGCCACTTCACTGGACCAGGCTTACGTATTAGCCTTATGACCCCATTTCGTCTCCGCTGCTACAGCTGTGGAGTTGACGCGTCCGGTGGGCCCTCCGTTAGCAGGTCAGCTCATATTTTCGGCAAGAAATTACCCGGAACGGACCGAAAATGGGGTACAACATGCCCACCCACAACTTAGTACACAACGCTCAGCAAGTAGCTAACGACCGCTGCCGTCGTCAGTATTAGACGCACTTAACCGTACGGAATCCGTGAGTCCTGTTTCCGCCGATCGAATTACGCGCCCGGGTCGTGGGTCCAAAGGTGGCCGATCTCACGTACTGGTGAGTCGCGCGGTCACTTGGCTGTGAGGTCCACCGGCGGCCACAGTAATCTCTGGTGCACCCAGAATCGAGTCTGGATTGTGCACAAAGCTGCCCGCCTCTATTTCTCGGACCTGGCAGAACGCAACGGATGGGTTGAAGATTGGGCCGGTTCCGATGCCCCAAAGTACCCACATTTACTAGGGTGAGGCTGTTCTTTTGAGAGTAGAGACGAAAGCACCCCGACGTAACTGCTGCACACGGGGCTGCTCGGGATACTGTGCCGGAACTAGCGAGGCTCTACCCTCATCGGAAACCAGGCCTCATAATTCTTACAGCGTACTGTGTACTCCACAAGGAGCTGACCAGACATTCCACGTCCATGGATTCGGCTCATGCATACCTCCCGATCCACTCCTGAGCACATTGGATGGACACTTGAACGATGTCCTTAGCGCACGAGACATCAATTCGTGACGGTAGATTGCTCTCACCCTGATGCGGGTAAGTCACGTATTACCCGGCGTGCGGTATGTAGTAATACAGCTATCTACAACAAATGCAACCCGGCAGGTCTCCCATAGACCA

4976
chapter3/for_real/rosalind_ba3b.txt

File diff suppressed because it is too large Load Diff

981
chapter3/for_real/rosalind_ba3c.txt

@ -0,0 +1,981 @@ @@ -0,0 +1,981 @@
TGTACCTCGGGCTTTAAGGT
CGGAACGGGAATCCTGGGAG
ACTAGCTGCACTATCGGTTT
CGATCTCCAGGAAGATGTTG
AGCGGGAACTGCTGCAGGGC
GGGCTCCCGCGCCAGTGCCC
AATGCCTTTCTTTAGGCAGC
AGAAACCGAAATCGTGCCCC
TCATGGTTACAAAATTTACG
CCAGGTCAAAGGGAGCTATA
GCGCCCTGGTAAGTTACGCA
GACAATGGCCCGTGTGAATG
CGACTAGATAGATCCAATTG
ATTTTGGACCGCTGTCCTTC
CGACGTTCAGCGAAGAACGA
ACAGCCATCAAAGGAGAGCC
AGCAACAATAATTACGTCAC
AAATGGTCGAAACTAGCTGC
CAACCTGAATGTCGGGTCCG
AAAATTTTGGACCGCTGTCC
TGGTATCTCAAGAACCCTCT
GGAGGGCCAACCTGAATGTC
ACTTGTAACCCGTCGGAACG
GCCATTTGTGCCTTTTTGGT
GAACCCTCTCTCTACAGGCT
GTGCCTTTTTGGTGTGGAAC
AAGGTCCCACACAGCGTGCC
TCTCTCTACAGGCTAGTCAT
CTGCTGGAAACGACTAGATA
GCTTTAAGGTTAGGGGGTAA
AGCGAGGCCCTTGACACGAA
GATTGAATAGAGATATTCGA
CTCTCTCTACAGGCTAGTCA
CATCTTTTTGCCCATTATAA
CTGCAGGGCTCCCGCGCCAG
CATTAACACGACGTTCAGCG
TTCAGCGCCCTGGTAAGTTA
GAATGTGGAAATCGATCGTA
ATTCAGCGCCCTGGTAAGTT
TTTGGCCGAACCAGGTCAAA
ATCGGAGGAGTAGTTGGTAC
CGAAATCGTGCCCCATGTTC
GGTTGTGATCAGCGGCCGGG
GCAGCATTCAGCGCCCTGGT
GAGGTCCATTATTAACACGG
CCCATTATAATCCACATGGC
GTGATCAGCGGCCGGGACAA
GGGCCAACCTGAATGTCGGG
CGGAGGAGTAGTTGGTACCA
GCGAAGAACGAATACGACAG
GTCACCATTCCCAGCAAATG
CAACTTGTAACCCGTCGGAA
CCCGTGTGAATGTGGAAATC
ACAGAGCGAGGCCCTTGACA
TGTCCTTCCAGTGCTCGCGG
GCAATTACGAATGACCAGGC
AGAACGAATACGACAGAGCG
CTAGTCATGCGGGCTCGCAC
ATCCAGGAGAGATTGAATAG
GAGAGTCACATACACAATGC
TCCCCTGGAAGCGCGCTCCC
GTTGGTACCAACGCAAGAGG
AAATCGATCGTATACCGCAG
CAGCCATCAAAGGAGAGCCC
GTCCATTATTAACACGGGCT
TTAGGCAGCATTCAGCGCCC
GATCAGCGGCCGGGACAATG
ATTACGAATGACCAGGCTCA
GAGCCCCTTGCGCTGTATCG
TGGTCACTTGCTGCTGGAAA
AAAATTTACGTCCGAGCTAT
GGAGTAGTTGGTACCAACGC
GAAACGACTAGATAGATCCA
GTGCATCCAGGAGAGATTGA
GCCGAAATCACGCGCATTGT
TAGCAACAATAATTACGTCA
CTCAAGAACCCTCTCTCTAC
GAATCCTGGGAGGGCCAACC
TGCCCCATGTTCGCGTTTAA
CAATGCGGCTGTCGATCTCC
GGGAGCTATAACAGCCATCA
TACGAATGACCAGGCTCAAT
GACATCAAGGTCCCACACAG
CCTTCCAGTGCTCGCGGGGT
TTTCTTTAGGCAGCATTCAG
TCCCAGCAAATGCCTTTCTT
ACTGCTGCAGGGCTCCCGCG
CGTTCAGCGAAGAACGAATA
GGAACGGGAATCCTGGGAGG
GGCTAGTCATGCGGGCTCGC
GGTCGAAACTAGCTGCACTA
TCGGCAAAGTTAGAGCGGGA
CAGGTCAAAGGGAGCTATAA
ATAGCAACAATAATTACGTC
TAATTACGTCACCATTCCCA
GCAAAAATTTTGGACCGCTG
GGTCAAAGGGAGCTATAACA
TAGCTTCACAAATGGTCGAA
GGAACTGCTGCAGGGCTCCC
CACTTGCTGCTGGAAACGAC
CCTGAATGTCGGGTCCGAGT
AGGTTAGGGGGTAAGGCCAT
CCAGCAAATGCCTTTCTTTA
GGAGAGCCCCTTGCGCTGTA
CCCATTCTTCCCCTGGAAGC
CCATTCCCAGCAAATGCCTT
TCGAGAGTCACATACACAAT
AACCCTCTCTCTACAGGCTA
CATGTTCGCGTTTAAGATGA
GATCCAATTGGCCGAAATCA
GTAGTTGGTACCAACGCAAG
AACGGGAATCCTGGGAGGGC
TGGAAATCGATCGTATACCG
ACCGCTGTCCTTCCAGTGCT
CGAGCTATAGCAGCAAAAAT
AGCCCCTTGCGCTGTATCGT
GATATTCGAGAGTCACATAC
CGAGTTTGGCCGAACCAGGT
ACCAGGTCAAAGGGAGCTAT
CAAAGGTGGTTGTGATCAGC
TCAGCGGCCGGGACAATGGC
GGTAAGGCCATTTGTGCCTT
GAAGTTCGGCAAAGTTAGAG
AAAAATGGTATCTCAAGAAC
AAACGACTAGATAGATCCAA
ATCGTAGCTTCACAAATGGT
TGAGTGACATCAAGGTCCCA
CATTCTTCCCCTGGAAGCGC
GGTCCCACACAGCGTGCCAT
CAATTACGAATGACCAGGCT
CTTGTAACCCGTCGGAACGG
TGATCAGCGGCCGGGACAAT
ATCCACATGGCTATAGCAAC
GGCTGTCGATCTCCAGGAAG
CACATGGCTATAGCAACAAT
TTAAGATGAGTGACATCAAG
GAGATTGAATAGAGATATTC
CCTGGGAGGGCCAACCTGAA
GGGCTTTAAGGTTAGGGGGT
AGAGCGAGGCCCTTGACACG
CCCACACAGCGTGCCATTCA
TGGCTATAGCAACAATAATT
CCTGGTAAGTTACGCATTAA
GCCCTGGTAAGTTACGCATT
CCCGTCGGAACGGGAATCCT
ACGTTCAGCGAAGAACGAAT
AAGTTACGCATTAACACGAC
GCATCCAGGAGAGATTGAAT
GGTTTGGTCACTTGCTGCTG
CCCGCGCCAGTGCCCATCTC
CCGCTGTCCTTCCAGTGCTC
GTGTACCTCGGGCTTTAAGG
ACAAAGGTGGTTGTGATCAG
CAGTGCTCGCGGGGTGTACC
AATCGTGCCCCATGTTCGCG
CCTTTATCTCGTGCATCCAG
GGGAACTGCTGCAGGGCTCC
GAAATCGTGCCCCATGTTCG
TAACACGGGCTCATCTTTTT
AACACTAAACAAAGGTGGTT
ATGTTCGCGTTTAAGATGAG
GCTGCTGGAAACGACTAGAT
GTATCTCAAGAACCCTCTCT
CCAACGCAAGAGGTCCATTA
CACGGGCTCATCTTTTTGCC
TGCTGCTGGAAACGACTAGA
TTGTTGTCGTCATGGTTACA
CGCACGCGGTCAACTTGTAA
GCCGGGACAATGGCCCGTGT
GGAATCCTGGGAGGGCCAAC
TAGATAGATCCAATTGGCCG
AATGGCCCGTGTGAATGTGG
CTTCACAAATGGTCGAAACT
AGAGCGGGAACTGCTGCAGG
TCCAGTGCTCGCGGGGTGTA
ACACAGCGTGCCATTCATCC
TCTTTAGGCAGCATTCAGCG
CCAGTGCCCATCTCGGCGAA
TACAGGCTAGTCATGCGGGC
GTCGGAACGGGAATCCTGGG
TCGGAACGGGAATCCTGGGA
GAGTAGTTGGTACCAACGCA
CCCTGGAAGCGCGCTCCCGC
AACAATAATTACGTCACCAT
CTGAATGTCGGGTCCGAGTT
CGCGTTTAAGATGAGTGACA
TCGGGCTTTAAGGTTAGGGG
ATACCGCAGAAGTTCGGCAA
ACGAATACGACAGAGCGAGG
TGGTTACAAAATTTACGTCC
CGCATTAACACGACGTTCAG
GCGAAGAAACCGAAATCGTG
ACTAAACAAAGGTGGTTGTG
TTGCCCATTATAATCCACAT
CCGGGACAATGGCCCGTGTG
TCCAGGAGAGATTGAATAGA
TAATCCACATGGCTATAGCA
AATGGTCGAAACTAGCTGCA
AAGAACGAATACGACAGAGC
AATCCTGGGAGGGCCAACCT
CGAAGAACGAATACGACAGA
AATGCGGCTGTCGATCTCCA
AGATGAGTGACATCAAGGTC
ATCTCAAGAACCCTCTCTCT
GTGCCCATCTCGGCGAAAAA
ATAGATCCAATTGGCCGAAA
AGGCCCTTGACACGAACACT
CACAAATGGTCGAAACTAGC
ATTGTTGTCGTCATGGTTAC
TGTCGATCTCCAGGAAGATG
GTTCGGCAAAGTTAGAGCGG
GTCAAAGGGAGCTATAACAG
CACGACGTTCAGCGAAGAAC
CGTGCCCCATGTTCGCGTTT
TGGACCGCTGTCCTTCCAGT
TCTCGTGCATCCAGGAGAGA
AGTGACATCAAGGTCCCACA
CTCTACAGGCTAGTCATGCG
CAGCGGCCGGGACAATGGCC
CGTCCGAGCTATAGCAGCAA
GCGGCCGGGACAATGGCCCG
GAAGAAACCGAAATCGTGCC
AGGAGAGCCCCTTGCGCTGT
CAAGGTCCCACACAGCGTGC
GTGCTCGCGGGGTGTACCTC
CTTCCCCTGGAAGCGCGCTC
GGCTCAATCGGAGGAGTAGT
GAACCCATTCTTCCCCTGGA
GTATACCGCAGAAGTTCGGC
GAATGACCAGGCTCAATCGG
ATAACAGCCATCAAAGGAGA
GAGCTATAACAGCCATCAAA
GGTCCGAGTTTGGCCGAACC
CCCCTTGCGCTGTATCGTAG
CTGGTAAGTTACGCATTAAC
CCTTGACACGAACACTAAAC
AGATCCAATTGGCCGAAATC
AGTGCTCGCGGGGTGTACCT
GTCAACTTGTAACCCGTCGG
TACGCATTAACACGACGTTC
ACAAAATTTACGTCCGAGCT
GCAACAATAATTACGTCACC
CAATTGGCCGAAATCACGCG
ACCCGTCGGAACGGGAATCC
GGTTACAAAATTTACGTCCG
ACGACAGAGCGAGGCCCTTG
ATTGAATAGAGATATTCGAG
TCATGCGGGCTCGCACGCGG
GCAAAGTTAGAGCGGGAACT
TGTATCGTAGCTTCACAAAT
CCCTCTCTCTACAGGCTAGT
TCTACAGGCTAGTCATGCGG
GATAGATCCAATTGGCCGAA
CCGAAATCGTGCCCCATGTT
TTTAAGGTTAGGGGGTAAGG
CCGCCTTTATCTCGTGCATC
GGCCGGGACAATGGCCCGTG
AATGTGGAAATCGATCGTAT
ACACGGGCTCATCTTTTTGC
GAATGTCGGGTCCGAGTTTG
TGGAAGCGCGCTCCCGCCTT
GCTCCCGCCTTTATCTCGTG
CGCCCTGGTAAGTTACGCAT
GTCGTCATGGTTACAAAATT
GCGGTCAACTTGTAACCCGT
ATCGTGCCCCATGTTCGCGT
GAAACCGAAATCGTGCCCCA
TGTGGTGCAATTACGAATGA
CGAAATCACGCGCATTGTTG
AACCTGAATGTCGGGTCCGA
CGCCTTTATCTCGTGCATCC
GTTTAAGATGAGTGACATCA
ATAATTACGTCACCATTCCC
AGGAGTAGTTGGTACCAACG
GGTTAGGGGGTAAGGCCATT
TCATCTTTTTGCCCATTATA
ACAAATGGTCGAAACTAGCT
TCGCGGGGTGTACCTCGGGC
CGGGCTCATCTTTTTGCCCA
GCCATCAAAGGAGAGCCCCT
TTTACGTCCGAGCTATAGCA
TGCGCTGTATCGTAGCTTCA
ACGACTAGATAGATCCAATT
ACAATGCGGCTGTCGATCTC
GTGAATGTGGAAATCGATCG
CCGAAATCACGCGCATTGTT
TGGTGCGAAGAAACCGAAAT
CTATCGGTTTGGTCACTTGC
CCCAGCAAATGCCTTTCTTT
ACCCTCTCTCTACAGGCTAG
GCTGCAGGGCTCCCGCGCCA
TGCGGCTGTCGATCTCCAGG
CCAGGCTCAATCGGAGGAGT
TTCGAGAGTCACATACACAA
AATTACGTCACCATTCCCAG
GGTAAGTTACGCATTAACAC
CTGTGGTGCAATTACGAATG
TGTTGGTGCGAAGAAACCGA
AGGGCCAACCTGAATGTCGG
AAGGTTAGGGGGTAAGGCCA
AATTTACGTCCGAGCTATAG
ATACACAATGCGGCTGTCGA
TCGTGCCCCATGTTCGCGTT
CAGAAGTTCGGCAAAGTTAG
TTGAATAGAGATATTCGAGA
TCAGCGCCCTGGTAAGTTAC
AAATCGTGCCCCATGTTCGC
ATGCCTTTCTTTAGGCAGCA
TGTCGTCATGGTTACAAAAT
CCATGTTCGCGTTTAAGATG
GCATTCAGCGCCCTGGTAAG
CCCATGTTCGCGTTTAAGAT
ATCCTGGGAGGGCCAACCTG
AACACGACGTTCAGCGAAGA
GAATAGAGATATTCGAGAGT
GTACCTCGGGCTTTAAGGTT
CAGGCTCAATCGGAGGAGTA
TTTAGGCAGCATTCAGCGCC
ATTCTTCCCCTGGAAGCGCG
ACGAATGACCAGGCTCAATC
GGCTCATCTTTTTGCCCATT
GAGCGAGGCCCTTGACACGA
AATGACCAGGCTCAATCGGA
AAGAGGTCCATTATTAACAC
GGGTGTACCTCGGGCTTTAA
TTTTTGGTGTGGAACCCATT
TGACCAGGCTCAATCGGAGG
ACACTAAACAAAGGTGGTTG
GGGGTGTACCTCGGGCTTTA
ACGTCACCATTCCCAGCAAA
CGAAACTAGCTGCACTATCG
CCTCGGGCTTTAAGGTTAGG
TTAACACGGGCTCATCTTTT
AAATGGTATCTCAAGAACCC
TGACATCAAGGTCCCACACA
GAGGCCCTTGACACGAACAC
CTTGCGCTGTATCGTAGCTT
GTCCGAGTTTGGCCGAACCA
GGAGGAGTAGTTGGTACCAA
TTCCAGTGCTCGCGGGGTGT
GAACGAATACGACAGAGCGA
CGGGTCCGAGTTTGGCCGAA
AATTACGAATGACCAGGCTC
GTCGGGTCCGAGTTTGGCCG
TATAGCAGCAAAAATTTTGG
AGTAGTTGGTACCAACGCAA
TGCTGGAAACGACTAGATAG
TGGCCCGTGTGAATGTGGAA
TGTTCGCGTTTAAGATGAGT
AGCGGCCGGGACAATGGCCC
ATTCCCAGCAAATGCCTTTC
TACCTCGGGCTTTAAGGTTA
AGTCATGCGGGCTCGCACGC
GGCAGCATTCAGCGCCCTGG
CGAGAGTCACATACACAATG
GTGTGGAACCCATTCTTCCC
CATCCAGGAGAGATTGAATA
GGCCGAAATCACGCGCATTG
TTGACACGAACACTAAACAA
TCTCAAGAACCCTCTCTCTA
TGCATCCAGGAGAGATTGAA
ATCTCCAGGAAGATGTTGGT
CACATACACAATGCGGCTGT
TTGGTGCGAAGAAACCGAAA
AGTGCCCATCTCGGCGAAAA
GCGCTCCCGCCTTTATCTCG
TAGATCCAATTGGCCGAAAT
GGCCCTTGACACGAACACTA
CAAAAATTTTGGACCGCTGT
CATCAAGGTCCCACACAGCG
CAGCAAATGCCTTTCTTTAG
CATACACAATGCGGCTGTCG
ATCGTATACCGCAGAAGTTC
GAATACGACAGAGCGAGGCC
TCAACTTGTAACCCGTCGGA
TGTAACCCGTCGGAACGGGA
TGCAATTACGAATGACCAGG
TCGTAGCTTCACAAATGGTC
TCCATTATTAACACGGGCTC
TTGGCCGAAATCACGCGCAT
CCTCTCTCTACAGGCTAGTC
ATTGGCCGAAATCACGCGCA
GCTAGTCATGCGGGCTCGCA
AGGTCCCACACAGCGTGCCA
GGAAATCGATCGTATACCGC
CGCGCCAGTGCCCATCTCGG
TCGATCGTATACCGCAGAAG
CCAATTGGCCGAAATCACGC
ATACGACAGAGCGAGGCCCT
CGCTCCCGCCTTTATCTCGT
TGGCCGAAATCACGCGCATT
AATAATTACGTCACCATTCC
CAGCGAAGAACGAATACGAC
TACGACAGAGCGAGGCCCTT
GAAGCGCGCTCCCGCCTTTA
TCTTCCCCTGGAAGCGCGCT
GAGTGACATCAAGGTCCCAC
GCGGGCTCGCACGCGGTCAA
CGGCTGTCGATCTCCAGGAA
CCGTCGGAACGGGAATCCTG
GTAAGTTACGCATTAACACG
ATCTTTTTGCCCATTATAAT
AATACGACAGAGCGAGGCCC
GACACGAACACTAAACAAAG
GTGTGAATGTGGAAATCGAT
TGCACTATCGGTTTGGTCAC
GTCATGCGGGCTCGCACGCG
ACCCATTCTTCCCCTGGAAG
ATCTCGTGCATCCAGGAGAG
AAGTTAGAGCGGGAACTGCT
CTATAGCAACAATAATTACG
GTGCAATTACGAATGACCAG
CAGGCTAGTCATGCGGGCTC
ATAATCCACATGGCTATAGC
GCTATAACAGCCATCAAAGG
AGATGTTGGTGCGAAGAAAC
ATGGTCGAAACTAGCTGCAC
GGAGAGATTGAATAGAGATA
CGACAGAGCGAGGCCCTTGA
TCGCACGCGGTCAACTTGTA
GAAATCACGCGCATTGTTGT
TATACCGCAGAAGTTCGGCA
CAGCATTCAGCGCCCTGGTA
TTTTGGTGTGGAACCCATTC
TATAATCCACATGGCTATAG
ATCACGCGCATTGTTGTCGT
ATTATAATCCACATGGCTAT
TGTCGGGTCCGAGTTTGGCC
AAGTTCGGCAAAGTTAGAGC
ACGGGAATCCTGGGAGGGCC
ATTTACGTCCGAGCTATAGC
CTGGAAGCGCGCTCCCGCCT
GCAAATGCCTTTCTTTAGGC
TTACGCATTAACACGACGTT
AGGTGGTTGTGATCAGCGGC
GCCCGTGTGAATGTGGAAAT
GTCCGAGCTATAGCAGCAAA
AGGTCAAAGGGAGCTATAAC
GCGGCTGTCGATCTCCAGGA
TTGCGCTGTATCGTAGCTTC
AAAGTTAGAGCGGGAACTGC
TTCCCCTGGAAGCGCGCTCC
CCCCATGTTCGCGTTTAAGA
TATCTCAAGAACCCTCTCTC
CCGCGCCAGTGCCCATCTCG
GGAACCCATTCTTCCCCTGG
ATAGCAGCAAAAATTTTGGA
CGTTTAAGATGAGTGACATC
TATAACAGCCATCAAAGGAG
CCAGGAAGATGTTGGTGCGA
TACGTCACCATTCCCAGCAA
GCCTTTCTTTAGGCAGCATT
AACAGCCATCAAAGGAGAGC
GCATTAACACGACGTTCAGC
CTCCCGCCTTTATCTCGTGC
CGTGCATCCAGGAGAGATTG
CTGGAAACGACTAGATAGAT
GCATTGTTGTCGTCATGGTT
CGGCCGGGACAATGGCCCGT
TTATTAACACGGGCTCATCT
TAGCAGCAAAAATTTTGGAC
TCAAGAACCCTCTCTCTACA
AGTCACATACACAATGCGGC
CAACGCAAGAGGTCCATTAT
CGGGCTCGCACGCGGTCAAC
CTCGCGGGGTGTACCTCGGG
ATGTGGAAATCGATCGTATA
AGCTATAACAGCCATCAAAG
TTAAGGTTAGGGGGTAAGGC
TCCAATTGGCCGAAATCACG
AACGACTAGATAGATCCAAT
GGCCAACCTGAATGTCGGGT
CCTTTCTTTAGGCAGCATTC
AGTTGGTACCAACGCAAGAG
TGTGAATGTGGAAATCGATC
CATGGTTACAAAATTTACGT
TTGCTGCTGGAAACGACTAG
CATGCGGGCTCGCACGCGGT
CAACAATAATTACGTCACCA
GAGAGATTGAATAGAGATAT
GTGCGAAGAAACCGAAATCG
GGGCTCGCACGCGGTCAACT
GCAAGAGGTCCATTATTAAC
GTCACTTGCTGCTGGAAACG
GGCCGAACCAGGTCAAAGGG
CGCGCATTGTTGTCGTCATG
CATTATAATCCACATGGCTA
GGTGCAATTACGAATGACCA
GGGAATCCTGGGAGGGCCAA
CATCTCGGCGAAAAATGGTA
CGTGTGAATGTGGAAATCGA
GCTATAGCAACAATAATTAC
CCGCAGAAGTTCGGCAAAGT
ACTAGATAGATCCAATTGGC
AAACAAAGGTGGTTGTGATC
TCGCGTTTAAGATGAGTGAC
TCGGGTCCGAGTTTGGCCGA
AAAATGGTATCTCAAGAACC
GGACAATGGCCCGTGTGAAT
GACCAGGCTCAATCGGAGGA
CAATAATTACGTCACCATTC
ATCGGTTTGGTCACTTGCTG
TCGGCGAAAAATGGTATCTC
AACCCATTCTTCCCCTGGAA
TCCCGCCTTTATCTCGTGCA
GATCTCCAGGAAGATGTTGG
GAAAAATGGTATCTCAAGAA
GGCCCGTGTGAATGTGGAAA
GTCCTTCCAGTGCTCGCGGG
TTGTAACCCGTCGGAACGGG
ACGCAAGAGGTCCATTATTA
GCTCGCGGGGTGTACCTCGG
TAAGGTTAGGGGGTAAGGCC
CCCCTGGAAGCGCGCTCCCG
GCTATAGCAGCAAAAATTTT
TCTCGGCGAAAAATGGTATC
ATCCAATTGGCCGAAATCAC
ATGGCCCGTGTGAATGTGGA
GCCCCTTGCGCTGTATCGTA
TTGTCGTCATGGTTACAAAA
AACCGAAATCGTGCCCCATG
CCAGTGCTCGCGGGGTGTAC
CCATTATAATCCACATGGCT
ATGTTGGTGCGAAGAAACCG
ATGGTTACAAAATTTACGTC
AGTTTGGCCGAACCAGGTCA
GTACCAACGCAAGAGGTCCA
GAGGAGTAGTTGGTACCAAC
CAAAGTTAGAGCGGGAACTG
TTAGGGGGTAAGGCCATTTG
GAAGATGTTGGTGCGAAGAA
ATGGCTATAGCAACAATAAT
AAATCACGCGCATTGTTGTC
TATTCGAGAGTCACATACAC
AAATGCCTTTCTTTAGGCAG
CGAATACGACAGAGCGAGGC
AGATAGATCCAATTGGCCGA
TGACACGAACACTAAACAAA
GCCAACCTGAATGTCGGGTC
CTGCTGCAGGGCTCCCGCGC
CTCGTGCATCCAGGAGAGAT
TTTGCCCATTATAATCCACA
ACGAACACTAAACAAAGGTG
TGCAGGGCTCCCGCGCCAGT
AGAAGTTCGGCAAAGTTAGA
TAGAGATATTCGAGAGTCAC
CTGCACTATCGGTTTGGTCA
GGCCATTTGTGCCTTTTTGG
CACACAGCGTGCCATTCATC
CAAATGGTCGAAACTAGCTG
TTACAAAATTTACGTCCGAG
AACCAGGTCAAAGGGAGCTA
GCTCAATCGGAGGAGTAGTT
CCCTGGTAAGTTACGCATTA
CCGAACCAGGTCAAAGGGAG
TCCTTCCAGTGCTCGCGGGG
GCGGGAACTGCTGCAGGGCT
ACACGAACACTAAACAAAGG
GGGACAATGGCCCGTGTGAA
TCAATCGGAGGAGTAGTTGG
TAAGATGAGTGACATCAAGG
CCACATGGCTATAGCAACAA
TGAATAGAGATATTCGAGAG
AGAGATTGAATAGAGATATT
ACGGGCTCATCTTTTTGCCC
CTTTTTGCCCATTATAATCC
CTCGCACGCGGTCAACTTGT
CTTCCAGTGCTCGCGGGGTG
CTCATCTTTTTGCCCATTAT
AGGCCATTTGTGCCTTTTTG
AATTTTGGACCGCTGTCCTT
ATTTGTGCCTTTTTGGTGTG
ACACGACGTTCAGCGAAGAA
TTGGTCACTTGCTGCTGGAA
GTATCGTAGCTTCACAAATG
ACGCGCATTGTTGTCGTCAT
CCCTTGCGCTGTATCGTAGC
GGAAGCGCGCTCCCGCCTTT
CGTATACCGCAGAAGTTCGG
AAAGGAGAGCCCCTTGCGCT
GATGAGTGACATCAAGGTCC
CGCTGTATCGTAGCTTCACA
TCACATACACAATGCGGCTG
AAGAACCCTCTCTCTACAGG
CAGAGCGAGGCCCTTGACAC
CCACACAGCGTGCCATTCAT
GGTCAACTTGTAACCCGTCG
TATCTCGTGCATCCAGGAGA
AAGGAGAGCCCCTTGCGCTG
GGTCCATTATTAACACGGGC
GTCGATCTCCAGGAAGATGT
TTAGAGCGGGAACTGCTGCA
AAAGGTGGTTGTGATCAGCG
TGGTTGTGATCAGCGGCCGG
GTTAGAGCGGGAACTGCTGC
CCAACCTGAATGTCGGGTCC
GTTCAGCGAAGAACGAATAC
CTCCCGCGCCAGTGCCCATC
TCGTGCATCCAGGAGAGATT
CGCGGGGTGTACCTCGGGCT
CACTAAACAAAGGTGGTTGT
CCCTTGACACGAACACTAAA
CGGTTTGGTCACTTGCTGCT
ACTATCGGTTTGGTCACTTG
TGGAAACGACTAGATAGATC
GTGGTGCAATTACGAATGAC
TGGTGCAATTACGAATGACC
GACGTTCAGCGAAGAACGAA
AATCGATCGTATACCGCAGA
TTTGTGCCTTTTTGGTGTGG
CATTCAGCGCCCTGGTAAGT
GCGCCAGTGCCCATCTCGGC
GTAACCCGTCGGAACGGGAA
AACTGCTGCAGGGCTCCCGC
AGAGATATTCGAGAGTCACA
GTCCCACACAGCGTGCCATT
AGCTATAGCAGCAAAAATTT
TTTTGGACCGCTGTCCTTCC
TCGGAGGAGTAGTTGGTACC
CTAGCTGCACTATCGGTTTG
TCCAGGAAGATGTTGGTGCG
AGATTGAATAGAGATATTCG
CAGGAAGATGTTGGTGCGAA
TCAAAGGAGAGCCCCTTGCG
CCATTATTAACACGGGCTCA
AAGGGAGCTATAACAGCCAT
CTATAACAGCCATCAAAGGA
AAGGTGGTTGTGATCAGCGG
GAGGGCCAACCTGAATGTCG
GCGAAAAATGGTATCTCAAG
TTATAATCCACATGGCTATA
AGGCTCAATCGGAGGAGTAG
TGCGGGCTCGCACGCGGTCA
TCACTTGCTGCTGGAAACGA
AGGGAGCTATAACAGCCATC
GGACCGCTGTCCTTCCAGTG
TTCACAAATGGTCGAAACTA
TAACACGACGTTCAGCGAAG
CCGTGTGAATGTGGAAATCG
CGGGCTTTAAGGTTAGGGGG
TTGTGATCAGCGGCCGGGAC
GTGACATCAAGGTCCCACAC
TGGCCGAACCAGGTCAAAGG
GACCGCTGTCCTTCCAGTGC
TTCTTCCCCTGGAAGCGCGC
GTCGAAACTAGCTGCACTAT
TACGTCCGAGCTATAGCAGC
CCTTGCGCTGTATCGTAGCT
CGGTCAACTTGTAACCCGTC
CAGTGCCCATCTCGGCGAAA
GCTTCACAAATGGTCGAAAC
GGTGGTTGTGATCAGCGGCC
TGTGCCTTTTTGGTGTGGAA
GGCTCCCGCGCCAGTGCCCA
TGTGGAACCCATTCTTCCCC
TCTTTTTGCCCATTATAATC
CTTTATCTCGTGCATCCAGG
AACGAATACGACAGAGCGAG
GATGTTGGTGCGAAGAAACC
TGCCCATTATAATCCACATG
AATCGGAGGAGTAGTTGGTA
GGGTAAGGCCATTTGTGCCT
GTTTGGCCGAACCAGGTCAA
TCTCTACAGGCTAGTCATGC
GAAGAACGAATACGACAGAG
CGCGCTCCCGCCTTTATCTC
AATCCACATGGCTATAGCAA
CTTTTTGGTGTGGAACCCAT
AGCGCGCTCCCGCCTTTATC
GAGCTATAGCAGCAAAAATT
AGCAAAAATTTTGGACCGCT
TAGTTGGTACCAACGCAAGA
CAAAGGAGAGCCCCTTGCGC
ACAGGCTAGTCATGCGGGCT
GAGAGCCCCTTGCGCTGTAT
GTTTGGTCACTTGCTGCTGG
ATTATTAACACGGGCTCATC
GAAACTAGCTGCACTATCGG
GCTGTCCTTCCAGTGCTCGC
TAAACAAAGGTGGTTGTGAT
CGTCACCATTCCCAGCAAAT
TCCGAGCTATAGCAGCAAAA
GGTGTGGAACCCATTCTTCC
ACCGCAGAAGTTCGGCAAAG
GGTATCTCAAGAACCCTCTC
CGCTGTCCTTCCAGTGCTCG
CATTTGTGCCTTTTTGGTGT
CTCTCTACAGGCTAGTCATG
CAGCGCCCTGGTAAGTTACG
GCCTTTATCTCGTGCATCCA
ACTTGCTGCTGGAAACGACT
AACCCGTCGGAACGGGAATC
CCGAGTTTGGCCGAACCAGG
CTGGGAGGGCCAACCTGAAT
GGTCACTTGCTGCTGGAAAC
CTCCAGGAAGATGTTGGTGC
CGCGGTCAACTTGTAACCCG
CACTATCGGTTTGGTCACTT
GTCATGGTTACAAAATTTAC
AAGATGTTGGTGCGAAGAAA
GCAGGGCTCCCGCGCCAGTG
CTGTCGATCTCCAGGAAGAT
TTCTTTAGGCAGCATTCAGC
CACGCGGTCAACTTGTAACC
TGCCTTTTTGGTGTGGAACC
GAAATCGATCGTATACCGCA
AGGCAGCATTCAGCGCCCTG
GCACGCGGTCAACTTGTAAC
CAGGGCTCCCGCGCCAGTGC
TTTAAGATGAGTGACATCAA
CACGCGCATTGTTGTCGTCA
GAGTTTGGCCGAACCAGGTC
ACCGAAATCGTGCCCCATGT
AGGTCCATTATTAACACGGG
CAAGAACCCTCTCTCTACAG
TTTGGTCACTTGCTGCTGGA
CTTGCTGCTGGAAACGACTA
ACACAATGCGGCTGTCGATC
GTTACGCATTAACACGACGT
AACTTGTAACCCGTCGGAAC
AGCCATCAAAGGAGAGCCCC
GCTGTCGATCTCCAGGAAGA
CAAAGGGAGCTATAACAGCC
TGGGAGGGCCAACCTGAATG
GACTAGATAGATCCAATTGG
TTGTGCCTTTTTGGTGTGGA
GCGTTTAAGATGAGTGACAT
CGGGGTGTACCTCGGGCTTT
GCGCGCTCCCGCCTTTATCT
ATGCGGCTGTCGATCTCCAG
CGCAGAAGTTCGGCAAAGTT
ATGACCAGGCTCAATCGGAG
CATCAAAGGAGAGCCCCTTG
ATAGAGATATTCGAGAGTCA
TGCTCGCGGGGTGTACCTCG
TTGGTACCAACGCAAGAGGT
AGCATTCAGCGCCCTGGTAA
AGGGCTCCCGCGCCAGTGCC
AACTAGCTGCACTATCGGTT
CAATCGGAGGAGTAGTTGGT
GTTCGCGTTTAAGATGAGTG
GTAGCTTCACAAATGGTCGA
CATTGTTGTCGTCATGGTTA
GGGGTAAGGCCATTTGTGCC
CGAAAAATGGTATCTCAAGA
ATCAGCGGCCGGGACAATGG
CTGTCCTTCCAGTGCTCGCG
GGGAGGGCCAACCTGAATGT
TCACGCGCATTGTTGTCGTC
GATCGTATACCGCAGAAGTT
AGCGCCCTGGTAAGTTACGC
TAAGTTACGCATTAACACGA
CCGAGCTATAGCAGCAAAAA
TAGAGCGGGAACTGCTGCAG
ACATCAAGGTCCCACACAGC
ATCGATCGTATACCGCAGAA
GTGGAACCCATTCTTCCCCT
ATTAACACGACGTTCAGCGA
CGGCGAAAAATGGTATCTCA
TAACAGCCATCAAAGGAGAG
TGCCCATCTCGGCGAAAAAT
AATAGAGATATTCGAGAGTC
AAGCGCGCTCCCGCCTTTAT
CTTTAGGCAGCATTCAGCGC
GCACTATCGGTTTGGTCACT
GGCTTTAAGGTTAGGGGGTA
GAGATATTCGAGAGTCACAT
TCAGCGAAGAACGAATACGA
AAATTTACGTCCGAGCTATA
GTGGAAATCGATCGTATACC
GCCCATTATAATCCACATGG
TAAGGCCATTTGTGCCTTTT
CTTTCTTTAGGCAGCATTCA
CTGTATCGTAGCTTCACAAA
TTGGTGTGGAACCCATTCTT
GGAGCTATAACAGCCATCAA
TTCGCGTTTAAGATGAGTGA
AGGGGGTAAGGCCATTTGTG
TGAATGTGGAAATCGATCGT
ACGCATTAACACGACGTTCA
ATTAACACGGGCTCATCTTT
AGGAAGATGTTGGTGCGAAG
GAGCGGGAACTGCTGCAGGG
AATGGTATCTCAAGAACCCT
CTACAGGCTAGTCATGCGGG
CCAGGAGAGATTGAATAGAG
CGAAGAAACCGAAATCGTGC
GAACTGCTGCAGGGCTCCCG
ACCAGGCTCAATCGGAGGAG
TATCGGTTTGGTCACTTGCT
TTTTGCCCATTATAATCCAC
TTCAGCGAAGAACGAATACG
GGCTCGCACGCGGTCAACTT
TCACCATTCCCAGCAAATGC
TGCGAAGAAACCGAAATCGT
AGAGGTCCATTATTAACACG
GGCGAAAAATGGTATCTCAA
CACAATGCGGCTGTCGATCT
TACCAACGCAAGAGGTCCAT
TACACAATGCGGCTGTCGAT
CACGAACACTAAACAAAGGT
GAACACTAAACAAAGGTGGT
CTCAATCGGAGGAGTAGTTG
CGCATTGTTGTCGTCATGGT
AGATATTCGAGAGTCACATA
ACCTGAATGTCGGGTCCGAG
TTACGAATGACCAGGCTCAA
GCCCCATGTTCGCGTTTAAG
AGCTTCACAAATGGTCGAAA
GAACGGGAATCCTGGGAGGG
CGGGAACTGCTGCAGGGCTC
TGGTCGAAACTAGCTGCACT
TCGATCTCCAGGAAGATGTT
CCATTCTTCCCCTGGAAGCG
AAGATGAGTGACATCAAGGT
CATTCCCAGCAAATGCCTTT
TCCACATGGCTATAGCAACA
GCTCCCGCGCCAGTGCCCAT
CGGCAAAGTTAGAGCGGGAA
CTTTAAGGTTAGGGGGTAAG
CCCATCTCGGCGAAAAATGG
CCATTTGTGCCTTTTTGGTG
GCTGTATCGTAGCTTCACAA
CCTGGAAGCGCGCTCCCGCC
AAACTAGCTGCACTATCGGT
AGAACCCTCTCTCTACAGGC
CGGGACAATGGCCCGTGTGA
CATTATTAACACGGGCTCAT
GCGCATTGTTGTCGTCATGG
CTCGGCGAAAAATGGTATCT
AACGCAAGAGGTCCATTATT
ACGACGTTCAGCGAAGAACG
GCTGGAAACGACTAGATAGA
ACATGGCTATAGCAACAATA
CGGGAATCCTGGGAGGGCCA
CAAATGCCTTTCTTTAGGCA
TCGTCATGGTTACAAAATTT
TAGGGGGTAAGGCCATTTGT
GCAGCAAAAATTTTGGACCG
TTTGGTGTGGAACCCATTCT
ACGTCCGAGCTATAGCAGCA
ATGTCGGGTCCGAGTTTGGC
CTAGATAGATCCAATTGGCC
GTTGGTGCGAAGAAACCGAA
CGCAAGAGGTCCATTATTAA
GGCTATAGCAACAATAATTA
AAGAAACCGAAATCGTGCCC
GCCAGTGCCCATCTCGGCGA
AAACCGAAATCGTGCCCCAT
GGGCTCATCTTTTTGCCCAT
GCCGAACCAGGTCAAAGGGA
TTATCTCGTGCATCCAGGAG
AAGGCCATTTGTGCCTTTTT
TGTGATCAGCGGCCGGGACA
GTCACATACACAATGCGGCT
GGCAAAGTTAGAGCGGGAAC
CGCCAGTGCCCATCTCGGCG
ACGCGGTCAACTTGTAACCC
GCTCATCTTTTTGCCCATTA
CGAACCAGGTCAAAGGGAGC
TATCGTAGCTTCACAAATGG
GAGTCACATACACAATGCGG
AGAGCCCCTTGCGCTGTATC
ATCAAGGTCCCACACAGCGT
TAGTCATGCGGGCTCGCACG
GTTAGGGGGTAAGGCCATTT
CAATGGCCCGTGTGAATGTG
GCGAGGCCCTTGACACGAAC
TCAAAGGGAGCTATAACAGC
CAAGAGGTCCATTATTAACA
TCTCCAGGAAGATGTTGGTG
TCCCGCGCCAGTGCCCATCT
AATCACGCGCATTGTTGTCG
ATCTCGGCGAAAAATGGTAT
AATGTCGGGTCCGAGTTTGG
TCAAGGTCCCACACAGCGTG
GGTACCAACGCAAGAGGTCC
CGTCATGGTTACAAAATTTA
GACAGAGCGAGGCCCTTGAC
GGAAGATGTTGGTGCGAAGA
AACACGGGCTCATCTTTTTG
CTATAGCAGCAAAAATTTTG
ATGGTATCTCAAGAACCCTC
TGTGGAAATCGATCGTATAC
TGCCTTTCTTTAGGCAGCAT
TCGGTTTGGTCACTTGCTGC
TGGTACCAACGCAAGAGGTC
CGAATGACCAGGCTCAATCG
TTGGACCGCTGTCCTTCCAG
TGTTGTCGTCATGGTTACAA
TAGGCAGCATTCAGCGCCCT
GCCCATCTCGGCGAAAAATG
ACAATAATTACGTCACCATT
TACAAAATTTACGTCCGAGC
AATTGGCCGAAATCACGCGC
TTTGGACCGCTGTCCTTCCA
GCGGGGTGTACCTCGGGCTT
GCAGAAGTTCGGCAAAGTTA
CCCGCCTTTATCTCGTGCAT
CAAAATTTACGTCCGAGCTA
CCATCTCGGCGAAAAATGGT
TGAATGTCGGGTCCGAGTTT
TCCTGGGAGGGCCAACCTGA
TCCGAGTTTGGCCGAACCAG
AGGCTAGTCATGCGGGCTCG
TATTAACACGGGCTCATCTT
GCTGCACTATCGGTTTGGTC
TATAGCAACAATAATTACGT
GTTACAAAATTTACGTCCGA
GGGTCCGAGTTTGGCCGAAC
GCCTTTTTGGTGTGGAACCC
AGGAGAGATTGAATAGAGAT
TGGTGTGGAACCCATTCTTC
ACAATGGCCCGTGTGAATGT
TAACCCGTCGGAACGGGAAT
AGTTACGCATTAACACGACG
CAGGAGAGATTGAATAGAGA
GCGCTGTATCGTAGCTTCAC
GCTCGCACGCGGTCAACTTG
GGTGCGAAGAAACCGAAATC
ATGCGGGCTCGCACGCGGTC
GGTGTACCTCGGGCTTTAAG
TACCGCAGAAGTTCGGCAAA
GTGCCCCATGTTCGCGTTTA
GTAAGGCCATTTGTGCCTTT
GCCCTTGACACGAACACTAA
GAACCAGGTCAAAGGGAGCT
TTACGTCCGAGCTATAGCAG
ATGAGTGACATCAAGGTCCC
ACCAACGCAAGAGGTCCATT
ATCAAAGGAGAGCCCCTTGC
ATTACGTCACCATTCCCAGC
TCACAAATGGTCGAAACTAG
ACATACACAATGCGGCTGTC
ACCATTCCCAGCAAATGCCT
AGCTGCACTATCGGTTTGGT
CAGCAAAAATTTTGGACCGC
GTGGTTGTGATCAGCGGCCG
TGGAACCCATTCTTCCCCTG
AGAGTCACATACACAATGCG
TAGCTGCACTATCGGTTTGG
CTAAACAAAGGTGGTTGTGA
AGTTAGAGCGGGAACTGCTG
GTTGTGATCAGCGGCCGGGA
CGTCGGAACGGGAATCCTGG
CACCATTCCCAGCAAATGCC
CCTTTTTGGTGTGGAACCCA
AAATTTTGGACCGCTGTCCT
CATGGCTATAGCAACAATAA
TCGAAACTAGCTGCACTATC
AAAGGGAGCTATAACAGCCA
ATATTCGAGAGTCACATACA
AAAAATTTTGGACCGCTGTC
TTAACACGACGTTCAGCGAA
TCGTATACCGCAGAAGTTCG
CGAACACTAAACAAAGGTGG
AGCAGCAAAAATTTTGGACC
AGTTCGGCAAAGTTAGAGCG
TTCCCAGCAAATGCCTTTCT
ATTCGAGAGTCACATACACA
GTTGTCGTCATGGTTACAAA
GGGGGTAAGGCCATTTGTGC
TTCGGCAAAGTTAGAGCGGG
AACAAAGGTGGTTGTGATCA
TGCTGCAGGGCTCCCGCGCC
TGGTAAGTTACGCATTAACA
CTTGACACGAACACTAAACA
CCATCAAAGGAGAGCCCCTT
AGCGAAGAACGAATACGACA
TTGGCCGAACCAGGTCAAAG
TTTATCTCGTGCATCCAGGA
CTCGGGCTTTAAGGTTAGGG
CGTAGCTTCACAAATGGTCG
TTACGTCACCATTCCCAGCA
GGAAACGACTAGATAGATCC
CGAGGCCCTTGACACGAACA
ACCTCGGGCTTTAAGGTTAG
TCCCACACAGCGTGCCATTC
TTTTTGCCCATTATAATCCA
AGCAAATGCCTTTCTTTAGG
CGATCGTATACCGCAGAAGT

49
chapter3/populate_templates.py

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
import jinja2
import os
def main():
    """Generate Go boilerplate files for the chapter 3 problems.

    Each problem entry is rendered through the ``template.go.j2`` Jinja2
    template and written to ``ba<chapter><problem>.go`` in the current
    working directory. A file that already exists is never overwritten.
    """
    # Jinja env: templates are looked up in the current directory
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    # One entry per problem; the keys are the variables the template expects
    problems = [
        dict(
            chapter='3',
            problem='a',
            title='Generate k-mer Composition of a String',
            description='Given an input string, generate a list of all kmers that are in the input string.',
            url='http://rosalind.info/problems/ba3a/',
        ),
        dict(
            chapter='3',
            problem='b',
            title='Reconstruct string from genome path',
            description='Reconstruct a string from its genome path, i.e., sequential fragments of overlapping DNA.',
            url='http://rosalind.info/problems/ba3b/',
        ),
        dict(
            chapter='3',
            problem='c',
            title='Construct the overlap graph of a set of k-mers',
            description='Given a set of overlapping k-mers, construct the overlap graph and print a sorted adjacency matrix',
            url='http://rosalind.info/problems/ba3c/',
        ),
    ]

    print("Writing problem boilerplate code")

    template_name = 'template.go.j2'
    for spec in problems:
        rendered = env.get_template(template_name).render(**spec)
        target = ''.join(['ba', spec['chapter'], spec['problem'], '.go'])

        # Never clobber a file somebody may already have edited
        if os.path.exists(target):
            print("File %s already exists, skipping..."%(target))
            continue

        print("Writing to file %s..."%(target))
        with open(target,'w') as f:
            f.write(rendered)

    print("Done")


if __name__=="__main__":
    main()

49
chapter3/template.go.j2

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
package rosalindchapter{{chapter}}
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// BA{{chapter}}{{problem}}Description prints the problem description
// banner for Rosalind.info
// Problem BA{{chapter}}{{problem}}: {{title}}
//
// The banner is written to standard output one line at a time and
// consists of a separator, the problem identifier, the title, the
// description, and the problem URL, separated by blank lines.
func BA{{chapter}}{{problem}}Description() {
// Lines of the banner, in print order; the placeholders are filled
// in when this template is rendered.
description := []string{
"-----------------------------------------",
"Rosalind: Problem BA{{chapter}}{{problem}}:",
"{{title}}",
"",
"{{description}}",
"",
"URL: {{url}}",
"",
}
for _, line := range description {
fmt.Println(line)
}
}
// BA{{chapter}}{{problem}} runs Rosalind problem BA{{chapter}}{{problem}}
// against the input file named by filename.
//
// It prints the problem description and then reads the input file with
// rosa.ReadLines. If the file cannot be read, the program is terminated
// through log.Fatalf. The actual solution call is left as a commented-out
// sketch to be filled in for the specific problem.
func BA{{chapter}}{{problem}}(filename string) {

	BA{{chapter}}{{problem}}Description()

	// Read the contents of the input file
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("rosa.ReadLines: %v", err)
	}

	// Go refuses to compile a function with an unused local variable.
	// Reference lines here so the freshly generated file builds before
	// the problem-specific code below has been filled in.
	_ = lines

	//// Input file contents
	//input := lines[0]
	//pattern := lines[1]
	//result := rosa.PatternCount(input, pattern)
	//
	//fmt.Println("")
	//fmt.Printf("Computed result from input file: %s\n", filename)
	//fmt.Println(result)
}

4
rosalind/Readme.md

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
# rosalind go package
This directory contains the `rosalind` Go package.

5
rosalind/data/clump_finding.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
GCGGTTATGCACCGTTCAAATTAGCAAACCACTAAGCGACGTAGTCTGGATTGATTTCTCCCTACCAGTGACCCAAGACGCGTTAGTGAGTTAAGTTCATATCCAGTACCTGCCGCCCTCTGTACTTGGGCGTCCGATTCGCATGCTTACTCAGGTGGAGGACACGATAATCTGATTAAACTGAGCTAAACCAGGTGGAACCAGAAACCAGGTGGGGAGTCTCGCTTCAAGCCGTTCTTGCGATCAAACCAGGTGGTCCATTATGAAACCAGGTGGCTAAACCAGGTGGTCCAGATCCTCGAATGATGTCGGTGCACATCAAAACCAGGTGGGGTGGTGGAACGTAAAACCAGGTGGCATAAACCAGGTGGGCCGGTTCGTAAACCAGGTGAAACCAGGTGGGGTGGAAACCAGGTGGGTTACAAATTACGTTGAGATGGCCCAAACCAGGTGGTGGGCTTCACCCATGTCAACAAACCACCCTATGGAACTAAACCAGGTGGAACCAGGTGGTGAAGGCTTATCCTCAGGAAAAACCAGGTGGAGGTGGTGAAATAAAACCAGGTGGACCAGGTGGATAACCCTCGCCTCGCTTCTCAACCGAGACCTGGATAAACCAGGTGGGGTGGTCCACCGATTTTTGAGACACTAGAAACCAGGTGGGCGGGGAAACCAGGTGGCAAACCAGGTGGGGTGGACGGAAACCAGGTGGATATGTCATAAAACCAAACCAGGTGGTGCACCCCCATGGTGTGTCTTATCCGTGCGTATAAACCAGGTGGTCGCACGGCTTCCACTTGCTGAGAATAGGCCCGCAGGGTCAGTGCCATGCCCTCCGTCACTCGATATGTGTTGTAAGAGTGGTTACCCCTTCATTGAAGTCGCCCACAGCCCCACCTGCATTGCTAGACTATCACCCTACAGTAGGCCTTTTCGCCTTCTTCAAGCAGCAATCTCTTATCCGCGGATGGGCGCGGCGAGCGTGGCGTCCCCGAACATTTTTACCTAACGTGTTTTGTTGGCCGCAAGCCTTCCCTCTAGTCCACCTCAGCCATTCAGCCTAGTAGCTTTCAAGCCGAGCCTTCCATATCTAATGGACCGTCCAGAATTTCACACGTTTCACAGGGCTGTGTTCGACCGCCCGTAATGCTGTTTCACAGGCGATCGCCTTGCGGTTTTTTCACAGATCGCAGCCGATGGACATGCCAACTCGATTTTCACAGAGTTTTTCACAGCGGTTTCACAGCACAGCAGTGATTGTTTCACAGCAATTTTCACTTTCACAGGGGCCCTTTTCACAGCTCAGGGCTCTTTTCACTTTCACAGTTTCACAGCGCTCCTTTCACAGAGCGGGGAAATTTAAGGGAACACTCAAGGGAACAAGGGAACACACAAAGGGAACACAACACAACACATAAGGGAACACTTTCACAGAACACAAAAGTCCGAAATCATCAGCGGCGAAGGGATTTCACAGACAGACACTTTCACAGCGCATTTCACAGATACGTACTTTCACAGGCGTACTTTCACAGACTTTCACAGAGGACAAGCTCAATTTTCACAGACAGGCTGGATAAATTTCACAGCGGTAAGGGTTTCACAGCACACATAAGGGAACACGAATTTCACAGCAGGGAACACCTCTACGAGTAATCTATTACTCTACCTACTGAAGGGAACACACCGAAGACCTACTATTACCTATTACTCTTAAAGGGAACACATTACAAGGGAACACACTCTCTCGTCATATCTCACCTCTCTATTACTCTTAAGGGAACACCTTCTCGATCAACCTATTACTCTATGGAGATAGAGATATTCCAGACATATGGAGATAACATGGAGATATGGAGATAATGGAGATGGAGATAGCTCTTATATTTATCCTATGGAGATATGATACTATTAATGGAGATAATTCTAATGGAGATATAATTACTCTAAGAGGATGGGATCTCGGGCTATTACTCTAATGGAGATAAGCACTATTACTC
TAGGAAATGGAGATATGTCAATGGAGATATGTAATGGAGATAGAGGGAGATGGAGTCGCCATTTCATAATCGCCATTTCATAGTTCAGGAATCGCCATTTCCGCCATTTCTAAGATGGAGTCGCCATTTCTACGTATGGAGATAGGATCGCCATTTCATACGACCCGTTGGATATCGCCATTTCCTCGCCATTTCTGGTGACATTTCTCGCCATTTCATTTCTGGAGATAGATGGATCTCGCCATTTCATAGGAATCGCCATTTCCACGTAGGGGGGGCCACAATCCGTAGGTCGGAATTCAGACTCGCCATTTCCCATCGCCATTTCTTCACCTGTATGCCGATCCCTTCGCCATTTCTCATGGAGATAACTCTCTCTCGCCATTTCTCGCCATTTCCATTTCACTCTCATTCGCCATCGCCATTTCCATTCGCCATTTCATCGCCATTTCTTCAGGATAAGATATCGCCATTTCGACTCTCATTCGCATACTGACTCTCATTCTCATCTCGCCATTTCTCATCTGACTCTCATCCTGGGGGAAACTTGCGACTCTCATCACACTTCCGTCGACTCTCATACTGGCGGATAGCATAGGAGCCATTTAAAGACTCTCATTCTCATTCGAGACTCTCATTCAAATCCTACGAGGACTCTCATATAGACTCTCATATCATTACGAGGACTCTCATATACGAGCCATGCATGTGGCGACGACTCTCATCTACGAGCCATGCAAGCAGAATCTACGAGCGACTCTCATTACGAGCCATGTGACCGTACGAGCCATGCATGCATGCCATGCTGACTCTCATCGAGTACGAGCCATGGAAGTTCTTGTTGGTTCGTAGCCCAAGAGCTGAAGTTACGAGCCTACGAGCCATGAAGTTACTTTTACGAGCCATGAAGCTTACGATACGAGCCATGCGAGCCATGCATCCGCGCTACGAGCCATGTTCCAGTACGAGCCATGTTAGTTGCTGAAGTTAAGTTTGGCGCTGAAGTTTGTACGAGCCATGTGCCCGCTGAAGTTTGTTGTACGAGCCATGCATGCTGAAGTTAATGGCTGAAGTTAGCGTTTGCGGGCAGATCCTCATTCTACGATACGAGCCATGCCATGCAGCTGAAGTTAAGTTGGGTTACGAGCCATGCGAGCCATGTGAAGTACGAGCCATGCTGGCTGAAGTTGTTTGTGCTGCTGAAGTTGCTCTTGTCTCTAGCTGAAGTTGCCAACAGGGCTGAAGCTGAAGTTTAAGCTGAAGTTGCGAGCAGGCTGAAGTTATCGGATTGGGGCTGAAGTTCAACCTCCCGTCCCCCCACACTATATTCCCGTCCCCCCCCGCGCACGCGCCGTCTCCCGTCCCCCCTATCCCGTGCGCACGCGACGCGATCCCGTCCCCCCAGAGTGCGCGCACGCGTCCCCCTTCCCGTCCCCCTCTCCCGGGCGCACGCGTCGCTCAACATTTCCGCGCACGCGTCGCGCACGCGGGCGCACGCGGGTCCCGTCCCCCCCCCTCTTCGGCGCACGCGGAATTCCCGTCGCGCACGCGTCCCGTCCCGCGCACGCGTCGCGCACGCGACTGCCCTAACCAACAGTGCGCACGCGCCGGTAACCCGGTAACCCGGTAACCGCGCACGCGGGCGCACGCGCGTAACCCGCGCACGCGCCGCGCACGCGGCCCGGTTCCCGTCCCCCCCGGTAACCCGGTAACTCCCGTCCCCCGTAACCCGGTGCGCACGCGCCCGGCGCACGCGGAGCGCACGCGCCCCCCCCGGTAATAGCGCACGCGCCCGGGCGCACGCGCCCGGTAACCCGGTAACCCGGGCGCGCGCACGCGGCGGCGCACGCGGCGCACGCGGCGCACGCG
11 566 18
Output
AAACCAGGTGG

5
rosalind/data/frequent_words.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
CGGAAGCGAGATTCGCGTGGCGTGATTCCGGCGGGCGTGGAGAAGCGAGATTCATTCAAGCCGGGAGGCGTGGCGTGGCGTGGCGTGCGGATTCAAGCCGGCGGGCGTGATTCGAGCGGCGGATTCGAGATTCCGGGCGTGCGGGCGTGAAGCGCGTGGAGGAGGCGTGGCGTGCGGGAGGAGAAGCGAGAAGCCGGATTCAAGCAAGCATTCCGGCGGGAGATTCGCGTGGAGGCGTGGAGGCGTGGAGGCGTGCGGCGGGAGATTCAAGCCGGATTCGCGTGGAGAAGCGAGAAGCGCGTGCGGAAGCGAGGAGGAGAAGCATTCGCGTGATTCCGGGAGATTCAAGCATTCGCGTGCGGCGGGAGATTCAAGCGAGGAGGCGTGAAGCAAGCAAGCAAGCGCGTGGCGTGCGGCGGGAGAAGCAAGCGCGTGATTCGAGCGGGCGTGCGGAAGCGAGCGG
12
Output
CGGCGGGAGATT CGGGAGATTCAA CGTGCGGCGGGA CGTGGAGGCGTG CGTGGCGTGCGG GCGTGCGGCGGG GCGTGGAGGCGT GCGTGGCGTGCG GGAGAAGCGAGA GGAGATTCAAGC GGCGGGAGATTC GGGAGATTCAAG GTGCGGCGGGAG TGCGGCGGGAGA

5
rosalind/data/frequent_words_mismatch.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input:
CACAGTAGGCGCCGGCACACACAGCCCCGGGCCCCGGGCCGCCCCGGGCCGGCGGCCGCCGGCGCCGGCACACCGGCACAGCCGTACCGGCACAGTAGTACCGGCCGGCCGGCACACCGGCACACCGGGTACACACCGGGGCGCACACACAGGCGGGCGCCGGGCCCCGGGCCGTACCGGGCCGCCGGCGGCCCACAGGCGCCGGCACAGTACCGGCACACACAGTAGCCCACACACAGGCGGGCGGTAGCCGGCGCACACACACACAGTAGGCGCACAGCCGCCCACACACACCGGCCGGCCGGCACAGGCGGGCGGGCGCACACACACCGGCACAGTAGTAGGCGGCCGGCGCACAGCC
10 2
Output:
GCACACAGAC GCGCACACAC

5
rosalind/data/frequent_words_mismatch_complements.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
CTTGCCGGCGCCGATTATACGATCGCGGCCGCTTGCCTTCTTTATAATGCATCGGCGCCGCGATCTTGCTATATACGTACGCTTCGCTTGCATCTTGCGCGCATTACGTACTTATCGATTACTTATCTTCGATGCCGGCCGGCATATGCCGCTTTAGCATCGATCGATCGTACTTTACGCGTATAGCCGCTTCGCTTGCCGTACGCGATGCTAGCATATGCTAGCGCTAATTACTTAT
9 3
Output
AGCGCCGCT AGCGGCGCT

4979
rosalind/data/genome_path_string.txt

File diff suppressed because one or more lines are too long

5
rosalind/data/hamming_distance.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
CCGAAGCAATTGAAACCCCCCCGGCCTGGGAGGCGCAAAAATCTGACCTCTTTGTGAGTTGACCACTTAATTTATGTCTGACCACGAGAAGGGCTACTGATTTGGTACGTCGGGTCATGACCCCCAGTTCTTAGCCGCCTGCTCCAATCTCTGACTTGTTTATCGAGGGGATGGAGTAACGAAATGCGATTCGCCCGCTCAGGCCAAGGTATATATTTGAGTAGCGGAAGGTTGCACTACCTACAACCACGGCACACCGGCACGTTGTCGTGCCCTGGCGGCCTGCGCACTTTCGCCACTGTCAAGTACGACTTCCCAAGCTCAACCAACATTCATAATCCGGTGCAATTCATACCGTATCATCGTGCTATAAGCGACGCCGATTCTCGGGGCCTGATAATTGAGACTGGACTACATAGTGGGTGCCCTCTCTGCGAGTAAGTGACGGAACAACGGAGATCAGGGACCAAATGGTAGCAAAACAGATCGAGGTACACGCAGGTAGCTGTCCGTGGAGTAGACCGCGCTTAGCGTCTGTTAGAGTATCATCGGGGTATTAGACACAGGAACCTCTATGCTGTTAAAAGGCCATACCCCGTAATTGTGCAAATTTGTTACGTTCAAATCTACGCAGTGAGGGTCCTAAGGTGATGGCAGGGATTGGAACTTCTCCGCTGGCTCTTAGATTACTTAGCCAGTCTACCCTCGAAGATACAAATCCTTCCACCAGAGGGAGCTCATTGAAATTCATTCCATGCTACTCGACCGCGCGTATGGGTGCGGGGCTCTATGGGATCTAACTCGATCCTTCAGAGTCCTTATTCAAATGCATTTCCGTCCCCGTATGTTTCGACGAAGCCGAAGCCCAAACCCTGGGATGGACGAATTAAGGACAGTACAGGCAATAGTGTTCTCCCATACTCGGAACAGACGCCTCATTTTTTCGCGAAATCGATCTGGGTTGGAAGAAGTTCCAGTGCAGAGTTCCTATCACACAATTCGTTCTCGGGGCTTCCGGCCCATAAGCGATACTACTGTCTTTGCGAGCTAACGATTACATTCGGGGGAACTTAGCTCGGACTGGACCAGGTACATGATCCAAAGCGCGATGTCTGTCTGTTACCCTCACCGCCGCTCTTTTATCGGGTA
GCGTAGTAGGTTCGCGTACCTAGTTCCGCCGAAAAGACAAAGGAGAAGGGAATGCTCCTAGTAGTTTCAGTCTAGCAAACATGTTATAACGCTAACTGTGTGCTGCAAAAAGGATTTGAACCCAAATTTTAAAGCGCTGATCGACAGAACGCTGTTGAAGAGGCGATGGTACTGAGATTCCCCAGAAACCACCTCCGCGCTATGTGCTCAAGACAACCCGCATTCGTTTTTACTAGATTTGGAGCCGAGTTGTGATTTGGATATTTTCACATAAGACCGAGCAGGAAATATACCTTGTTGCAGCTATTGACCCCGTTCTCTCGGAAATCCATGGAATAGTCTTCGGATATTCGTACCAATGGGCGCGATGTTGCGATAAGAGAGCACATTTCATTAAGTGGTGCTCCGCCGCTAAGATGGGAAGGGGCGAGTCTATCGCAGCATCGAAGGCTGAGTTGGCCATTGCCGAGAGTATACATATTTACGATCACACTCGCATAGTCCCACGCATTACGTCCGAGATAGTATGTCCCAATGCAACCTAAAGCCGCGAGATTCCCTAAGGAGAAAATTAAACACTGGAAATTAGGTGATGCTACATCCCATGGACACTTTCGGAACAATATCGGTGACACACATCATCCGTGATCCCGTGATATTTCATCCATGGAGAGAGTATGGTTTTACTACACCTGGTCTAGGCCAAGCCTAACCCCCTGTTCATCCGTTTTATACGAGTATTACCTTGACGACCATAGAGGATAGACTCGGTATCCCGCACACTCTACACACACGACTTAATCCGCTCCACGACCTTCCTAGCGATCTTTGGCGCAGCCGGTTCGCGTATTTTACGACCAACTCGATGGATCCCAATTATCCCCCTGGTAGTGCCCCTCCGCCTGAGAATTCGACGGGCGAGGTCCGGGGGACCGACATAGAGTGGAATGCTTCTTTCCGGGATAACACGTGATTGACATAAAAATGTAGGGCAGATAGGCATCGTTAGCACCTCTCTCCTTGCTGCACTGCGTTTATCGATCGAATTCAAGACTTGTGCATGTTGAAAACAACCTCGCGTTATCCCTGCTATTTGCTTCAGAGCCGTAGGAGGGGACCATGCGTGAGTCCTCCTGAGCAACCTCAATT
Output
844

4
rosalind/data/minimum_skew.txt

File diff suppressed because one or more lines are too long

10
rosalind/data/motif_enumeration.txt

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
Input
5 2
TCTGAGCTTGCGTTATTTTTAGACC
GTTTGACGGGAACCCGACGCCTATA
TTTTAGATTTCCTCAGTCCACTATA
CTTACAATTTCGTTATTTATCTAAT
CAGTAGGAATAGCCACTTTGTTGTA
AAATCCATTAAGGAAAGACGACCGT
Output
AAACT AAATC AACAC AACAT AACCT AACTA AACTC AACTG AACTT AAGAA AAGCT AAGGT AAGTC AATAC AATAT AATCC AATCT AATGC AATTC AATTG ACAAC ACACA ACACC ACACG ACACT ACAGA ACAGC ACATC ACATG ACCAT ACCCT ACCGT ACCTA ACCTC ACCTG ACCTT ACGAC ACGAG ACGAT ACGCT ACGGT ACGTC ACGTT ACTAA ACTAG ACTAT ACTCA ACTCC ACTCG ACTCT ACTGA ACTGC ACTGT ACTTA ACTTC ACTTT AGAAA AGAAC AGAAG AGAAT AGACA AGACT AGATA AGATC AGCAT AGCCA AGCGT AGCTA AGCTC AGCTG AGCTT AGGAT AGGTA AGGTC AGTAA AGTAC AGTAT AGTCC AGTCG AGTCT AGTGA AGTTG ATAAA ATAAC ATACA ATACC ATAGA ATATA ATATC ATATG ATATT ATCAG ATCCC ATCCG ATCCT ATCGA ATCGC ATCTA ATCTC ATCTG ATGAC ATGAT ATGCA ATGCC ATGGA ATGGC ATGTA ATGTC ATTAA ATTAC ATTAG ATTAT ATTCA ATTCC ATTCG ATTGA ATTGC ATTGG ATTGT ATTTA ATTTC ATTTG ATTTT CAAAG CAACC CAACT CAAGA CAAGC CAATA CAATT CACAC CACAG CACCT CACGT CACTA CACTT CAGAA CAGAC CAGAT CAGGT CAGTA CAGTC CATAA CATAC CATAG CATAT CATCC CATCT CATGA CATGT CATTA CATTG CATTT CCAAG CCATA CCATG CCATT CCCGT CCCTA CCCTT CCGAA CCGAC CCGAT CCGCT CCGGT CCGTA CCGTC CCGTG CCGTT CCTAC CCTAT CCTCA CCTCC CCTTA CCTTC CCTTG CCTTT CGAAA CGAAG CGACA CGACT CGAGT CGATA CGATG CGATT CGCAA CGCAT CGCCA CGCGA CGCTA CGCTC CGCTT CGGAC CGGAT CGGCA CGGTA CGGTC CGGTT CGTAA CGTAC CGTCA CGTCG CGTCT CGTTA CGTTT CTAAC CTAAG CTAAT CTACA CTACC CTACG CTACT CTAGA CTAGC CTAGG CTAGT CTATA CTATC CTATG CTATT CTCAT CTCCG CTCGT CTCTA CTCTT CTGAA CTGAG CTGCA CTGCC CTGTA CTGTT CTTAA CTTAC CTTAG CTTAT CTTCA CTTGA CTTTA CTTTC CTTTG CTTTT GAAAT GAACA GAACT GAAGT GAATG GAATT GACAC GACAT GACCA GACCT GACGT GACTT GAGAA GAGAT GAGCT GATAA GATAC GATAG GATAT GATCA GATCC GATCG GATCT GATGT GATTA GATTC GATTG GATTT GCAAT GCACT GCATC GCATT GCCAT GCCGT GCCTA GCCTT GCGAT GCGGT GCGTC GCGTT GCTAA GCTAC GCTAG GCTAT GCTGA GCTGT GCTTA GCTTT GGAAT GGACA GGATA GGATC GGATT GGCTA GGGAT GGTAC GGTAG GGTAT GGTCA GGTCG GGTTA GTAAA GTAAG GTACA GTACC GTACG GTAGA GTATA GTATC GTATG GTATT GTCAA GTCAG GTCCG GTCCT GTCGA GTCGC GTCGT GTCTA GTCTG GTGAA GTGAG GTGCA GTGCG GTTAA GTTAC GTTAG GTTAT GTTCA GTTCC GTTCG GTTGA 
GTTTA TAAAC TAAAG TAACA TAACC TAACT TAAGA TAAGC TAATA TAATC TACAC TACAG TACCC TACCG TACCT TACGA TACGC TACGT TACTA TACTC TACTG TAGAA TAGAC TAGAG TAGAT TAGCC TAGCG TAGGA TAGTC TATAA TATAC TATAT TATCA TATCC TATCG TATGA TATGC TATGG TATGT TATTA TATTG TCAAC TCAAT TCACC TCACG TCACT TCAGA TCATA TCATG TCCAA TCCAC TCCAG TCCAT TCCCA TCCCT TCCGA TCCGC TCCGT TCCTA TCCTG TCCTT TCGAA TCGAC TCGAT TCGCC TCGCT TCGGA TCGGC TCGGG TCGGT TCGTC TCTAC TCTAG TCTAT TCTCC TCTCT TCTGG TCTGT TCTTA TCTTT TGAAA TGAAC TGAAT TGACA TGACC TGACT TGAGA TGAGC TGAGT TGATA TGATC TGATG TGATT TGCAA TGCAC TGCAG TGCAT TGCCA TGCCG TGCCT TGCGA TGCGT TGCTT TGGAA TGGAT TGGTA TGTAA TGTAG TGTAT TGTCC TGTCG TGTGG TGTTA TTAAA TTAAC TTAAG TTAAT TTACA TTACC TTACG TTACT TTAGA TTAGC TTAGG TTAGT TTATA TTATC TTATG TTATT TTCAA TTCAC TTCAT TTCCA TTCCC TTCCT TTCGA TTCGG TTCGT TTCTA TTCTG TTGAA TTGAC TTGAG TTGAT TTGCA TTGCG TTGGA TTGGG TTGTG TTTAA TTTAC TTTAG TTTAT TTTCA TTTCC TTTCG TTTGA TTTGG TTTTA TTTTG

2624
rosalind/data/neighbors.txt

File diff suppressed because it is too large Load Diff

5
rosalind/data/number_to_pattern.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
5353
7
Output
CCATGGC

19953
rosalind/data/overlap_graph.txt

File diff suppressed because it is too large Load Diff

5
rosalind/data/pattern_count.txt

File diff suppressed because one or more lines are too long

5
rosalind/data/pattern_matching.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
ACACCA
CCGAACACCCGTACACCGAACACCACACCACACCTTGCACACCACACCTACACCACACACCACACCGGACACCCACACCCACACCACGAACACCGAGAGTACACCTACACCTGACACCGGGGATCGTCACACCAAGTGGTGATACACCCACACCCTTTACACCTACACCACACCCGTACACCCTGAACACCACACCTAGAGAGTTGCACACCTCACACCGAAGGCACACCACACCATCCACACCATAAACACCGTTAACACCGTAGAACACCCAGCACACCCTTACCGCATACACCGACGTTAGACACCCACACCGGCAGTCACACCGTACACCCATTCGGTCCACACCCTACACCGCCTGCCACACCTACTGAGTTACACCGCATGACACCATTATCCGAACACACCAATATACACCAACACCATACACCATTTAACACCCCAAAACACCGACACCGACACCGCAAGCCCACACCACACCCACACCACAGACACCTACACCGTTTAGACACCAACACCGACACCACACCCCACACCCAAGACACCGCTACACCCTGCTGGACACCGACACCTACACCTCACACCGGACACCGCACACACCGCCACACCAATCACACCACACCACACCAGTACAACACCGACACCTACACCACACCACACCCAGATACACCCACACCGGACACCACACCAAACACCATTACACCCACACCGGTACACCACACCTCGTACACCAAGTAGACACCCAACACACCACACCTTGATGACACCTGACACCATACACCAAACACCACACCGAGGTAGACACCACACCGCCATCGACCACACCCTGACACCATACACCACACCACACCTAGTCGACACCCACACCCTCACACCTGACACCCGCGGCATACACCCACACCACTTACACCTACACCGGGGGAAACACCGAAACACCTCAACACCGGACACCACACCTAAGACACCGGGCGATACACCTGACCCTGACACCACACCACACCCAACACCCGAACACCACACCCAAACCTTGACACCCACACCAAAACACCCTTTATTAAAACACCCCGACCACCAAACACCACACCCCACACCGAACACCCACACCGCATACACCGGTCACACCTTATCTCGCCCACACCCTACACCCCACACCACACCACACCACACCGTACCACACCACACCCCCACACCAAAACACCACACCACACCGGTTACACCCCACACCAACACCCACACCATTACACCTACACCGCAACACCTGCACACCACACCAAGACTGGAGACACCTACCACACCCTCGTTTACACCACCTGACACCTTACACCTCCGACACCAAAAACCCGTTGGGTCATCGGATCAGGACACCTTTACACCACACCTTCGAGGACACCACGGACACCACACCCCACACCACACCGGTACACCGCGTTCACACCTCACACCGACACCACACCCCCTGAACTGTATACACCACACCACACCAACCCAACACCCTAGAAGACACCTGCCACACCTTACACCACACCACCGACACCAACACCCAAACACCTTTGACACACCACACCAACACCGTACACCGCAACACCCGCATTACACCTTACACCACACCACACCCCCCTACACCCACACCACACCCTCGGACACCAGTACACCACACCACAGATAGACACCATACACCTTACACCACATACACCTTTCACACCACACCCACACCCCGCTTAGACACCGACACCACACCACACCTGACACCACACCTCGCACACCGCCCTTACACCACACCCCAGCAGAAAACGAACACCCACACCACACCACACACCACACCACACCACACCGACACCTGACACCTAAACACCCCCACACCACACCTCTCCAACACCACACCAACACCTACACCAGAAAGACACCGACACCCGACACCCGCTGTTGTACACCCACACCATCGACACCACACCACACCACACCCTACACCGGCACACCATGCAAACACCACACACCTGGAC
ACCCACACCACACCGCACACCACACCACACCTACACCACCGACACCACACCACACACCTACTCCACAACACCTACACCAAACACCCTACACCTACACCTACACCTACATACACCTACACCTAATATTATGGACACCACACCTTCAGACACCGTACACCACACACCCTATGTTACACCACAGGCAGAATTTGACACCTCACACCCACACCCACACCCGCACACCACACCAACACCACACCACACCCCCAACACCGCTCTTACACCTTACACCGACACCAACACCGACACCGACACCACACCCCAATATCCCTCACACCACACCTAACCAGTATACACCGTTGACAACACCCCAATTTACACCCCATACACCTCAGACCACACACCGGACGGGCAACACCTACACCGATGTTACTTTACACCGGGCTCGCGGACACCACTCGACACCAACACCCGACACCTTACACCACACCAGCTGCGTGAACACCTACACCATCCCAACACCACACCGACACCGTATGGACACCTACACCTCGAGAGTTCCGCTAGAACACCACACCCATACACCATACACCGCGTACACCGAACACCGACACCCACACCACACCCAATGACACCGATGACACCGGCTCGATACACCTACACCGAACACCATCAGACACCGCGTACACCCAACACCTGACACCAACACCGCGGCACACCTAGTGACACCTACACCTACACCACACCATACACCCTACACCGATGAACACCAACACCACTCTAAACACCCAGGACACCAACACACCTAGACACCACACCAACGACAGAGACACCCTACACCTGCCAAGCTTTACACCATTGGTGAATCACACACCACACCAACACCACACCACACCGCTTACACCCGACCCGAAAACACCCACACCACACCAACACCACACCACATTACTCCCGTTACACCTACACCAACACCACACCTTTACACCACACCCAGCAACACCACACCAAATGGACACCACACCACACCACACCTTAGCCGATGTGCCGACACCGCTGTCGTCACACCAGTGACACCTTAGCGTACACACCACACCCAACACCTACACCACACCCGAAACACCTGACACCACACCACACCACACCCTACACCACACCATGACCACACACCAGCCGACACCACACCATACACCTACACCGAAACACCTTTCTACACCACACCACACCTGAACACCTAGTCACACCACGACACCAACACCTGACCACACCGGGGGACACCTTTGGAACGACACCTAACACCGCCACACCACACCACACCCGACACCTATAACACCACACCACACCACACCAAAGGCACACCTTAACACCCACACCAAGGGCTACACCACACCACACCTCCAAAACAAGGGACACCACACCCAACACCACACCACACCGCGTGGACACCACACCTTGACACCAAATTGTGCACACCACACCTGCACACCTTAAGAACGACACCGTCAGTACACCGAAACCCTATGACACCTGGGACACCTGGCACACCAACTACACCACACCCACACCACACACCTGGACACCGTTTCGCGAGTGTGGGTTGCTTGACACCACACCACACCGCGGCCTTACACCGCACACCGTAAACACCGTTGACACCTCATTACTCGACACCACACCGCACACCCACACCCGACACCGAACACCACACCTGGGCATACACACCACACCGTACACCTACACCACACCTGTGCTACACCAGGGGTACACCACACCTAGTACACCACACCGATACACCCACACCACACCACACCCACCAACACCACACCATCAAGAACACCCTATACACCCACACCACACCTACACCACACCCTACACCACACCACACCACACCATCGACACCTACACCACACCAACACCACACCAAACACCACACCCACACCCGGACACCACACCCACACCACACCATAACACCTAACACCACACACCTACACCTACTCTGCTAAACACCCAACACCTCTACACCC
TGCCGACACCGCGACACCGGCGACACCCTGTTACACCACACCTCACACCTTCGACACCAGCCAGAGACACCGGACACCGACACCCCGAACACCAACACACCCGA
Output
19 24 38 49 56 80 128 164 186 225 230 239 387 403 413 419 426 471 482 508 520 604 613 618 623 646 651 679 684 691 713 727 747 770 777 784 801 829 836 841 897 947 986 991 1011 1036 1075 1148 1153 1158 1173 1186 1194 1199 1220 1232 1262 1267 1303 1329 1369 1386 1395 1407 1444 1467 1472 1477 1516 1521 1530 1555 1560 1599 1604 1625 1640 1648 1653 1666 1680 1698 1728 1733 1745 1770 1800 1805 1812 1817 1822 1856 1872 1877 1889 1933 1942 1947 1952 1972 1983 2004 2016 2021 2032 2041 2046 2073 2131 2153 2172 2218 2223 2229 2234 2272 2290 2312 2430 2440 2460 2465 2486 2497 2547 2560 2595 2645 2678 2716 2721 2745 2751 2772 2788 2793 2831 2849 2854 2860 2865 2900 2905 2911 2916 2941 2947 2960 2975 2980 2991 2996 3001 3040 3063 3081 3102 3107 3112 3124 3129 3142 3152 3157 3188 3193 3216 3224 3279 3284 3305 3310 3315 3320 3345 3357 3362 3385 3397 3402 3418 3431 3445 3517 3526 3537 3580 3585 3643 3675 3694 3712 3728 3739 3753 3772 3777 3792 3797 3824 3835 3847 3852 3857 3862 3877 3882 3888 3893 3900 3919 3930 3935 3950 4032 4053 4088

4
rosalind/data/pattern_to_number.txt

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
Input
CTTCTCACGTACAACAAAATC
Output
2161555804173

4
rosalind/data/reverse_complement.txt

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
Input
GCACTAAAGCACCAGCGAGACTAGACAGTGCCTTACGCTGTATAGGGATAAAAGTTGTCAAGATGACTTGCGGGAATCGTTAGGCTGACACGCACTAATGCTCGCCTTCCGGGTGTTCTGTGAGTACGGTTGATCACGGTCGCCCTGCGGATGTACTACCATGAAAGTTGATCACGTGCCGCGCGCTCCCTAAGCTTAGAAGTTTGCACAATCTGCATTCTATCCTGCCACGCCTTCAATAATAAGTGGTGTATGCAATTTGGAGTCGATCTGGGAACCAACGATTAACTTGGGAAGTGGCTATATCAAAATACGATGTCTTCAGCGTCGCGGTCGACGCTGCGCAACGAACGAAAAGTCCGATGGACCCGAACTCTGATTATACCGAATCTCCGCTTTTACGACTCGCCACATACCGGCATAAGCCATTCTGGGGCTTTGCCCCCTTAGGTCTAGCCCACCCCCGACCTAGCTTGAGCGTGTCACACCCCAACAGCCGCATTACGCCCGCTCACCGACACTTGGCGGTCGTATAAGAAATCCAAAACCGAGACGAAAACTGAAGAATAAGGTTCATTCAGCATTGTGGAGTTGACAACATCAGTATGAGGGTGAGTTGCGTCAAAGTCGAAGAATATGGAGGGTCAAATCACGAGATGTAACATCCACGCGAACACTTAGCTAGTAATCATTTTTCCGTAAAGAGTCGTTGAGTCCGACCAGTTGAAGCTCAGTGTTTATCCGGTAGGGAATTGTAGGATCAACGATAGGGTCGCGGAACCGCCGTATTATAGAAAGAGATAGTCCCAACGTTCTTTATGCACTTCGCTGAGAGAGGGTGACCGGGCACGCAGAGACTTTGGCTTTGTAGCCCCATTCCGCGGCTCTTCGGATACTGACTGAGCTGTAGTCGGCACATCCTTTACAACAAAAAAGCTCATGTCCGAGATTTTAATGGCGGCGCACGGTCACTCGGAGTTGACGAATGCGCAGCGAATCGTTGGTTCCAGATAAAGGCAAGGCTGTGTTACTGTTTCGGAGGGCAATCGTCAACGAGCAAAGATGTTAGAATAGAAATCGGAGCGAGGCTCCCAGCAAATATGAGTTAGGATCTTTTTTGCGAAAGGGTTGGTCTCCATCTCCTCTCGCCTGCGAGCGAGTCCCCGAAGCACGTTCAACCTATTTGATTCGGTGCAGGACACCCTAGATTAGCATACAGGTATAATATCAGGAAGAGTCACCTTTCATTCCCGACCAGTAGGATGTATAGGAATGAGACTATCCAGTTCTTTGTCAGCTCAAGACAGCGTTGGCAATACGGCCGAGTATTGGGGGGAATACCCCGGAACATAGTATTGTGCCTTAGCTATTGCCCTAGATACCACGCGGCCCTTGAGCATTTGTCTACACTTTGGTGATCCTAGGCACCCCGCGCTCGTGGCAACGTCAGCATCTTGTGATAGCAAAGCGTATGTACCTGTAATGTAACATCAAAGTATATCGGCACCCTAGTGGGGGCGAAGGTTGGATCGCTTATCACTCGGGACGACGGTGGTATCCAGCCACAGTGTTGCTCATTAACGACCACACAGCTCTTGGAATCGAGCCATGGACAGGGGACGCCCCAGGATACATGATGTTCCTGTGAGCACAAGCACTATGGCAGGCTTAGAGCTAATTCTTCCATTGGGCCGGTAAGACGCCAGAGAAAGTCACCGGTGTGAGAAAGGGTTTCGTGTGGGGGAGGCGTCAAACAACAAGGATTTACGTCGAACCGATCAGCCCTTGTCTGATTCATTCCAGGTTTAAGCGAGCCCTGGCGGTGACCTCCCGGGGATTCTTGGTGACGATAAGTGTAGACTGGTTTATGACTGTCTATAAGTGCAAGCAGTCCGCGACTCGGCCGCTCCTCAGATCTCGTCCTCCCAATCCTTACGAGGCACTATTCCGGCCCTAAAAACTTACCTACCAACCGGACATAGCGAACGGTCTAAGTTTTC
GGAAATTGAATAACACTCGAACAAAGGAGCCCAATACATGGCACAAGCACACATAAAGCTTGGCGCTGCTGACGGCCGGCCCCCACAGCAGGTGGGTATATCAGGATAATGCTCTACCTCCTCGGGGATGACCAGAGACGAACGTTCGGACGCTATTAGTTAGTGGTCGCCCAGATATTCTCCTAATCAAGCCCTCGAAGGCTAGTCTAAATTTTAGCAAAAACTCGTATAGCAGCACATGCGGTAGACTGGGCCTCAGCCAGGTAGAGCTGTGGCTGCACTCGAGCAATCACTACCGTATAGAGTGGTGTTATTTCGGGGTGAATGTCAGGGGTGGTCCAAAATCACAAACACGTCTATTCGCACCCGGGAATGCTCATGTTCCCACGGCGGGCCTGTACAGATGTGAGAGGCAGCGATCATACAAAGTTGCCTGGCCTCCCCACGAACACACGGCGGCCCATTAGGTCTGAACAGGTTTATCGTTAATATATTTTGCGGTGG
Output
CCACCGCAAAATATATTAACGATAAACCTGTTCAGACCTAATGGGCCGCCGTGTGTTCGTGGGGAGGCCAGGCAACTTTGTATGATCGCTGCCTCTCACATCTGTACAGGCCCGCCGTGGGAACATGAGCATTCCCGGGTGCGAATAGACGTGTTTGTGATTTTGGACCACCCCTGACATTCACCCCGAAATAACACCACTCTATACGGTAGTGATTGCTCGAGTGCAGCCACAGCTCTACCTGGCTGAGGCCCAGTCTACCGCATGTGCTGCTATACGAGTTTTTGCTAAAATTTAGACTAGCCTTCGAGGGCTTGATTAGGAGAATATCTGGGCGACCACTAACTAATAGCGTCCGAACGTTCGTCTCTGGTCATCCCCGAGGAGGTAGAGCATTATCCTGATATACCCACCTGCTGTGGGGGCCGGCCGTCAGCAGCGCCAAGCTTTATGTGTGCTTGTGCCATGTATTGGGCTCCTTTGTTCGAGTGTTATTCAATTTCCGAAAACTTAGACCGTTCGCTATGTCCGGTTGGTAGGTAAGTTTTTAGGGCCGGAATAGTGCCTCGTAAGGATTGGGAGGACGAGATCTGAGGAGCGGCCGAGTCGCGGACTGCTTGCACTTATAGACAGTCATAAACCAGTCTACACTTATCGTCACCAAGAATCCCCGGGAGGTCACCGCCAGGGCTCGCTTAAACCTGGAATGAATCAGACAAGGGCTGATCGGTTCGACGTAAATCCTTGTTGTTTGACGCCTCCCCCACACGAAACCCTTTCTCACACCGGTGACTTTCTCTGGCGTCTTACCGGCCCAATGGAAGAATTAGCTCTAAGCCTGCCATAGTGCTTGTGCTCACAGGAACATCATGTATCCTGGGGCGTCCCCTGTCCATGGCTCGATTCCAAGAGCTGTGTGGTCGTTAATGAGCAACACTGTGGCTGGATACCACCGTCGTCCCGAGTGATAAGCGATCCAACCTTCGCCCCCACTAGGGTGCCGATATACTTTGATGTTACATTACAGGTACATACGCTTTGCTATCACAAGATGCTGACGTTGCCACGAGCGCGGGGTGCCTAGGATCACCAAAGTGTAGACAAATGCTCAAGGGCCGCGTGGTATCTAGGGCAATAGCTAAGGCACAATACTATGTTCCGGGGTATTCCCCCCAATACTCGGCCGTATTGCCAACGCTGTCTTGAGCTGACAAAGAACTGGATAGTCTCATTCCTATACATCCTACTGGTCGGGAATGAAAGGTGACTCTTCCTGATATTATACCTGTATGCTAATCTAGGGTGTCCTGCACCGAATCAAATAGGTTGAACGTGCTTCGGGGACTCGCTCGCAGGCGAGAGGAGATGGAGACCAACCCTTTCGCAAAAAAGATCCTAACTCATATTTGCTGGGAGCCTCGCTCCGATTTCTATTCTAACATCTTTGCTCGTTGACGATTGCCCTCCGAAACAGTAACACAGCCTTGCCTTTATCTGGAACCAACGATTCGCTGCGCATTCGTCAACTCCGAGTGACCGTGCGCCGCCATTAAAATCTCGGACATGAGCTTTTTTGTTGTAAAGGATGTGCCGACTACAGCTCAGTCAGTATCCGAAGAGCCGCGGAATGGGGCTACAAAGCCAAAGTCTCTGCGTGCCCGGTCACCCTCTCTCAGCGAAGTGCATAAAGAACGTTGGGACTATCTCTTTCTATAATACGGCGGTTCCGCGACCCTATCGTTGATCCTACAATTCCCTACCGGATAAACACTGAGCTTCAACTGGTCGGACTCAACGACTCTTTACGGAAAAATGATTACTAGCTAAGTGTTCGCGTGGATGTTACATCTCGTGATTTGACCCTCCATATTCTTCGACTTTGACGCAACTCACCCTCATACTGATGTTGTCAACTCCACAATGCTGAATGAACCTTATTCTTCAGTTTTCGTCTCGGTTTTGGATTTCTTATACGACCGCCAAGTGTCGGTGAGCGGGCG
TAATGCGGCTGTTGGGGTGTGACACGCTCAAGCTAGGTCGGGGGTGGGCTAGACCTAAGGGGGCAAAGCCCCAGAATGGCTTATGCCGGTATGTGGCGAGTCGTAAAAGCGGAGATTCGGTATAATCAGAGTTCGGGTCCATCGGACTTTTCGTTCGTTGCGCAGCGTCGACCGCGACGCTGAAGACATCGTATTTTGATATAGCCACTTCCCAAGTTAATCGTTGGTTCCCAGATCGACTCCAAATTGCATACACCACTTATTATTGAAGGCGTGGCAGGATAGAATGCAGATTGTGCAAACTTCTAAGCTTAGGGAGCGCGCGGCACGTGATCAACTTTCATGGTAGTACATCCGCAGGGCGACCGTGATCAACCGTACTCACAGAACACCCGGAAGGCGAGCATTAGTGCGTGTCAGCCTAACGATTCCCGCAAGTCATCTTGACAACTTTTATCCCTATACAGCGTAAGGCACTGTCTAGTCTCGCTGGTGCTTTAGTGC

9517
rosalind/data/string_composition.txt

File diff suppressed because one or more lines are too long

1020
rosalind/rosalind_ba1.go

File diff suppressed because it is too large Load Diff

1121
rosalind/rosalind_ba1_test.go

File diff suppressed because it is too large Load Diff

974
rosalind/rosalind_ba2.go

@ -0,0 +1,974 @@ @@ -0,0 +1,974 @@
package rosalind
import (
"errors"
"fmt"
"math/rand"
"strings"
"time"
)
////////////////////////////////
// BA2A
// FindMotifs solves Rosalind problem BA2A.
//
// Given a collection of strings dna and an integer d, a k-mer is a
// (k,d)-motif if it appears in every string from dna with at most
// d mismatches. FindMotifs builds one histogram per input string with
// KmerHistogramMismatches(input, k, d) and returns the keys shared by
// all of those histograms, as computed by KeySetIntersection.
//
// An error is returned when dna is empty, when any input string fails
// CheckIsDNA, or when KmerHistogramMismatches or KeySetIntersection
// reports an error.
func FindMotifs(dna []string, k, d int) ([]string, error) {
	// With no inputs there is no histogram to intersect; report it
	// instead of handing an empty slice to KeySetIntersection.
	if len(dna) == 0 {
		return nil, errors.New("Error: FindMotifs called with no DNA strings")
	}

	// Validate every input before doing any real work.
	for _, input := range dna {
		if !CheckIsDNA(input) {
			msg := fmt.Sprintf("Error: input was not DNA: %s\n", input)
			return nil, errors.New(msg)
		}
	}

	// For each DNA string, get its Hamming neighbor histogram; the key
	// set of each histogram is what gets intersected below.
	sets := make([]map[string]int, len(dna))
	for i, input := range dna {
		hist, err := KmerHistogramMismatches(input, k, d)
		if err != nil {
			// Do not silently continue with a missing histogram: it
			// would make the intersection come out wrong.
			return nil, err
		}
		sets[i] = hist
	}

	// Now we want the intersection of all of the key sets.
	intersect, err := KeySetIntersection(sets)
	if err != nil {
		return nil, err
	}
	return intersect, nil
}
// KeySetIntersection finds the intersection of the key sets
// for a slice of string to integer maps.
//
// A key is part of the result when it is present in every map of input;
// the integer values stored under the keys are ignored. The result is in
// no particular order because it follows map iteration order.
//
// An error is returned when input is empty, since there is no first map
// to take candidate keys from.
func KeySetIntersection(input []map[string]int) ([]string, error) {
	if len(input) == 0 {
		return nil, errors.New("Error: KeySetIntersection called with no input maps")
	}

	saves := []string{}
	for key := range input[0] {
		// Assume this key is in every map until one proves otherwise.
		inEveryone := true
		for _, hist := range input[1:] {
			// Test for presence of the key rather than for a non-zero
			// value, so that every map is held to the same rule as
			// the first one.
			if _, ok := hist[key]; !ok {
				inEveryone = false
				break
			}
		}
		if inEveryone {
			saves = append(saves, key)
		}
	}
	return saves, nil
}
////////////////////////////////
// BA2b
// MinKmerDistance takes a k-mer pattern and a longer string text and
// finds the minimum distance from pattern to any k-mer in text, where
// k is len(pattern).
//
// The k-mers of text are first gathered into a histogram with
// KmerHistogram, and pattern is then compared (HammingDistance) against
// each key of that histogram. Compared with sliding a window over text
// and measuring the distance at every position, this skips repeated
// distance calculations when text contains many duplicate k-mers
// (e.g., 1M ATGATGATG) or when k is small and text is long.
//
// On failure the returned distance is -1 and the error is non-nil.
func MinKmerDistance(pattern, text string) (int, error) {
	k := len(pattern)

	windows, err := KmerHistogram(text, k)
	if err != nil {
		return -1, fmt.Errorf("Error: KmerHistogram(%s,%d) returned error", text, k)
	}

	// Begin at k, the max possible value, and only ever move downward.
	best := k
	for candidate := range windows {
		dist, err := HammingDistance(pattern, candidate)
		if err != nil {
			return -1, errors.New("Error: HammingDistance() returned error")
		}
		if dist < best {
			best = dist
		}
	}
	return best, nil
}
// MinKmerDistances takes a k-mer pattern and a set of strings and
// returns the sum (L1 norm) of the shortest distances from pattern
// to each input string, each obtained from MinKmerDistance.
//
// If MinKmerDistance fails for any input, the result is -1 together
// with an error naming the pattern and the offending input.
func MinKmerDistances(pattern string, inputs []string) (int, error) {
	total := 0
	for idx := range inputs {
		dist, err := MinKmerDistance(pattern, inputs[idx])
		if err != nil {
			return -1, errors.New(fmt.Sprintf("Error: MinKmerDistance(%s,%s) returned error",
				pattern, inputs[idx]))
		}
		total += dist
	}
	return total, nil
}
// MedianString returns every k-mer pattern that minimizes the summed
// distance to the strings in dna.
//
// For a candidate pattern, the distance to one DNA string is the
// smallest HammingDistance between the pattern and any k-mer occurring
// in that string; the score of the pattern is the sum of these distances
// over all strings in dna. All 4^k candidate patterns are generated with
// NumberToPattern and scored; those sharing the lowest score are
// returned in increasing order of their pattern number.
//
// An error is returned if KmerHistogram, NumberToPattern or
// HammingDistance reports an error.
func MedianString(dna []string, k int) ([]string, error) {
	// Turn each DNA string into a set of k-mers (the histogram keys),
	// so repeated k-mers are only compared once per candidate.
	histograms := make([]map[string]int, len(dna))
	for i, text := range dna {
		h, err := KmerHistogram(text, k)
		if err != nil {
			msg := fmt.Sprintf("Error: KmerHistogram(%s, %d) returned an error",
				text, k)
			return nil, errors.New(msg)
		}
		histograms[i] = h
	}

	// Total number of possible k-mers, 4^k.
	maxx := 1
	for i := 0; i < k; i++ {
		maxx *= 4
	}

	// Score the candidates in a single pass, keeping only the current
	// best score and the patterns that achieve it. This avoids holding
	// one score per candidate and converting indices to patterns twice.
	var results []string
	runningMin := 0
	for iK := 0; iK < maxx; iK++ {
		// Turn integer iK into a k-mer pattern.
		pattern, err := NumberToPattern(iK, k)
		if err != nil {
			msg := fmt.Sprintf("Error: NumberToPattern(%d,%d) raised an error",
				iK, k)
			return nil, errors.New(msg)
		}

		// Accumulate the min distance sum over all DNA strings.
		sigmaMinD := 0
		for _, histogram := range histograms {
			// Min distance d(pattern, dna) for this pattern and this
			// DNA string; k is the largest value it can take.
			minD := k
			for kmer := range histogram {
				d, err := HammingDistance(kmer, pattern)
				if err != nil {
					msg := fmt.Sprintf("Error: HammingDistance(%s,%s) returned error",
						kmer, pattern)
					return nil, errors.New(msg)
				}
				if d < minD {
					minD = d
				}
			}
			sigmaMinD += minD
		}

		switch {
		case iK == 0 || sigmaMinD < runningMin:
			// First candidate, or a new running min: start over.
			runningMin = sigmaMinD
			results = []string{pattern}
		case sigmaMinD == runningMin:
			// Another pattern tying the running min.
			results = append(results, pattern)
		}
	}
	return results, nil
}
////////////////////////////////
// BA2c
// indexOfString reports the position of the first element of list
// that equals item, or -1 when no element matches.
func indexOfString(list []string, item string) int {
	for pos, candidate := range list {
		if candidate == item {
			return pos
		}
	}
	return -1
}
// ProfileMostProbableKmers takes a profile matrix and a DNA input
// string, evaluates the probability of every kmer of length k in the
// DNA string, and returns the most probable kmer(s) - the kmer(s) most
// likely to have been generated by profile among all kmers in the text.
//
// profile must have four rows, one per nucleotide in the order
// A, C, G, T, and each row must have at least k columns; entry
// profile[r][j] is the probability of nucleotide r at position j.
// The probability of a kmer is the product of the entries selected by
// its nucleotides at each position.
//
// All kmers tied for the highest probability are returned. This
// particular method does not pay attention to order of occurrence of
// kmers: they are taken from the keys of a map, so the order of the
// result is not defined.
//
// An error is returned when k < 1, when dna fails CheckIsDNA, when
// profile does not have the required shape, when KmerHistogram reports
// an error, or when a kmer contains a symbol other than A, C, G, T.
func ProfileMostProbableKmers(dna string, k int, profile [][]float32) ([]string, error) {
	// Row order of the profile matrix.
	const nucleotides = "ACGT"

	// Make sure we have well-formed inputs
	if k < 1 {
		msg := "Error: specified kmer length k was < 1\n"
		return nil, errors.New(msg)
	}
	if !CheckIsDNA(dna) {
		msg := fmt.Sprintf("Error: input was not DNA: %s\n", dna)
		return nil, errors.New(msg)
	}
	if len(profile) != len(nucleotides) {
		msg := fmt.Sprintf("Error: incorrect number of rows (%d) in profile, need 4, one for each nucleotide\n", len(profile))
		return nil, errors.New(msg)
	}
	// Every row is indexed by kmer position below, so it needs at
	// least k columns; reject short rows here instead of panicking.
	for r, row := range profile {
		if len(row) < k {
			msg := fmt.Sprintf("Error: profile row %d has %d columns, need at least %d", r, len(row), k)
			return nil, errors.New(msg)
		}
	}

	// Extract all k-mers occurring in the DNA string. The keys of this
	// map are used to iterate over the kmers. Keys are not ordered!
	hist, err := KmerHistogram(dna, k)
	if err != nil {
		return nil, err
	}

	// Compute the probability of each kmer by multiplying together the
	// probability of the nucleotide that occurs at each position, while
	// keeping track of the running maximum and the corresponding kmer(s).
	var maxProbKmer []string
	maxProb := float32(-1.0)
	for kmer := range hist {
		probability := float32(1.0)
		for j := 0; j < len(kmer); j++ {
			ix := strings.IndexByte(nucleotides, kmer[j])
			if ix < 0 {
				msg := fmt.Sprintf("Error: kmer %s contains a symbol that is not a nucleotide", kmer)
				return nil, errors.New(msg)
			}
			probability *= profile[ix][j]
		}
		if probability > maxProb {
			maxProb = probability
			maxProbKmer = []string{kmer}
		} else if probability == maxProb {
			maxProbKmer = append(maxProbKmer, kmer)
		}
	}
	return maxProbKmer, nil
}
// ProfileMostProbableKmer returns a single profile-most probable kmer of
// dna: the first element of the slice produced by ProfileMostProbableKmers.
//
// When ProfileMostProbableKmers fails, its error is returned together with
// an empty string. An error is also returned if no kmer was produced, so
// the result slice is never indexed while empty.
func ProfileMostProbableKmer(dna string, k int, profile [][]float32) (string, error) {
	results, err := ProfileMostProbableKmers(dna, k, profile)
	if err != nil {
		return "", err
	}
	if len(results) == 0 {
		msg := fmt.Sprintf("Error: no kmer of length %d found in dna = %s", k, dna)
		return "", errors.New(msg)
	}
	return results[0], nil
}
// KmerInOrderList returns the distinct kmers of length k that occur in dna,
// in order of first occurrence. A kmer that occurs several times is listed
// once, at the position of its first occurrence.
//
// An error is returned when k < 1 or when dna is shorter than k.
func KmerInOrderList(dna string, k int) ([]string, error) {
	// Ensure we have well-formed inputs.
	// A negative k would make the slice expression below panic, and k == 0
	// would yield a single empty "kmer".
	if k < 1 {
		msg := "Error: KmerInOrderList called with a kmer size smaller than 1"
		return nil, errors.New(msg)
	}
	if len(dna) < k {
		msg := "Error: KmerInOrderList called with a DNA string smaller than specified kmer size"
		return nil, errors.New(msg)
	}

	// Number of (overlapping) windows of width k in dna
	overlap := len(dna) - k + 1

	// kmers keeps the order of first occurrence; seen makes the duplicate
	// check a map lookup instead of a scan over kmers.
	kmers := make([]string, 0, overlap)
	seen := make(map[string]bool, overlap)
	for i := 0; i < overlap; i++ {
		kmer := dna[i : i+k]
		if seen[kmer] {
			continue
		}
		seen[kmer] = true
		kmers = append(kmers, kmer)
	}
	return kmers, nil
}
// ProfileMostProbableKmersGreedy returns the single kmer of length k in dna
// that is most probable under the probability matrix profile, preferring
// the earliest occurrence.
//
// Candidates are visited in the order produced by KmerInOrderList (order of
// first occurrence in dna), and a candidate replaces the current best only
// when its probability is strictly greater. Among tied kmers the one that
// occurs first therefore wins; in particular, when every kmer has
// probability 0 the first kmer of dna is returned.
//
// profile must have exactly four rows, in the order A, C, G, T, and every
// row must have at least k columns.
//
// An error (and an empty string) is returned when k < 1, when CheckIsDNA
// rejects dna, when the profile is malformed, or when KmerInOrderList
// fails or yields no kmers.
func ProfileMostProbableKmersGreedy(dna string, k int, profile [][]float32) (string, error) {
	nucleotides := []string{"A", "C", "G", "T"}

	// Make sure we have well-formed inputs
	if k < 1 {
		msg := "Error: specified kmer length k was < 1\n"
		return "", errors.New(msg)
	}
	if !CheckIsDNA(dna) {
		msg := fmt.Sprintf("Error: input was not DNA: %s\n", dna)
		return "", errors.New(msg)
	}
	if len(profile) != len(nucleotides) {
		msg := fmt.Sprintf("Error: incorrect number of rows (%d) in profile, need 4, one for each nucleotide\n", len(profile))
		return "", errors.New(msg)
	}
	// Column j of a row is read for every position j of a kmer below, so
	// a short row is rejected here instead of panicking with an index out
	// of range later.
	for i, row := range profile {
		if len(row) < k {
			msg := fmt.Sprintf("Error: profile row %d has %d columns, need at least k = %d\n", i, len(row), k)
			return "", errors.New(msg)
		}
	}

	kmersInOrder, err := KmerInOrderList(dna, k)
	if err != nil || len(kmersInOrder) == 0 {
		msg := fmt.Sprintf("Error: call to KmerInOrderList() failed: dna = %s, k = %d",
			dna, k)
		return "", errors.New(msg)
	}

	// Starting below zero guarantees that the first kmer is always taken,
	// even when its probability is 0.
	var maxProbKmer string
	maxProb := float32(-1.0)
	for _, kmer := range kmersInOrder {
		probability := float32(1.0)
		for j := 0; j < len(kmer); j++ {
			ix := indexOfString(nucleotides, string(kmer[j]))
			if ix < 0 {
				// indexOfString returns -1 for anything other than
				// A, C, G, T; indexing profile with it would panic.
				msg := fmt.Sprintf("Error: kmer %s contains a character that is not A, C, G or T\n", kmer)
				return "", errors.New(msg)
			}
			probability *= profile[ix][j]
		}
		// Strictly greater only: on a tie the earlier kmer is kept.
		// That is what makes this variant greedy.
		if probability > maxProb {
			maxProb = probability
			maxProbKmer = kmer
		}
	}
	return maxProbKmer, nil
}
////////////////////////////////
// BA2D
//
// This problem makes about as much sense
// as a camel in a jacuzzi.
//
// After much searching, and re-reading,
// found this great explanation:
//
// http://www.mrgraeme.co.uk/greedy-motif-search/
// ----------------------------
// ScoredMotifMatrix holds a set of motifs (kmers) together with a score
// for that set. Many candidate sets of motifs are assembled and compared
// in order to find a set with a minimum score.
// The score is not updated dynamically; see UpdateScore().
type ScoredMotifMatrix struct {
// motifs is the collection of kmers; AddMotif() appends to it.
motifs []string
// score is -1 after NewScoredMotifMatrix() and is only recomputed
// when UpdateScore() is called.
score int
}
// NewScoredMotifMatrix returns a ScoredMotifMatrix with an empty (non-nil)
// motif list and a score of -1, i.e. the state before any call to
// UpdateScore().
func NewScoredMotifMatrix() ScoredMotifMatrix {
	return ScoredMotifMatrix{
		motifs: []string{},
		score:  -1,
	}
}
// AddMotif appends motif to the matrix.
//
// The first motif is accepted unconditionally. Every later motif must have
// the same length as the first one stored; otherwise the matrix is left
// unchanged and an error is returned. The score is not recomputed.
func (s *ScoredMotifMatrix) AddMotif(motif string) error {
	if len(s.motifs) > 0 && len(motif) != len(s.motifs[0]) {
		return fmt.Errorf("Error: could not add motif %s: length %d does not match existing motif length %d",
			motif, len(motif), len(s.motifs[0]))
	}
	s.motifs = append(s.motifs, motif)
	return nil
}
// UpdateScore recomputes the score of the matrix and stores it in s.score.
//
// It assembles a kmer made of the most common nucleotide at each position
// and sums the Hamming distances from that kmer to every motif.
//
// An error is returned, and s.score is left untouched, when there are no
// motifs, when a motif is shorter than the first one, or when
// HammingDistance fails.
func (s *ScoredMotifMatrix) UpdateScore() error {
	if len(s.motifs) == 0 {
		msg := "Error: call to scored matrix motif UpdateScore() method failed: there are no motifs!"
		return errors.New(msg)
	}

	// Params
	k := len(s.motifs[0])

	// AddMotif() enforces equal lengths, but the motifs slice can also be
	// written to directly; a short motif would make the indexing below
	// panic.
	for _, motif := range s.motifs {
		if len(motif) < k {
			msg := fmt.Sprintf("Error: call to scored matrix motif UpdateScore() method failed: motif %s is shorter than motif length %d",
				motif, k)
			return errors.New(msg)
		}
	}

	// Start by assembling a "most common" kmer: the kmer containing the
	// most frequent nucleotide at each position.
	mostCommonKmer := make([]string, k)
	for ik := 0; ik < k; ik++ {
		// Count nucleotide frequencies in column ik
		frequency := make(map[string]int)
		for _, motif := range s.motifs {
			frequency[string(motif[ik])]++
		}
		// Determine the most frequent nucleotide. Ties are broken by map
		// iteration order; every tied nucleotide mismatches the same
		// number of motifs in this column.
		var maxBp string
		maxFreq := 0
		for bp, freq := range frequency {
			if freq > maxFreq {
				maxFreq = freq
				maxBp = bp
			}
		}
		mostCommonKmer[ik] = maxBp
	}
	commonkmer := strings.Join(mostCommonKmer, "")

	// Now that we have the common kmer, compute the distance of each
	// motif from it and sum the distances to get the total score.
	// Accumulate locally so a failure does not leave a partial score.
	score := 0
	for _, motif := range s.motifs {
		d, err := HammingDistance(commonkmer, motif)
		if err != nil {
			return err
		}
		score += d
	}
	s.score = score
	return nil
}
// MakeProfile builds a profile matrix from the motifs currently stored.
//
// The result has four rows, in the order A, C, G, T, and one column per
// motif position. Entry [i][j] is the count of nucleotide i in column j
// divided by the sum of the four counts in that column:
//
//	P_i = N_i / sum_j N_j
//
// When pseudocounts is true, every column in which at least one of the
// four nucleotides has a count of zero gets 1 added to all four counts
// before normalizing; columns without a zero count are left as they are.
//
// An error is returned when there are no motifs or when a motif is shorter
// than the first one.
func (s *ScoredMotifMatrix) MakeProfile(pseudocounts bool) ([][]float32, error) {
	// The motif length is taken from the first motif, so there must be one.
	if len(s.motifs) == 0 {
		msg := "Error: call to scored matrix motif MakeProfile() method failed: there are no motifs!"
		return nil, errors.New(msg)
	}

	// Params
	k := len(s.motifs[0])
	nucleotides := []string{"A", "C", "G", "T"}

	// A motif shorter than the first would make the column indexing
	// below panic.
	for _, motif := range s.motifs {
		if len(motif) < k {
			msg := fmt.Sprintf("Error: call to scored matrix motif MakeProfile() method failed: motif %s is shorter than motif length %d",
				motif, k)
			return nil, errors.New(msg)
		}
	}

	// Profile is a 4 x k matrix of float32s
	profile := make([][]float32, len(nucleotides))
	for row := range profile {
		profile[row] = make([]float32, k)
	}

	// For each column, i.e. kmer nucleotide location, compute the
	// probability of each of the four nucleotides.
	for ik := 0; ik < k; ik++ {
		counts := map[string]int{
			"A": 0,
			"C": 0,
			"G": 0,
			"T": 0,
		}
		// Populate counts
		for _, motif := range s.motifs {
			counts[string(motif[ik])]++
		}

		if pseudocounts {
			foundZero := false
			for _, nuc := range nucleotides {
				if counts[nuc] == 0 {
					foundZero = true
					break
				}
			}
			if foundZero {
				for _, nuc := range nucleotides {
					counts[nuc]++
				}
			}
		}

		// Sum the counts of the four nucleotides
		summ := 0
		for _, nuc := range nucleotides {
			summ += counts[nuc]
		}

		// Populate p_i
		for inuc, nuc := range nucleotides {
			profile[inuc][ik] = float32(counts[nuc]) / float32(summ)
		}
	}
	return profile, nil
}
// ----------------------------
// BA2D and BA2E functions
//
// Note: the function below is for
// BA2D and BA2E, depending on the
// value of the pseudocounts boolean.
// Given an integer k (kmer size) and t (len(dna)),
// return a collection of kmer strings
// that have the lowest score (highest similarity).
// If at any step you find more than one
// Profile-most probable k-mer in a given
// DNA string, use the one occurring first.
// Boolean pseudocounts turns on/off pseudocounts.
func GreedyMotifSearch(dna []string, k, t int, pseudocounts bool) ([]string, error) {
var best_smm ScoredMotifMatrix
// bestmotifs is initially an empty list with score 0
best_smm = NewScoredMotifMatrix()
// One way of getting kmer motifs
// is to create a hash table of all
// motifs that occur in the DNA string.
// This does not maintain the original
// order in which the motifs occur.
// To be *greedy* we should pay attention
// to which one comes first.
motifs, err := KmerInOrderList(dna[0], k)
if err != nil {
msg := fmt.Sprintf("Error: call to KmerInOrderList() failed with params:\n\tdna = %s\n\tk = %d",
dna[0], k)
return nil, errors.New(msg)
}
for _, kmer_motif := range motifs {
// Create a new scored motif group
this_smm := NewScoredMotifMatrix()
// Add our motif, which we chose from dna[0]
// This motif kicks off the new motif group
this_smm.AddMotif(kmer_motif)
// Loop over all remaining dna strings
for i := 1; i < len(dna); i++ {
idna := dna[i]
// Form a profile matrix from
// all the motifs from dna strings
// up to, but not including, this one
profile, _ := this_smm.MakeProfile(pseudocounts)
// Use the profile to find the profile-most
// probable kmer in this string of dna, idna
result, _ := ProfileMostProbableKmersGreedy(idna, k, profile)
// Add the profile-most probable kmer
// to the list of motifs
if len(result) > 0 {
this_smm.AddMotif(result)
}
}
err := this_smm.UpdateScore()
if err != nil {
return nil, err
}
if this_smm.score < best_smm.score || best_smm.score < 0 {
best_smm = this_smm
}
}
return best_smm.motifs, nil
}
// ----------------------------
// BA2D functions
// GreedyMotifSearchNoPseudocounts runs GreedyMotifSearch on dna with
// regular counts, i.e. with pseudocounts switched off.
func GreedyMotifSearchNoPseudocounts(dna []string, k, t int) ([]string, error) {
	const pseudocounts = false
	return GreedyMotifSearch(dna, k, t, pseudocounts)
}
// ----------------------------
// BA2E functions
// GreedyMotifSearchPseudocounts runs GreedyMotifSearch on dna with
// pseudocounts switched on.
func GreedyMotifSearchPseudocounts(dna []string, k, t int) ([]string, error) {
	const pseudocounts = true
	return GreedyMotifSearch(dna, k, t, pseudocounts)
}
// ----------------------------
// BA2F functions
// RandomMotifSearchPseudocounts runs one randomized motif search with
// pseudocounts and returns the motifs found together with their score.
//
// One random kmer is drawn from every DNA string to form the initial motif
// set. Then, repeatedly, a profile (with pseudocounts) is built from the
// current set and the profile-most probable kmer of every DNA string forms
// the next set. The search stops as soon as the next set does not have a
// strictly lower score, and the current set is returned.
//
// The random source is seeded from the current time, so results differ
// between calls. k is the kmer size. t is not used: the number of strings
// is taken from len(dna).
//
// An error is returned when dna is empty, when k < 1, when a DNA string is
// shorter than k, or when any profile, kmer-selection or scoring step
// fails.
func RandomMotifSearchPseudocounts(dna []string, k, t int) ([]string, int, error) {
	// Validate up front: rand.Intn panics for a non-positive argument,
	// which is what a string shorter than k would produce below.
	if len(dna) == 0 {
		msg := "Error: RandomMotifSearchPseudocounts called with no DNA strings"
		return nil, 0, errors.New(msg)
	}
	if k < 1 {
		msg := "Error: specified kmer length k was < 1\n"
		return nil, 0, errors.New(msg)
	}
	for i := range dna {
		if len(dna[i]) < k {
			msg := fmt.Sprintf("Error: DNA string %d has length %d, shorter than kmer size %d",
				i, len(dna[i]), k)
			return nil, 0, errors.New(msg)
		}
	}

	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	const pseudocounts = true

	// ---------------------------------
	// Fencepost algorithm:
	// Create a set of random motifs and calculate their score, once,
	// before we go into the loop.
	thisSmm := NewScoredMotifMatrix()
	for i := range dna {
		// Intn is non-inclusive: start index in [0, overlap)
		overlap := len(dna[i]) - k + 1
		ri := r.Intn(overlap)
		if err := thisSmm.AddMotif(dna[i][ri : ri+k]); err != nil {
			return nil, 0, err
		}
	}
	if err := thisSmm.UpdateScore(); err != nil {
		return nil, 0, err
	}

	// ---------------------------------
	// Main loop:
	// Get the profile from the current motif set and use it to choose the
	// profile-most probable kmers for the next round. thisSmm always holds
	// the best set found so far, and its score strictly decreases on every
	// pass that does not return.
	for {
		profile, err := thisSmm.MakeProfile(pseudocounts)
		if err != nil {
			return nil, 0, err
		}

		nextSmm := NewScoredMotifMatrix()
		for i := range dna {
			result, err := ProfileMostProbableKmersGreedy(dna[i], k, profile)
			if err != nil {
				return nil, 0, err
			}
			if err := nextSmm.AddMotif(result); err != nil {
				return nil, 0, err
			}
		}
		if err := nextSmm.UpdateScore(); err != nil {
			return nil, 0, err
		}

		if nextSmm.score >= thisSmm.score {
			// No improvement, so stop now and return the prior result.
			return thisSmm.motifs, thisSmm.score, nil
		}
		thisSmm = nextSmm
	}
}
// ManyRandomMotifSearches is a driver that runs
// RandomMotifSearchPseudocounts repeatedly and keeps the motifs with the
// lowest score over all runs.
//
// One initial search is run to seed the best result, followed by n further
// searches, so the search is performed n+1 times in total (once when
// n <= 0). The first error from any run is returned.
func ManyRandomMotifSearches(dna []string, k, t, n int) ([]string, error) {
	// Initial best motifs
	minBm, minBmScore, err := RandomMotifSearchPseudocounts(dna, k, t)
	if err != nil {
		return nil, err
	}

	// Run the algorithm n more times, looking for the lowest score
	for i := 0; i < n; i++ {
		bm, bmScore, err := RandomMotifSearchPseudocounts(dna, k, t)
		if err != nil {
			return nil, err
		}
		if bmScore < minBmScore {
			minBmScore = bmScore
			minBm = bm
		}
	}
	return minBm, nil
}
// ----------------------------
// BA2G functions
// GibbsSampler runs one Gibbs-style motif search with pseudocounts and
// returns the best motif set seen together with its score.
//
// The search starts from one random kmer per DNA string. Each of the n
// iterations picks a random index ri in [0, t), builds a profile (with
// pseudocounts) from all current motifs except motif ri, and replaces
// motif ri with the kmer returned by ProfileMostProbableKmer for dna[ri].
// The replacement always stays in the working set; the lowest-scoring set
// seen so far is tracked separately and is what gets returned. If no
// iteration improves on the random starting set, the starting set is
// returned.
//
// The random source is seeded from the current time, so results differ
// between calls.
//
// An error is returned when k < 1, when fewer than two DNA strings are
// given (the hold-out profile needs at least one remaining motif), when t
// is outside [1, len(dna)], when a DNA string is shorter than k, or when
// any profile, kmer-selection or scoring step fails.
func GibbsSampler(dna []string, k, t, n int) ([]string, int, error) {
	// Validate up front; each of these would otherwise surface as a panic
	// (rand.Intn with a non-positive argument, or an index out of range).
	if k < 1 {
		msg := "Error: specified kmer length k was < 1\n"
		return nil, 0, errors.New(msg)
	}
	if len(dna) < 2 {
		msg := fmt.Sprintf("Error: GibbsSampler needs at least 2 DNA strings, got %d", len(dna))
		return nil, 0, errors.New(msg)
	}
	if t < 1 || t > len(dna) {
		msg := fmt.Sprintf("Error: GibbsSampler called with t = %d, need 1 <= t <= %d (number of DNA strings)",
			t, len(dna))
		return nil, 0, errors.New(msg)
	}
	for i := range dna {
		if len(dna[i]) < k {
			msg := fmt.Sprintf("Error: DNA string %d has length %d, shorter than kmer size %d",
				i, len(dna[i]), k)
			return nil, 0, errors.New(msg)
		}
	}

	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	const pseudocounts = true

	// Pick a random kmer motif from each DNA string
	thisSmm := NewScoredMotifMatrix()
	for i := range dna {
		// Intn is non-inclusive: start index in [0, overlap)
		overlap := len(dna[i]) - k + 1
		ri := r.Intn(overlap)
		if err := thisSmm.AddMotif(dna[i][ri : ri+k]); err != nil {
			return nil, 0, err
		}
	}
	if err := thisSmm.UpdateScore(); err != nil {
		return nil, 0, err
	}

	// The starting set is the best set seen so far. It is copied because
	// thisSmm.motifs is modified in place below.
	bestMotifs := make([]string, len(thisSmm.motifs))
	copy(bestMotifs, thisSmm.motifs)
	bestScore := thisSmm.score

	// ---------------------------------
	// Main loop
	for j := 0; j < n; j++ {
		// Index of the motif to hold out, in [0, t)
		ri := r.Intn(t)

		// Assemble a motif matrix from all motifs except the holdout
		holdoutSmm := NewScoredMotifMatrix()
		for i, motif := range thisSmm.motifs {
			if i == ri {
				continue
			}
			if err := holdoutSmm.AddMotif(motif); err != nil {
				return nil, 0, err
			}
		}

		// Make profile with the holdout motif missing
		profile, err := holdoutSmm.MakeProfile(pseudocounts)
		if err != nil {
			return nil, 0, err
		}

		// Use the profile to pick the profile-most-probable kmer
		// (from DNA string ri) as our candidate motif
		candidateMotif, err := ProfileMostProbableKmer(dna[ri], k, profile)
		if err != nil {
			return nil, 0, err
		}

		// Put the candidate in place of the held-out motif and rescore.
		// The working set keeps the candidate whether or not the score
		// improved.
		thisSmm.motifs[ri] = candidateMotif
		if err := thisSmm.UpdateScore(); err != nil {
			return nil, 0, err
		}

		// Remember the working set if it is the best seen so far
		if thisSmm.score < bestScore {
			bestMotifs = append(bestMotifs[:0], thisSmm.motifs...)
			bestScore = thisSmm.score
		}
	}
	return bestMotifs, bestScore, nil
}
// ManyGibbsSamplers is a driver that runs GibbsSampler several times and
// keeps the motifs with the lowest score over all runs.
//
// n is the number of inner iterations in one run of the Gibbs sampler;
// n_starts is the number of runs. The sampler is always run at least once,
// even when n_starts is less than 1. The first error from any run is
// returned.
func ManyGibbsSamplers(dna []string, k, t, n, n_starts int) ([]string, error) {
	// The first run seeds the running best
	bestMotifs, bestScore, err := GibbsSampler(dna, k, t, n)
	if err != nil {
		return nil, err
	}

	// The remaining n_starts-1 runs
	for remaining := n_starts - 1; remaining > 0; remaining-- {
		motifs, score, err := GibbsSampler(dna, k, t, n)
		if err != nil {
			return nil, err
		}
		// Only a strictly better score replaces the running best
		if score >= bestScore {
			continue
		}
		bestScore, bestMotifs = score, motifs
	}
	return bestMotifs, nil
}

882
rosalind/rosalind_ba2_test.go

@ -0,0 +1,882 @@ @@ -0,0 +1,882 @@
package rosalind
import (
"fmt"
"log"
"math"
"sort"
"strconv"
"strings"
"testing"
)
/////////////////////////////////
// BA2a Test
// TestKeySetIntersection passes three maps that share only the keys "AAA"
// and "BBB" to KeySetIntersection and expects exactly those two keys back.
func TestKeySetIntersection(t *testing.T) {
	want := []string{"AAA", "BBB"}
	inputs := []map[string]int{
		{"AAA": 1, "BBB": 2, "CCC": 2, "DDD": 2},
		{"AAA": 2, "BBB": 3, "EEE": 3, "FFF": 3},
		{"AAA": 3, "BBB": 4, "GGG": 4, "HHH": 4},
	}

	got, err := KeySetIntersection(inputs)
	if err != nil {
		t.Errorf("Error: KeySetIntersection() returned error: %v", err)
	}

	// Both sides are sorted so that only the contents are compared
	sort.Strings(want)
	sort.Strings(got)
	if !EqualStringSlices(got, want) {
		t.Errorf("Error testing KeySetIntersection()\ncomputed = %v\ngold = %v",
			got, want)
	}
}
// TestFindMotifs runs FindMotifs on a single small problem (k = 3, d = 1,
// four DNA strings) and compares the sorted result with the sorted gold
// standard.
func TestFindMotifs(t *testing.T) {
	const (
		k = 3
		d = 1
	)
	input := []string{"ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"}
	want := []string{"ATA", "ATT", "GTT", "TTT"}

	got, err := FindMotifs(input, k, d)
	if err != nil {
		t.Errorf("Error: FindMotifs() returned error: %v", err)
	}

	// Both sides are sorted so that only the contents are compared
	sort.Strings(want)
	sort.Strings(got)
	if !EqualStringSlices(got, want) {
		t.Errorf("Error testing FindMotifs():\ncomputed = %v\ngold = %v",
			got, want)
	}
}
// TestMatrixFindMotifs runs FindMotifs over a table of debug cases and,
// for each one, compares the sorted result with the sorted gold standard.
func TestMatrixFindMotifs(t *testing.T) {
	cases := []struct {
		k    int
		d    int
		dna  []string
		gold []string
	}{
		{
			k:    3,
			d:    1,
			dna:  []string{"ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"},
			gold: []string{"ATA", "ATT", "GTT", "TTT"},
		},
		{
			k:    3,
			d:    0,
			dna:  []string{"ACGT", "ACGT", "ACGT"},
			gold: []string{"ACG", "CGT"},
		},
		{
			k:    3,
			d:    1,
			dna:  []string{"AAAAA", "AAAAA", "AAAAA"},
			gold: []string{"AAA", "AAC", "AAG", "AAT", "ACA", "AGA", "ATA", "CAA", "GAA", "TAA"},
		},
		{
			k:    3,
			d:    3,
			dna:  []string{"AAAAA", "AAAAA", "AAAAA"},
			gold: []string{"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"},
		},
		{
			k:    3,
			d:    0,
			dna:  []string{"AAAAA", "AAAAA", "AACAA"},
			gold: []string{},
		},
		{
			k:    3,
			d:    0,
			dna:  []string{"AACAA", "AAAAA", "AAAAA"},
			gold: []string{},
		},
	}

	for _, tc := range cases {
		got, err := FindMotifs(tc.dna, tc.k, tc.d)
		if err != nil {
			t.Error(err)
		}

		// Both sides are sorted so that only the contents are compared
		sort.Strings(tc.gold)
		sort.Strings(got)
		if EqualStringSlices(got, tc.gold) {
			continue
		}
		t.Errorf("Error testing FindMotifs()\nk = %d, d = %d, len(dna) = %d\ncomputed = %v\ngold = %v",
			tc.k, tc.d, len(tc.dna),
			got, tc.gold)
	}
}
// TestFindMotifsFile runs FindMotifs on a large test case loaded from
// data/motif_enumeration.txt.
//
// The file is read as follows (n = number of lines):
//
//	line 0          input label
//	line 1          "k d", separated by a single space
//	lines 2..n-3    DNA strings, one per line
//	line n-2        output label
//	line n-1        gold standard motifs, separated by single spaces
func TestFindMotifsFile(t *testing.T) {
	filename := "data/motif_enumeration.txt"

	// Read the contents of the input file
	lines, err := ReadLines(filename)
	if err != nil {
		// NOTE(review): log.Fatalf exits the whole test binary; t.Fatalf
		// would fail only this test. Left as it was so that this function
		// keeps using the same packages.
		log.Fatalf("ReadLines: %v", err)
	}

	// 4 lines are for input label/parameters/output label/gold standard;
	// fewer than that cannot be indexed safely below.
	if len(lines) < 4 {
		t.Fatalf("Error: %s has %d lines, need at least 4", filename, len(lines))
	}

	// lines[1]: parameters
	params := strings.Split(lines[1], " ")
	if len(params) < 2 {
		t.Fatalf("Error: could not read k and d from parameter line %q of %s", lines[1], filename)
	}
	k, err := strconv.Atoi(params[0])
	if err != nil {
		t.Fatalf("Error: could not parse k from %q: %v", params[0], err)
	}
	d, err := strconv.Atoi(params[1])
	if err != nil {
		t.Fatalf("Error: could not parse d from %q: %v", params[1], err)
	}

	// Last line: gold standard
	gold := strings.Split(lines[len(lines)-1], " ")

	// Everything between the parameter line and the output label is a
	// DNA string. Copy, so FindMotifs does not share storage with lines.
	dna := make([]string, len(lines)-4)
	copy(dna, lines[2:len(lines)-2])

	results, err := FindMotifs(dna, k, d)
	if err != nil {
		t.Error(err)
	}

	// Sort before comparing
	sort.Strings(gold)
	sort.Strings(results)
	if !EqualStringSlices(results, gold) {
		t.Errorf("Error testing FindMotifs()\ncomputed = %v\ngold = %v",
			results, gold)
	}
}
/////////////////////////////////
// BA2b Test
// TestMatrixMinKmerDistance checks MinKmerDistance against a table of
// (pattern, text, expected distance) cases.
func TestMatrixMinKmerDistance(t *testing.T) {
	cases := []struct {
		pattern string
		text    string
		d       int
	}{
		{pattern: "ATA", text: "AAATTGACGCAT", d: 1},
		{pattern: "AAA", text: "AAAAAAAAAAA", d: 0},
		{pattern: "AAA", text: "CCCCCCCCC", d: 3},
		{pattern: "AAA", text: "GAAGAAGAAGAA", d: 1},
		{pattern: "AAAA", text: "GAAG", d: 2},
		{pattern: "AAAA", text: "GAAGAA", d: 1},
	}

	for _, tc := range cases {
		got, err := MinKmerDistance(tc.pattern, tc.text)
		if err != nil {
			t.Error(err)
		}
		if got == tc.d {
			continue
		}
		t.Errorf("Error testing MinKmerDistance()\npattern = %s, text = %s\ncomputed = %d\ngold = %d",
			tc.pattern, tc.text,
			got, tc.d)
	}
}
// TestMatrixMinKmerDistances checks MinKmerDistances against a table of
// (pattern, DNA strings, expected total distance) cases.
func TestMatrixMinKmerDistances(t *testing.T) {
	var tests = []struct {
		pattern string
		inputs  []string
		d       int
	}{
		{
			"AAA",
			[]string{"AAAA", "CCCC", "GGGG", "TTTT"},
			9},
		{
			"AAA",
			[]string{"GAAG", "CAAC", "TAAG", "TAAC"},
			4},
	}
	for _, test := range tests {
		c, err := MinKmerDistances(test.pattern, test.inputs)
		if err != nil {
			t.Error(err)
		}
		if c != test.d {
			// The message names the function actually under test
			// (MinKmerDistances, plural).
			t.Errorf("Error testing MinKmerDistances()\npattern = %s, inputs = %v\ncomputed = %d\ngold = %d",
				test.pattern, test.inputs,
				c, test.d)
		}
	}
}
// TestMedianString runs MedianString with k = 3 on five DNA strings and
// checks that the gold median string "GAC" is among the returned kmers.
func TestMedianString(t *testing.T) {
	k := 3
	dna := []string{
		"AAATTGACGCAT",
		"GACGACCACGTT",
		"CGTCAGCGCCTG",
		"GCTGAGCACCGG",
		"AGTACGGGACAG",
	}
	gold := "GAC"

	result, err := MedianString(dna, k)
	if err != nil {
		t.Fatalf("Error: MedianString() returned error: %v", err)
	}

	// Since they only report one kmer, and we report all,
	// we should check if their kmer is in our slice.
	found := false
	for _, r := range result {
		if r == gold {
			found = true
			break
		}
	}
	if !found {
		// Uh oh, their kmer is not in our slice.
		t.Errorf("Error testing MedianString(): gold median string not in results.\ncomputed = %q\ngold = %q\n",
			result, gold)
	}
}
/////////////////////////////////
// BA2c Test
// TestProfileMostProbableKmers runs ProfileMostProbableKmers with k = 5 on
// one DNA string and a 4 x 5 profile, and checks that the gold kmer
// "CCGAG" is among the returned kmers.
func TestProfileMostProbableKmers(t *testing.T) {
	gold := "CCGAG"
	dna := "ACCTGTTTATTGCCTAAGTTCCGAACAAACCCAATATAGCCCGAGGGCCT"
	k := 5
	prof := [][]float32{
		{0.2, 0.2, 0.3, 0.2, 0.3},
		{0.4, 0.3, 0.1, 0.5, 0.1},
		{0.3, 0.3, 0.5, 0.2, 0.4},
		{0.1, 0.2, 0.1, 0.1, 0.2},
	}

	result, err := ProfileMostProbableKmers(dna, k, prof)
	if err != nil {
		t.Fatalf("Error: ProfileMostProbableKmers() returned error: %v", err)
	}

	// Check if gold answer is in our results slice
	found := false
	for _, r := range result {
		if r == gold {
			found = true
			break
		}
	}
	if !found {
		// The correct kmer was not found in our result
		t.Errorf("Error testing ProfileMostProbableKmers(): found incorrect most probable kmer:\n Gold: %s\n Computed: %s\n",
			gold, strings.Join(result, " "))
	}
}
// TestProfileMostProbableKmers2 runs ProfileMostProbableKmers with k = 6 on
// a longer DNA string and a 4 x 6 profile, and checks that the gold kmer
// "TGTCGC" is among the returned kmers.
func TestProfileMostProbableKmers2(t *testing.T) {
	const (
		gold = "TGTCGC"
		dna  = "TGCCCGAGCTATCTTATGCGCATCGCATGCGGACCCTTCCCTAGGCTTGTCGCAAGCCATTATCCTGGGCGCTAGTTGCGCGAGTATTGTCAGACCTGATGACGCTGTAAGCTAGCGTGTTCAGCGGCGCGCAATGAGCGGTTTAGATCACAGAATCCTTTGGCGTATTCCTATCCGTTACATCACCTTCCTCACCCCTA"
		k    = 6
	)
	prof := [][]float32{
		{0.364, 0.333, 0.303, 0.212, 0.121, 0.242},
		{0.182, 0.182, 0.212, 0.303, 0.182, 0.303},
		{0.121, 0.303, 0.182, 0.273, 0.333, 0.303},
		{0.333, 0.182, 0.303, 0.212, 0.364, 0.152},
	}

	result, err := ProfileMostProbableKmers(dna, k, prof)
	if err != nil {
		t.Error(err)
	}

	// Scan the results until the gold answer turns up
	found := false
	for i := 0; i < len(result) && !found; i++ {
		found = result[i] == gold
	}
	if found {
		return
	}

	// The correct kmer was not found in our result
	t.Error(fmt.Sprintf("Error testing ProfileMostProbableKmer(): found incorrect most probable kmer:\n Gold: %s\n Computed: %s\n",
		gold, strings.Join(result, " ")))
}
/////////////////////////////////
// BA2D Test
// TestScoredMotifMatrix grows one ScoredMotifMatrix step by step and checks
// the score reported after each step:
//
//	1 x AAAAA                         -> 0
//	9 x AAAAA                         -> 0
//	9 x AAAAA + CCCCC                 -> 5
//	9 x AAAAA + CCCCC + TAAAA         -> 6
func TestScoredMotifMatrix(t *testing.T) {
	steps := []struct {
		add   []string // motifs appended before rescoring
		score int      // expected score after rescoring
	}{
		{[]string{"AAAAA"}, 0},
		{[]string{"AAAAA", "AAAAA", "AAAAA", "AAAAA", "AAAAA", "AAAAA", "AAAAA", "AAAAA"}, 0},
		{[]string{"CCCCC"}, 5},
		{[]string{"TAAAA"}, 6},
	}

	s := NewScoredMotifMatrix()
	for _, step := range steps {
		for _, motif := range step.add {
			if err := s.AddMotif(motif); err != nil {
				t.Fatalf("Error: AddMotif(%s) failed: %v", motif, err)
			}
		}
		if err := s.UpdateScore(); err != nil {
			// Report the actual contents of the matrix instead of a
			// hard-coded description.
			t.Errorf("Error: UpdateScore() failed with motifs %v: %v", s.motifs, err)
		}
		if s.score != step.score {
			t.Errorf("Error: computed incorrect score (computed %d, should be %d)",
				s.score, step.score)
		}
	}
}
// TestProfileConstruction builds a profile without pseudocounts from a
// ScoredMotifMatrix and compares it, element by element, with a known
// result.
func TestProfileConstruction(t *testing.T) {
	// The test case is the following calculation from the textbook
	// (page 74). The motifs
	//
	// TCGGGGGTTTTT
	// CCGGTGACTTAC
	// ACGGGGATTTTC
	// TTGGGGACTTTT
	// AAGGGGACTTCC
	// TTGGGGACTTCC
	// TCGGGGATTCAT
	// TCGGGGATTCCT
	// TAGGGGAACTAC
	// TCGGGTATAACC
	//
	// result in the following profile:
	//
	// .2 .2 0 0 0 0 .9 .1 .1 .1 .3 0
	// .1 .6 0 0 0 0 0 .4 .1 .2 .4 .6
	// 0 0 1 1 .9 .9 .1 0 0 0 0 0
	// .7 .2 0 0 .1 .1 0 .5 .8 .7 .3 .4
	motifs := []string{
		"TCGGGGGTTTTT",
		"CCGGTGACTTAC",
		"ACGGGGATTTTC",
		"TTGGGGACTTTT",
		"AAGGGGACTTCC",
		"TTGGGGACTTCC",
		"TCGGGGATTCAT",
		"TCGGGGATTCCT",
		"TAGGGGAACTAC",
		"TCGGGTATAACC",
	}
	gold := [][]float32{
		{.2, .2, 0, 0, 0, 0, .9, .1, .1, .1, .3, 0},
		{.1, .6, 0, 0, 0, 0, 0, .4, .1, .2, .4, .6},
		{0, 0, 1, 1, .9, .9, .1, 0, 0, 0, 0, 0},
		{.7, .2, 0, 0, .1, .1, 0, .5, .8, .7, .3, .4},
	}

	smg := NewScoredMotifMatrix()
	for i := range motifs {
		smg.AddMotif(motifs[i])
	}
	result, err := smg.MakeProfile(false)
	if err != nil {
		t.Error(err)
	}

	// Dimensions are compared first (row count, then the width of the
	// first row); only then are the entries compared.
	matches := len(gold) == len(result) && len(gold[0]) == len(result[0])
	if matches {
		for i := range gold {
			for j := range gold[0] {
				// Comparing floats, so don't use !=
				if !TheseFloatsAreEqual(gold[i][j], result[i][j]) {
					matches = false
					break
				}
			}
		}
	}
	if matches {
		return
	}
	t.Error(fmt.Sprintf("Error testing MakeProfile() (no pseudocounts) for Scored Motif Matrix: found incorrect motifs\n Gold: %v\n Computed: %v\n",
		gold, result))
}
// TestProfilePseudocountsConstruction builds a profile with pseudocounts
// from a ScoredMotifMatrix and compares it, element by element, with a
// known result.
func TestProfilePseudocountsConstruction(t *testing.T) {
	// Test case: the motifs
	//
	// TAAC
	// GTCT
	// ACTA
	// AGGT
	//
	// result in the profile:
	//
	// 0.375 0.250 0.250 0.250
	// 0.125 0.250 0.250 0.250
	// 0.250 0.250 0.250 0.125
	// 0.250 0.250 0.250 0.375
	motifs := []string{
		"TAAC",
		"GTCT",
		"ACTA",
		"AGGT",
	}
	gold := [][]float32{
		{0.375, 0.250, 0.250, 0.250},
		{0.125, 0.250, 0.250, 0.250},
		{0.250, 0.250, 0.250, 0.125},
		{0.250, 0.250, 0.250, 0.375},
	}

	smg := NewScoredMotifMatrix()
	for i := range motifs {
		smg.AddMotif(motifs[i])
	}
	result, err := smg.MakeProfile(true)
	if err != nil {
		t.Error(err)
	}

	// Dimensions are compared first (row count, then the width of the
	// first row); only then are the entries compared.
	matches := len(gold) == len(result) && len(gold[0]) == len(result[0])
	if matches {
		for i := range gold {
			for j := range gold[0] {
				// Comparing floats, so don't use !=
				if !TheseFloatsAreEqual(gold[i][j], result[i][j]) {
					matches = false
					break
				}
			}
		}
	}
	if matches {
		return
	}
	t.Error(fmt.Sprintf("Error testing MakeProfile() (with pseudocounts) for Scored Motif Matrix: found incorrect motifs\n Gold: %v\n Computed: %v\n",
		gold, result))
}
// TestGreedyMotifFirstInnerIteration walks through the first two passes of
// the inner loop of the greedy motif algorithm by hand.
//
// It makes sure that ProfileMostProbableKmersGreedy() returns the right
// kmer. If the probability of all kmers is 0.0 it should return the first
// kmer, which is the case the first step targets.
func TestGreedyMotifFirstInnerIteration(t *testing.T) {
	// This motif is the first motif we see in the original DNA string
	// of the BA2D example.
	motif := "GGC"

	// Define kmer motif length
	k := len(motif)

	// Each step uses the next DNA string of the example: a profile is
	// built (without pseudocounts) from the motifs collected so far, the
	// profile-most probable kmer of dna is picked and added to the motifs.
	steps := []struct {
		dna        string   // DNA string to pick a kmer from
		gold       string   // profile-most probable kmer that should be found
		goldMotifs []string // motifs that should be in the matrix afterwards
	}{
		{"AAGAATCAGTCA", "AAG", []string{"GGC", "AAG"}},
		{"CAAGGAGTTCGC", "AAG", []string{"GGC", "AAG", "AAG"}},
	}

	// Create a ScoredMotifMatrix and add the original motif
	s := NewScoredMotifMatrix()
	if err := s.AddMotif(motif); err != nil {
		t.Fatalf("Error: AddMotif(%s) call failed: %v", motif, err)
	}

	for _, step := range steps {
		// Create a profile matrix. Without a profile the rest of the
		// step cannot run, so stop here on failure.
		profile, err := s.MakeProfile(false)
		if err != nil {
			t.Fatalf("Error: MakeProfile(false) call failed: %v", err)
		}

		// Use the profile and the input DNA string to find the
		// most probable kmer, greedy style.
		result, err := ProfileMostProbableKmersGreedy(step.dna, k, profile)
		if err != nil {
			t.Fatalf("Error: ProfileMostProbableKmersGreedy() call failed: %v", err)
		}

		// Add the most probable kmer to the motifs
		if err := s.AddMotif(result); err != nil {
			t.Fatalf("Error: AddMotif(%s) call failed: %v", result, err)
		}

		// First, check that we found the correct
		// profile-most probable kmer
		if result != step.gold {
			t.Errorf("Error: ProfileMostProbableKmers failed:\n Computed profile-most probable kmer: %s\n Gold profile-most probable kmer: %s\n DNA string: %s\n k: %d\n profile: %v\n\n",
				result, step.gold, step.dna, k, profile)
		}

		// Second, check the ScoredMotifMatrix motifs
		same := len(s.motifs) == len(step.goldMotifs)
		for i := 0; same && i < len(s.motifs); i++ {
			same = s.motifs[i] == step.goldMotifs[i]
		}
		if !same {
			t.Errorf("Error testing greedy motif first inner iteration: the ScoredMotifMatrix motifs array was not correct.\n Computed: %s\n Gold: %s",
				strings.Join(s.motifs, " "),
				strings.Join(step.goldMotifs, " "))
		}
	}
}
// TestGreedyMotifSearch runs the greedy motif search without
// pseudocounts on a five-string example and checks the returned
// motifs, position by position, against the known answer.
func TestGreedyMotifSearch(t *testing.T) {
	dna := []string{
		"GGCGTTCAGGCA",
		"AAGAATCAGTCA",
		"CAAGGAGTTCGC",
		"CACGTCAATCAC",
		"CAATAATATTCG",
	}
	// Motif length and number of DNA strings
	kmerLen, nStrings := 3, 5
	expected := []string{"CAG", "CAG", "CAA", "CAA", "CAA"}

	motifs, err := GreedyMotifSearchNoPseudocounts(dna, kmerLen, nStrings)
	if err != nil {
		t.Error(err)
	}

	// The answer is wrong if the lengths differ or if any
	// position disagrees; stop at the first disagreement.
	mismatch := len(expected) != len(motifs)
	for i := 0; !mismatch && i < len(motifs); i++ {
		mismatch = motifs[i] != expected[i]
	}
	if mismatch {
		msg := fmt.Sprintf("Error testing GreedyMotifSearch(): found incorrect motifs\n Gold: %s\n Computed: %s\n",
			strings.Join(expected, " "),
			strings.Join(motifs, " "))
		t.Error(msg)
	}
}
// TestGreedyMotifSearchPseudocounts runs the greedy motif search
// with pseudocounts on a five-string example and checks the
// returned motifs against the known answer.
func TestGreedyMotifSearchPseudocounts(t *testing.T) {
	dna := []string{
		"GGCGTTCAGGCA",
		"AAGAATCAGTCA",
		"CAAGGAGTTCGC",
		"CACGTCAATCAC",
		"CAATAATATTCG",
	}
	// Motif length and number of DNA strings
	kmerLen, nStrings := 3, 5
	expected := []string{"TTC", "ATC", "TTC", "ATC", "TTC"}

	motifs, err := GreedyMotifSearchPseudocounts(dna, kmerLen, nStrings)
	if err != nil {
		t.Error(err)
	}

	// Element-wise comparison of expected and computed motifs
	sameMotifs := func() bool {
		if len(expected) != len(motifs) {
			return false
		}
		for i, motif := range motifs {
			if motif != expected[i] {
				return false
			}
		}
		return true
	}
	if !sameMotifs() {
		msg := fmt.Sprintf("Error testing GreedyMotifSearchPseudocounts(): found incorrect motifs\n Gold: %s\n Computed: %s\n",
			strings.Join(expected, " "),
			strings.Join(motifs, " "))
		t.Error(msg)
	}
}
// TestRandomMotifSearchPseudocounts runs many randomized motif
// searches and accepts the result if the score of the motifs found
// is within 40% (relative) of the score of the reference motifs.
func TestRandomMotifSearchPseudocounts(t *testing.T) {
	expected := []string{"TCTCGGGG", "CCAAGGTG", "TACAGGCG", "TTCAGGTG", "TCCACGTG"}
	dna := []string{
		"CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA",
		"GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG",
		"TAGTACCGAGACCGAAAGAAGTATACAGGCGT",
		"TAGATCAAGTTTCAGGTGCACGTCGGTGAACC",
		"AATCCACCAGCTCCACGTGCAATGTTGGCCTA",
	}
	// Motif length, number of DNA strings, number of random searches
	kmerLen, nStrings, nSearches := 8, 5, 100

	found, err := ManyRandomMotifSearches(dna, kmerLen, nStrings, nSearches)
	if err != nil {
		t.Error(err)
	}

	// Score the reference motifs
	goldMatrix := NewScoredMotifMatrix()
	for _, motif := range expected {
		goldMatrix.AddMotif(motif)
	}
	goldMatrix.UpdateScore()
	goldScore := goldMatrix.score

	// Score the motifs the search came up with
	foundMatrix := NewScoredMotifMatrix()
	for _, motif := range found {
		foundMatrix.AddMotif(motif)
	}
	foundMatrix.UpdateScore()
	foundScore := foundMatrix.score

	// Written as "not (err < tol)" so that a NaN relative error
	// still counts as a failure.
	relErr := math.Abs(float64(goldScore-foundScore) / float64(goldScore))
	if withinTolerance := relErr < 0.40; !withinTolerance {
		msg := fmt.Sprintf("Error testing RandomMotifSearchPseudocounts(): found incorrect motifs\n Gold: %s\n Computed: %s\n",
			strings.Join(goldMatrix.motifs, " "),
			strings.Join(foundMatrix.motifs, " "))
		t.Error(msg)
	}
}
// TestGibbsSampler runs the Gibbs sampler from several random
// starts and accepts the result if the score of the motifs found is
// within 40% (relative) of the score of the reference motifs.
func TestGibbsSampler(t *testing.T) {
	expected := []string{"TCTCGGGG", "CCAAGGTG", "TACAGGCG", "TTCAGGTG", "TCCACGTG"}
	dna := []string{
		"CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA",
		"GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG",
		"TAGTACCGAGACCGAAAGAAGTATACAGGCGT",
		"TAGATCAAGTTTCAGGTGCACGTCGGTGAACC",
		"AATCCACCAGCTCCACGTGCAATGTTGGCCTA",
	}
	// Arguments forwarded to ManyGibbsSamplers, in call order
	kmerLen, nStrings, nIter, nStarts := 8, 5, 100, 20

	found, err := ManyGibbsSamplers(dna, kmerLen, nStrings, nIter, nStarts)
	if err != nil {
		t.Error(err)
	}

	// Score the reference motifs
	goldMatrix := NewScoredMotifMatrix()
	for _, motif := range expected {
		goldMatrix.AddMotif(motif)
	}
	goldMatrix.UpdateScore()
	goldScore := goldMatrix.score

	// Score the motifs the sampler came up with
	foundMatrix := NewScoredMotifMatrix()
	for _, motif := range found {
		foundMatrix.AddMotif(motif)
	}
	foundMatrix.UpdateScore()
	foundScore := foundMatrix.score

	// Written as "not (err < tol)" so that a NaN relative error
	// still counts as a failure.
	relErr := math.Abs(float64(goldScore-foundScore) / float64(goldScore))
	if withinTolerance := relErr < 0.40; !withinTolerance {
		msg := fmt.Sprintf("Error testing GibbsSampler(): found incorrect motifs\n Gold: %s\n Computed: %s\n",
			strings.Join(goldMatrix.motifs, " "),
			strings.Join(foundMatrix.motifs, " "))
		t.Error(msg)
	}
}

151
rosalind/rosalind_ba3.go

@ -0,0 +1,151 @@ @@ -0,0 +1,151 @@
package rosalind
import (
"errors"
"fmt"
"strings"
)
////////////////////////////////
// BA3a

// KmerComposition returns the distinct k-mers of length k found in
// the input string, i.e. the keys of the histogram produced by
// KmerHistogram(input, k).
//
// The order of the returned slice follows map iteration and is
// therefore unspecified; callers that need a stable order must sort.
// If KmerHistogram fails, a nil slice is returned together with an
// error that includes the underlying cause.
func KmerComposition(input string, k int) ([]string, error) {
	// Get a histogram of all kmers in this string
	hist, err := KmerHistogram(input, k)
	if err != nil {
		// Keep the cause in the message so callers can see why
		// the histogram could not be built.
		msg := fmt.Sprintf("Error: Function KmerHistogram(%s,%d) returned an error: %v\n",
			input, k, err)
		return nil, errors.New(msg)
	}

	// Collect the histogram keys (the kmers themselves)
	result := make([]string, 0, len(hist))
	for kmer := range hist {
		result = append(result, kmer)
	}
	return result, nil
}
////////////////////////////////
// BA3b

// ReconstructGenomeFromPath assembles a genome from a genome path:
// an ordered list of contigs in which a suffix of each contig equals
// a prefix of the next one.
//
// Each adjacent pair is joined at the longest suffix/prefix overlap.
// The result is the non-overlapping prefix of every contig but the
// last, followed by the last contig in full. An empty input yields
// the empty string and a single contig is returned unchanged. An
// error is returned if two adjacent contigs share no overlap.
//
// NOTE(review): a contig that is entirely a prefix of its successor
// (for example two identical consecutive k-mers) contributes nothing
// of its own to the result; confirm this is the intended handling of
// repeated k-mers.
func ReconstructGenomeFromPath(contigs []string) (string, error) {
	if len(contigs) == 0 {
		return "", nil
	}

	var genome strings.Builder
	for i := 0; i < len(contigs)-1; i++ {
		left, right := contigs[i], contigs[i+1]

		// The overlap cannot be longer than the right-hand contig,
		// so skip suffixes of left that would not fit inside it.
		first := 0
		if len(left) > len(right) {
			first = len(left) - len(right)
		}

		// Slide the start of the suffix rightwards: the first match
		// is the longest overlap between left and right.
		overlapStart := -1
		for j := first; j < len(left); j++ {
			if strings.HasPrefix(right, left[j:]) {
				overlapStart = j
				break
			}
		}
		if overlapStart < 0 {
			msg := fmt.Sprintf("Error: ReconstructGenomeFromPath(): No overlap detected between %s and %s\n",
				left, right)
			return "", errors.New(msg)
		}

		// Keep only the part of left that right does not repeat.
		genome.WriteString(left[:overlapStart])
	}

	// The final contig has no successor, so it is kept whole.
	genome.WriteString(contigs[len(contigs)-1])
	return genome.String(), nil
}
////////////////////////////////
// BA3c

// OverlapGraph builds the overlap graph of a set of k-mers. Every
// k-mer becomes a node, and a directed edge a -> b is added for each
// pair of distinct input positions whose k-mers satisfy
// suffix(a) == prefix(b), where suffix and prefix are the last and
// first k-1 characters.
//
// All k-mers must have the same, non-zero length; otherwise an error
// is returned. An empty input yields an empty graph. A k-mer is never
// connected to itself through its own input position (no self-loops
// from a single entry).
func OverlapGraph(patterns []string) (DirGraph, error) {
	var g DirGraph
	if len(patterns) == 0 {
		return g, nil
	}

	// Verify k-mers are non-empty and all the same length before
	// touching the graph.
	k := len(patterns[0])
	if k == 0 {
		return g, errors.New("Error: OverlapGraph(): kmers must not be empty\n")
	}
	for _, pattern := range patterns {
		if len(pattern) != k {
			msg := fmt.Sprintf("Error: kmer lengths do not match, k = %d but len(\"%s\") = %d\n",
				k, pattern, len(pattern))
			return g, errors.New(msg)
		}
	}

	// Add every k-mer as a node to the overlap graph
	for _, pattern := range patterns {
		n := Node{pattern}
		g.AddNode(&n)
	}

	// Visit each unordered pair once and test both directions.
	// The two checks are independent: a pair such as ACA / CAC
	// overlaps both ways and needs both edges.
	for i := 0; i < len(patterns); i++ {
		for j := i + 1; j < len(patterns); j++ {
			a, b := patterns[i], patterns[j]
			if a[1:] == b[:k-1] {
				// a -> b
				g.AddEdge(g.GetNode(a), g.GetNode(b))
			}
			if b[1:] == a[:k-1] {
				// b -> a
				g.AddEdge(g.GetNode(b), g.GetNode(a))
			}
		}
	}
	return g, nil
}

270
rosalind/rosalind_ba3_test.go

@ -0,0 +1,270 @@ @@ -0,0 +1,270 @@
package rosalind
import (
"fmt"
"log"
"sort"
"strconv"
"strings"
"testing"
)
/////////////////////////////////
// BA3a Test

// TestKmerComposition checks the 5-mer composition of a short
// string against the known list, ignoring order.
func TestKmerComposition(t *testing.T) {
	const kmerLen = 5
	computed, err := KmerComposition("CAATCCAAC", kmerLen)
	if err != nil {
		t.Error(fmt.Sprintf("Error: %v", err))
	}

	expected := []string{"AATCC", "ATCCA", "CAATC", "CCAAC", "TCCAA"}

	// Order is not part of the contract, so sort both sides
	sort.Strings(expected)
	sort.Strings(computed)
	if EqualStringSlices(computed, expected) {
		return
	}
	t.Error(fmt.Sprintf("Error testing KmerComposition()\ncomputed = %v\ngold = %v",
		computed, expected))
}
// TestKmerCompositionFile checks KmerComposition against a data
// file laid out as:
//
//	lines[0]:  "Input" header
//	lines[1]:  k
//	lines[2]:  input DNA string
//	lines[3]:  "Output" header
//	lines[4+]: gold standard kmers, one per line
func TestKmerCompositionFile(t *testing.T) {
	filename := "data/string_composition.txt"

	// Read the contents of the input file, one entry per line.
	// NOTE(review): log.Fatalf aborts the entire test binary;
	// t.Fatalf would fail only this test.
	lines, err := ReadLines(filename)
	if err != nil {
		log.Fatalf("ReadLines: %v", err)
	}

	// Index of the first gold-standard line; the file must at
	// least contain the four lines that precede it.
	const goldStart = 4
	if len(lines) < goldStart {
		t.Fatalf("Error: file %s has %d lines, expected at least %d",
			filename, len(lines), goldStart)
	}

	k, err := strconv.Atoi(lines[1])
	if err != nil {
		t.Fatalf("Error: could not parse k from line 2 of %s: %v", filename, err)
	}
	input := lines[2]

	// Copy the gold answers so sorting does not disturb lines
	gold := make([]string, len(lines)-goldStart)
	copy(gold, lines[goldStart:])

	results, err := KmerComposition(input, k)
	if err != nil {
		t.Error(fmt.Sprintf("Error: %v", err))
	}

	// Check that lengths are equal
	if len(results) != len(gold) {
		msg := "Error testing KmerComposition(): length of computed kmer composition does not match gold standard:"
		msg += fmt.Sprintf("len(computed) = %d, len(gold) = %d\n", len(results), len(gold))
		t.Error(msg)
	}

	// Sort before comparing
	sort.Strings(gold)
	sort.Strings(results)
	if !EqualStringSlices(results, gold) {
		msg := fmt.Sprintf("Error testing KmerComposition() from file %s: computed kmers do not match gold standard\nlen(computed) = %d\nlen(gold) = %d",
			filename,
			len(results), len(gold))
		t.Error(msg)
	}
}
/////////////////////////////////
// BA3b Test

// TestReconstructGenome assembles five overlapping 5-mers and
// checks the resulting genome string.
func TestReconstructGenome(t *testing.T) {
	const expected = "ACCGAAGCT"
	path := []string{"ACCGA", "CCGAA", "CGAAG", "GAAGC", "AAGCT"}

	genome, err := ReconstructGenomeFromPath(path)
	if err != nil {
		t.Error(err)
	}
	if genome == expected {
		return
	}
	t.Error(fmt.Sprintf("Error testing ReconstructGenomeFromPath():\ninputs = %s\ncomputed = %s\ngold = %s",
		strings.Join(path, " "), genome, expected))
}
// TestReconstructGenomeFile checks ReconstructGenomeFromPath
// against a data file laid out as:
//
//	lines[0]:       "Input" header
//	lines[1..n-3]:  contigs, one per line
//	lines[n-2]:     "Output" header
//	lines[n-1]:     gold standard genome
func TestReconstructGenomeFile(t *testing.T) {
	filename := "data/genome_path_string.txt"

	// Read the contents of the input file, one entry per line.
	// NOTE(review): log.Fatalf aborts the entire test binary;
	// t.Fatalf would fail only this test.
	lines, err := ReadLines(filename)
	if err != nil {
		log.Fatalf("ReadLines: %v", err)
	}

	// Two header lines plus the gold line are the bare minimum;
	// fewer would make the contig count negative.
	if len(lines) < 3 {
		t.Fatalf("Error: file %s has %d lines, expected at least 3",
			filename, len(lines))
	}

	// Everything between the two headers is a contig
	contigs := make([]string, len(lines)-3)
	copy(contigs, lines[1:len(lines)-2])

	// The last line holds the expected genome
	gold := strings.Trim(lines[len(lines)-1], " ")

	results, err := ReconstructGenomeFromPath(contigs)
	if err != nil {
		msg := fmt.Sprintf("Error: ReconstructGenomeFromPath(): function returned an error: %v", err)
		t.Error(msg)
	}

	if len(results) != len(gold) {
		msg := "Error testing ReconstructGenomeFromPath(): length of reconstructed genome does not match length of correct result\n"
		msg += fmt.Sprintf("len(computed) = %d, len(gold) = %d\n", len(results), len(gold))
		t.Error(msg)
	} else if results != gold {
		// Same length but different content: list every
		// position where the two genomes disagree.
		msg := "Error testing ReconstructGenomeFromPath(): computed genome and correct genome do not match\n"
		for i := 0; i < len(results); i++ {
			if results[i] != gold[i] {
				msg += fmt.Sprintf("Difference at index i = %d: computed[%d] = %s, gold[%d] = %s\n", i, i, string(results[i]), i, string(gold[i]))
			}
		}
		t.Error(msg)
	}
}
/////////////////////////////////
// BA3c Test

// TestOverlapGraph builds the overlap graph of five 5-mers and
// compares its sorted edge-list string with the known answer.
func TestOverlapGraph(t *testing.T) {
	graph, err := OverlapGraph([]string{"ATGCG", "GCATG", "CATGC", "AGGCA", "GGCAT"})
	if err != nil {
		t.Error(err)
	}

	const expected = "AGGCA -> GGCAT\nCATGC -> ATGCG\nGCATG -> CATGC\nGGCAT -> GCATG"
	if graph.String() == expected {
		return
	}
	t.Error("Error testing OverlapGraph(): string representation of graphs don't match")
}
/*
func TestOverlapGraphFile(t *testing.T) {
filename := "data/overlap_graph.txt"
// Read the contents of the input file
// into a single string
lines, err := ReadLines(filename)
if err != nil {
log.Fatalf("ReadLines: %v", err)
}
// Input file contents
// lines[0]: Input
// We have an unknown number of fragments
// and an unknown number of edges,
// but they are split by a line with
// "Output:"
contigs := []string{}
gold_edges := []string{}
var stop bool
// Loop over the first section of the file,
// containing overlapping kmers
stop = false
iL := 1
for stop == false {
// Abort if we prematurely reach the
// end of the file
if iL >= len(lines) {
msg := "Error: could not properly parse file, no line with 'Output:' found."
t.Error(msg)
}
// Get the line
line := lines[iL]
// Break if we reached "Output:"
if "Output:" == strings.Trim(line, " ") {
// step over this line
iL++
break
}
// Add line to list of contigs
contigs = append(contigs, strings.Trim(line, " "))
iL++
}
// Loop over the second section of the file,
// containing overlapping kmer edges
stop = false
for stop == false {
// Break if we reach the end of the file
if iL == len(lines) {
break
}
// Get the line
line := lines[iL]
// Add line to list of edges
gold_edges = append(gold_edges, strings.Trim(line, " "))
iL++
}
// Construct the graph
g, err := OverlapGraph(contigs)
if err != nil {
t.Error(err)
}
// Get the edge list representation of the graph
computed_edges := strings.Split(g.String(), "\n")
if !EqualStringSlices(computed_edges, gold_edges) {
msg := fmt.Sprintf("Error testing OverlapGraph() with file %s: edge lists do not match\n", filename)
msg += fmt.Sprintf("len(gold_edges) = %d\nlen(computed_edges) = %d\n", len(gold_edges), len(computed_edges))
t.Error(msg)
}
}
*/

88
rosalind/rosalind_datastructures.go

@ -0,0 +1,88 @@ @@ -0,0 +1,88 @@
package rosalind
import (
"fmt"
"sort"
"strings"
"sync"
)
// DirGraph is a directed graph stored as a list of nodes plus an
// adjacency map from a source node (by value) to its target nodes.
// The lock field guards both nodes and edges; every method below
// acquires it, so a DirGraph must not be copied while in use.
type DirGraph struct {
	nodes []*Node
	edges map[Node][]*Node
	lock  sync.RWMutex
}

// Node is a graph node identified by its name.
type Node struct {
	name string
}

// String returns the node's name.
func (n *Node) String() string {
	return fmt.Sprintf("%s", n.name)
}

// AddNode appends n to the graph's node list. No check for
// duplicate names is made.
func (g *DirGraph) AddNode(n *Node) {
	g.lock.Lock()
	defer g.lock.Unlock()
	g.nodes = append(g.nodes, n)
}

// AddEdge records a directed edge n1 -> n2, creating the edge map
// on first use. Edges are keyed by the value of n1, so two nodes
// with the same name share one adjacency list. Both n1 and n2 must
// be non-nil.
func (g *DirGraph) AddEdge(n1, n2 *Node) {
	g.lock.Lock()
	defer g.lock.Unlock()
	if g.edges == nil {
		g.edges = make(map[Node][]*Node)
	}
	g.edges[*n1] = append(g.edges[*n1], n2)
}

// EdgeCount returns the total number of edges in the graph.
func (g *DirGraph) EdgeCount() int {
	g.lock.RLock()
	defer g.lock.RUnlock()
	return g.edgeCount()
}

// edgeCount sums the adjacency-list lengths. The caller must
// already hold the lock; it exists so String can count edges
// without taking the read lock a second time.
func (g *DirGraph) edgeCount() int {
	total := 0
	for _, targets := range g.edges {
		total += len(targets)
	}
	return total
}

// GetNode returns the first node whose name equals label, or nil
// if the graph has no such node.
func (g *DirGraph) GetNode(label string) *Node {
	g.lock.RLock()
	defer g.lock.RUnlock()
	for _, n := range g.nodes {
		if n.name == label {
			return n
		}
	}
	return nil
}

// String returns the graph as an edge list, one "src -> dst" entry
// per line, sorted lexicographically. A graph without edges yields
// the empty string.
func (g *DirGraph) String() string {
	g.lock.RLock()
	defer g.lock.RUnlock()

	// Keep it simple: walk the edge map in whatever order it
	// yields, then sort the rendered strings at the end.
	edgeStrings := make([]string, 0, g.edgeCount())
	for src, targets := range g.edges {
		for _, dst := range targets {
			edgeStrings = append(edgeStrings, src.name+" -> "+dst.name)
		}
	}
	sort.Strings(edgeStrings)
	return strings.Join(edgeStrings, "\n")
}

53
rosalind/rosalind_datastructures_test.go

@ -0,0 +1,53 @@ @@ -0,0 +1,53 @@
package rosalind
import (
"fmt"
"testing"
)
// fillGraph returns a small fixture graph with four edges. Every
// edge gets its own freshly allocated source and target node, so
// names that occur in more than one edge are added as separate
// nodes each time.
func fillGraph() DirGraph {
	var g DirGraph

	// Source/target names, in the order they are inserted
	edges := [][2]string{
		{"AGGCA", "GGCAT"},
		{"CATGC", "ATGCG"},
		{"GCATG", "CATGC"},
		{"GGCAT", "GCATG"},
	}
	for _, e := range edges {
		src := Node{e[0]}
		dst := Node{e[1]}
		g.AddNode(&src)
		g.AddNode(&dst)
		g.AddEdge(&src, &dst)
	}
	return g
}
// TestDatastructureDirGraph checks that the fixture graph renders
// as the expected sorted edge list.
func TestDatastructureDirGraph(t *testing.T) {
	const expected = "AGGCA -> GGCAT\nCATGC -> ATGCG\nGCATG -> CATGC\nGGCAT -> GCATG"

	g := fillGraph()
	rendered := g.String()
	if rendered == expected {
		return
	}

	// Show both versions so the difference is easy to spot
	report := "Error: DirGraph data structure did not print properly\n" +
		fmt.Sprintf("computed:\n%v\n\n", rendered) +
		fmt.Sprintf("gold:\n%v\n\n", expected)
	t.Error(report)
}

64
rosalind/rosalind_stronghold.go

@ -0,0 +1,64 @@ @@ -0,0 +1,64 @@
package rosalind
import (
"errors"
"fmt"
)
// CountNucleotides counts how often each of the nucleotides A, C, G
// and T occurs in dna and returns the counts keyed by the
// one-letter base name.
//
// An error (and a nil map) is returned if CheckIsDNA rejects the
// input or if DNA2Bitmasks fails; in the latter case the message
// includes the underlying cause.
func CountNucleotides(dna string) (map[string]int, error) {
	if !CheckIsDNA(dna) {
		msg := fmt.Sprintf("Error: input string was not DNA: %s", dna)
		return nil, errors.New(msg)
	}

	// Get bitmask representations: per base, one flag per position
	bms, err := DNA2Bitmasks(dna)
	if err != nil {
		msg := fmt.Sprintf("Error: DNA2Bitmasks() threw an error for input %s: %v",
			dna, err)
		return nil, errors.New(msg)
	}

	// Map to store counts for each nucleotide
	result := make(map[string]int)

	// Iterate over every possible nucleotide; a base's frequency
	// is the number of set flags in its bitmask.
	bases := []string{"A", "C", "G", "T"}
	for _, base := range bases {
		sum := 0
		for _, isBase := range bms[base] {
			if isBase {
				sum++
			}
		}
		result[base] = sum
	}
	return result, nil
}
// CountNucleotidesArray counts the nucleotides in dna and returns
// the four counts as a slice in the fixed order A, C, G, T. Any
// error from CountNucleotides is reported with a nil slice.
func CountNucleotidesArray(dna string) ([]int, error) {
	counts, err := CountNucleotides(dna)
	if err != nil {
		return nil, errors.New(fmt.Sprintf("Error: CountNucleotides() returned an error: %v", err))
	}

	// Flatten the map into the documented base order
	ordered := make([]int, 0, 4)
	for _, base := range []string{"A", "C", "G", "T"} {
		ordered = append(ordered, counts[base])
	}
	return ordered, nil
}

21
rosalind/rosalind_stronghold_test.go

@ -0,0 +1,21 @@ @@ -0,0 +1,21 @@
package rosalind
import (
"fmt"
"testing"
)
// TestCountNucleotides checks the A, C, G, T counts of a fixed
// 70-character DNA string.
func TestCountNucleotides(t *testing.T) {
	const dna = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
	expected := []int{20, 12, 17, 21}

	counts, err := CountNucleotidesArray(dna)
	if err != nil {
		t.Error(err)
	}
	if EqualIntSlices(counts, expected) {
		return
	}
	t.Error(fmt.Sprintf("Error testing CountNucleotides(): input = %s\ncomputed = %v\ngold = %v\n",
		dna, counts, expected))
}

195
rosalind/utils.go

@ -0,0 +1,195 @@ @@ -0,0 +1,195 @@
package rosalind
import (
"bufio"
"errors"
"fmt"
"math"
"os"
"strconv"
"strings"
)
// ReadLines opens the file at path, reads it line by line into
// memory and returns the lines without their line terminators.
// If the file cannot be opened, a nil slice and the error are
// returned; otherwise the lines read so far are returned together
// with any error the scanner reported.
func ReadLines(path string) ([]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// The scanner starts from a tiny buffer and is allowed to
	// grow it up to maxLineBytes. Scanners are meant for simple
	// input, so a line beyond that limit stops the scan and is
	// reported through Err.
	const maxLineBytes = 90000
	sc := bufio.NewScanner(f)
	sc.Buffer(make([]byte, 2), maxLineBytes)

	var out []string
	for sc.Scan() {
		out = append(out, sc.Text())
	}
	return out, sc.Err()
}
// WriteLines creates (or truncates) the file at path and writes
// each element of lines to it followed by a newline.
//
// It returns the first error encountered while creating, writing,
// flushing or closing the file. The close error matters here: data
// may only reach the disk when the file is closed, so dropping it
// could hide a failed write.
func WriteLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// Always close the file, but only surface the close error if
	// nothing failed earlier.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, err = fmt.Fprintln(w, line); err != nil {
			return err
		}
	}
	return w.Flush()
}
// ReadMatrix32 parses a matrix of floating point values from text.
// Each element of lines is one row holding exactly k
// whitespace-separated values. The result is indexed as
// result[row][column], i.e. it has len(lines) rows of k float32
// values each.
//
// Leading, trailing and repeated whitespace within a line is
// ignored. An error (and a nil matrix) is returned if a row does not
// have exactly k values or if a value cannot be parsed as a float.
func ReadMatrix32(lines []string, k int) ([][]float32, error) {
	// A multidimensional slice is a slice of row slices
	result := make([][]float32, len(lines))
	for i, line := range lines {
		// Split the row into tokens; Fields tolerates extra
		// whitespace that a plain split on " " would turn into
		// empty tokens.
		tokens := strings.Fields(line)
		if len(tokens) != k {
			msg := fmt.Sprintf("Error: length of line %d was %d, should be %d", i+1, len(tokens), k)
			return nil, errors.New(msg)
		}

		row := make([]float32, k)
		for j, token := range tokens {
			// ParseFloat with bitSize 32 returns a float64 that
			// converts to float32 without changing its value.
			// https://golang.org/pkg/strconv/#ParseFloat
			f, err := strconv.ParseFloat(token, 32)
			if err != nil {
				return nil, err
			}
			row[j] = float32(f)
		}
		result[i] = row
	}
	return result, nil
}
// EqualStringSlices reports whether a and b have the same length
// and hold equal strings at every index. It exists because slices
// cannot be compared directly with ==.
func EqualStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, s := range a {
		if s != b[i] {
			return false
		}
	}
	return true
}
// EqualBoolSlices reports whether a and b have the same length and
// hold equal booleans at every index. It exists because slices
// cannot be compared directly with ==.
func EqualBoolSlices(a, b []bool) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
// EqualIntSlices reports whether a and b have the same length and
// hold equal integers at every index.
func EqualIntSlices(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range b {
		if a[i] != v {
			return false
		}
	}
	return true
}
// TheseFloatsAreEqual reports whether a and b differ by less than
// an absolute tolerance of 1.0e-6.
func TheseFloatsAreEqual(a, b float32) bool {
	const tolerance = 1.0e-6
	diff := math.Abs(float64(b - a))
	return diff < tolerance
}
// Factorial returns n! as an int. Any n below 2, including
// negative values, yields 1.
func Factorial(n int) int {
	product := 1
	// Multiply up from 2; the loop body never runs for n < 2
	for factor := 2; factor <= n; factor++ {
		product *= factor
	}
	return product
}
// Binomial returns the binomial coefficient C(n, k), the number of
// ways to choose k items out of n. It returns 0 when k is negative
// or greater than n, since no such selection exists.
func Binomial(n, k int) int {
	if k < 0 || k > n {
		return 0
	}

	// Since C(n, k) = C(n, n-k), use the smaller of the two to
	// keep the loop short.
	if k > (n - k) {
		k = n - k
	}

	// Calculate value of:
	// ( n * (n-1) * ... * (n-k+1) )
	// -----------------------------
	// ( k * (k-1) * ... * 1 )
	//
	// Multiplying before dividing at each step keeps every
	// intermediate value an exact integer.
	result := 1
	for i := 0; i < k; i++ {
		result *= n - i
		result /= i + 1
	}
	return result
}
// minint returns the smaller of two ints. It avoids math.Min,
// which would force an int -> float -> int round trip.
func minint(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// maxint returns the larger of two ints.
func maxint(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

39
rosalind/utils_test.go

@ -0,0 +1,39 @@ @@ -0,0 +1,39 @@
package rosalind
import "testing"
// TestEqualStringSlices checks that a slice literal and a slice
// built up element by element compare as equal.
func TestEqualStringSlices(t *testing.T) {
	literal := []string{"peanut", "butter", "jelly", "time"}

	built := make([]string, 0, 4)
	for _, word := range [...]string{"peanut", "butter", "jelly", "time"} {
		built = append(built, word)
	}

	if EqualStringSlices(literal, built) {
		return
	}
	t.Fatal("Error: EqualStringSlices() is broken!")
}
// TestEqualBoolSlices checks that a slice literal and a slice
// filled in index by index compare as equal.
func TestEqualBoolSlices(t *testing.T) {
	literal := []bool{true, true, true, false, false, false, true, true, true}

	// Start all false, then switch on the first and last three
	filled := make([]bool, 9)
	for _, i := range []int{0, 1, 2, 6, 7, 8} {
		filled[i] = true
	}

	if EqualBoolSlices(literal, filled) {
		return
	}
	t.Fatal("Error: EqualBoolSlices() is broken!")
}
// TestEqualIntSlices checks that a slice literal and a slice
// built up element by element compare as equal.
func TestEqualIntSlices(t *testing.T) {
	literal := []int{3, 1, 4, 1, 5, 9}

	built := make([]int, 0, 6)
	for _, digit := range [...]int{3, 1, 4, 1, 5, 9} {
		built = append(built, digit)
	}

	if EqualIntSlices(literal, built) {
		return
	}
	t.Fatal("Error: EqualIntSlices() is broken!")
}

1
scripts/.gitignore vendored

@ -0,0 +1 @@ @@ -0,0 +1 @@
godoc

38
scripts/gendoc.sh

@ -0,0 +1,38 @@ @@ -0,0 +1,38 @@
#!/bin/bash
#
# Generate static HTML API docs by running a local godoc server and
# scraping it with wget. The result ends up in $DOC_DIR.
set -u
set -x

DOC_DIR=godoc
PKG=github.com/charlesreid1/go-rosalind

# How long to wait for the godoc server, in one-second attempts
MAX_TRIES=60

# Run a godoc server which we will scrape. Clobber the GOPATH to include
# only our dependencies.
GOPATH=$(pwd):$(pwd)/vendor godoc -http=localhost:6060 &
DOC_PID=$!

# Wait for the server to init. curl exits with 0 once we connected.
# Pause between attempts, and give up if the server died or never
# came up, instead of spinning forever.
tries=0
until curl -s "http://localhost:6060/pkg/$PKG" > /dev/null
do
    if ! kill -0 "$DOC_PID" 2> /dev/null
    then
        echo "godoc server exited before it became ready" >&2
        exit 1
    fi
    tries=$((tries + 1))
    if [ "$tries" -ge "$MAX_TRIES" ]
    then
        echo "godoc server did not respond after $MAX_TRIES attempts" >&2
        kill -9 "$DOC_PID"
        exit 1
    fi
    sleep 1
done

# Scrape the pkg directory for the API docs. Scrape lib for the CSS/JS. Ignore everything else.
# The output is dumped to the directory "localhost:6060".
wget -r -m -k -E -p -erobots=off --include-directories="/pkg,/lib" --exclude-directories="*" "http://localhost:6060/pkg/$PKG/"

# Stop the godoc server
kill -9 "$DOC_PID"

# Delete the old directory or else mv will put the localhost dir into
# the DOC_DIR if it already exists.
rm -rf "$DOC_DIR"
mv localhost\:6060 "$DOC_DIR"

echo "Docs can be found in $DOC_DIR"
echo "Replace /lib and /pkg in the gh-pages branch to update gh-pages"

50
stronghold/dna.go

@ -0,0 +1,50 @@ @@ -0,0 +1,50 @@
package rosalindstronghold
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// DNADescription prints the banner for Rosalind.info
// Problem DNA: Counting DNA Nucleotides
// (separator, problem name, title, task description and URL),
// one entry per line, to standard output.
func DNADescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem DNA:",
		"Counting DNA Nucleotides",
		"",
		"Given a DNA string, return a count of each base pair as an array, in the order A, C, G, T",
		"",
		"URL: http://rosalind.info/problems/dna/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// DNA runs Rosalind problem DNA on the given input file: it prints
// the problem description, reads the DNA string from the first line
// of the file and prints the A, C, G, T counts separated by spaces.
//
// The program is terminated via log.Fatalf if the file cannot be
// read, is empty, or if counting the nucleotides fails.
func DNA(filename string) {
	DNADescription()

	// Read the contents of the input file, one entry per line
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("readLines: %v", err)
	}
	if len(lines) == 0 {
		log.Fatalf("DNA: input file %s is empty", filename)
	}

	// Input file contents: the DNA string is on the first line
	input := lines[0]

	result, err := rosa.CountNucleotidesArray(input)
	if err != nil {
		log.Fatalf("CountNucleotidesArray: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	for _, r := range result {
		fmt.Printf("%d ", r)
	}
	fmt.Printf("\n\n")
}

1
stronghold/for_real/rosalind_dna.txt

@ -0,0 +1 @@ @@ -0,0 +1 @@
AGCAGGTAGAAGGCGTGTTAGAGGCATCCCTCATGCCTAAATCTATGTAAGAGTCGTGCTGGTCCCCCGTGTGCACCATGTGTGCACACTAAGAGGTCCTACACAACTTAGGATGGACAGATCAGCGTAGAAAGGGGGGGTTACTCGATGGCTTAATCCGAGCTCTACATGTGCGTTACTCCCCCGACTGGTCGCTGCGAACCGGACACACGACAGGTGCGCCGGTACGGACGGGTTAACTTTGTTTATTCACAGCAACGCGCGACAGTCCTACCTATCTTTTCAAGGAAGTGACAGGGACTAGTTCGGCAGGACACAAACCAGAAAACCTGATCAGACGACATGTCACAGGCGATAAGATGGTGTCCGTGGCGATATCCATTAATACCAAAGTATCAGCGGCCGACGCATCTAGCGCTAAGCCGATCGAGCGCAACCATTACCAATATACTGCGGCGGGCTGGTATGTAGTGCAGACATTGTGGGTTCCACGGCTCATGTATCTCGAAACCCTTACTGTAAAATGTAGGTAATTGAGCGCAGATCCATACACGCGAGTGTCTCCGTCACAAATACTGATGGCTCTATTGGCGTCGGATATTATTAGTTTGTATATCCCGCCTTACCTATTCTACCTAAGGCGAGCATGATTGTGCTCGGCCCTGGAGTTAAGTCTGTGAGTTCCCGTGGATGACAACTGGACAGCGTCACGTCATTGTTGAACCGTCTATTCTGCTTTACGTCATGCAGGTTGACGGGGCCAGAGCTTCTCTGCCGCACCAGCTCTGTTCTATATGATTATTTTTTATGGCTAAGACCGATCTACCATTATTGTTGTATGCGACGCCAGTTCGCACATGCCGGGCCTATACTTCGCATAACTCGAGAGGGCATGACTTGCGAGCGGCGGACTCG

34
stronghold/populate_templates.py

@ -0,0 +1,34 @@ @@ -0,0 +1,34 @@
import jinja2
import os
def main():
    """Render template.go.j2 once per problem and write <id>.go.

    The template is looked up in the current directory. A problem
    whose output file already exists is left untouched, so files
    that were edited by hand are never overwritten.
    """
    # Jinja env rooted at the current directory
    env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))

    # One entry per problem; the keys are the template variables
    problems = [
        {
            'id': 'DNA',
            'title': 'Counting DNA Nucleotides',
            'description': 'Given a DNA string, return a count of each base pair as an array, in the order A, C, G, T',
            'url': 'http://rosalind.info/problems/dna/'
        },
    ]

    print("Writing problem boilerplate code")

    template_name = 'template.go.j2'
    for problem in problems:
        contents = env.get_template(template_name).render(**problem)
        fname = problem['id'].lower()+'.go'

        # Never clobber an existing file
        if os.path.exists(fname):
            print("File %s already exists, skipping..."%(fname))
            continue

        print("Writing to file %s..."%(fname))
        with open(fname,'w') as f:
            f.write(contents)

    print("Done")


if __name__=="__main__":
    main()

7
stronghold/stronghold_test.go

@ -0,0 +1,7 @@ @@ -0,0 +1,7 @@
package rosalindstronghold
import "testing"
// TestDNA runs the DNA problem end to end on the bundled input
// file. It makes no assertions of its own; it only exercises DNA.
func TestDNA(t *testing.T) {
	const inputFile = "for_real/rosalind_dna.txt"
	DNA(inputFile)
}

49
stronghold/template.go.j2

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
package rosalindstronghold
import (
"fmt"
"log"
rosa "github.com/charlesreid1/go-rosalind/rosalind"
)
// {{id}}Description prints the banner for Rosalind.info
// Problem {{id}}: {{title}}
// (separator, problem name, title, task description and URL),
// one entry per line, to standard output.
func {{id}}Description() {
// Banner text; the {{...}} placeholders are filled in when the
// template is rendered.
description := []string{
"-----------------------------------------",
"Rosalind: Problem {{id}}:",
"{{title}}",
"",
"{{description}}",
"",
"URL: {{url}}",
"",
}
for _, line := range description {
fmt.Println(line)
}
}
// {{id}} runs Rosalind problem {{id}} on the given input file:
// it prints the problem description and reads the input file.
// The solution itself is left as commented-out boilerplate to be
// filled in after the file has been generated.
func {{id}}(filename string) {
	{{id}}Description()

	// Read the contents of the input file, one entry per line
	lines, err := rosa.ReadLines(filename)
	if err != nil {
		log.Fatalf("readLines: %v", err)
	}

	// Placeholder so the freshly generated file compiles before
	// the solution is written; remove once lines is used below.
	_ = lines

	//// Input file contents
	//input := lines[0]
	//pattern := lines[1]
	//result := rosa.PatternCount(input, pattern)
	//
	//fmt.Println("")
	//fmt.Printf("Computed result from input file: %s\n", filename)
	//fmt.Println(result)
}
Loading…
Cancel
Save