Compare commits

...

36 Commits

Author SHA1 Message Date
Charles Reid 887cba33a1 ba1h utility function comment 5 years ago
Charles Reid 4f99bcd31c add BA1H solution and fixes for everybody 5 years ago
Charles Reid 4db10b5283 update all drivers to properly work. 5 years ago
Charles Reid e3da875eb1 add actual rosalind.info inputs (i.e., inputs with no outputs) 5 years ago
Charles Reid c61794fe70 add int slice comparison function 5 years ago
Charles Reid 3e37bd3ab1 add tests from external file for BA1D 5 years ago
Charles Reid b067fd9ff7 fix print statement 5 years ago
Charles Reid a75d2664a7 update import statements and fix compilation errors 5 years ago
Charles Reid 0ba69ecec5 update top level readme with organization info 5 years ago
Charles Reid 15dddfee86 update filename 5 years ago
Charles Reid 1361c6d3c4 add file i/o functions 5 years ago
Charles Reid 36372ba20f add chapter 1 readme 5 years ago
Charles Reid ade1dfa0f7 move chapter 1 problems to folder 5 years ago
Charles Reid 5b8ad463eb add BA1G solution 5 years ago
Charles Reid 7d4bac370f update tests, BA1F working 5 years ago
Charles Reid 2b806f3850 add BA1F solution and functionsa 5 years ago
Charles Reid 39ab82e555 add matrix of tests for clump finding - problem BA1E 5 years ago
Charles Reid 94414c8335 add BA1E. restructure everyone to use rosalind.go community function file 5 years ago
Charles Reid dee0776073 fill in remaining tests 5 years ago
Charles Reid 1a823f377f add data directory with text files containing sample data 5 years ago
Charles Reid 589899e0b6 add BA1B test from file 5 years ago
Charles Reid cf0ebb10b8 add a few more tests 5 years ago
Charles Reid 49f32fe400 add BA1D test problems 5 years ago
Charles Reid c4ee7b4cf4 run a simple test in the BA1C() method 5 years ago
Charles Reid 92cd45b7ab add code to BA1A test to load test case from file 5 years ago
Charles Reid 3df02b514a complete DNA-bitmask conversion functions 5 years ago
Charles Reid 1e16acbb17 add BA1C (in progress) and initial test of DNA2Bitmask method 5 years ago
Charles Reid f651265cc1 update update function names and docstrings in BA1B 5 years ago
Charles Reid 1f57e09dc3 add Go gitignore 5 years ago
Charles Reid 17914b28c9 add readme 5 years ago
Charles Reid 50486ece91 add todo doc 5 years ago
Charles Reid c4ff3da78e add main method 5 years ago
Charles Reid 81c302f612 add problem ba1b 5 years ago
Charles Reid 62c11a90f9 add problem ba1b test 5 years ago
Charles Reid 1cd161a1af add a test for problem BA1A 5 years ago
Charles Reid cb2286de7c restructure ba1a.go problem - include BA1A() and BA1ADescription() 5 years ago
  1. 13
      .gitignore
  2. 41
      Readme.md
  3. 38
      ba1a.go
  4. 73
      chapter01/Readme.md
  5. 54
      chapter01/ba1a.go
  6. 99
      chapter01/ba1a_test.go
  7. 58
      chapter01/ba1b.go
  8. 82
      chapter01/ba1b_test.go
  9. 50
      chapter01/ba1c.go
  10. 123
      chapter01/ba1c_test.go
  11. 61
      chapter01/ba1d.go
  12. 97
      chapter01/ba1d_test.go
  13. 58
      chapter01/ba1e.go
  14. 42
      chapter01/ba1e_test.go
  15. 60
      chapter01/ba1f.go
  16. 53
      chapter01/ba1f_test.go
  17. 52
      chapter01/ba1g.go
  18. 49
      chapter01/ba1g_test.go
  19. 65
      chapter01/ba1h.go
  20. 56
      chapter01/ba1h_test.go
  21. 5
      chapter01/data/clump_finding.txt
  22. 5
      chapter01/data/frequent_words.txt
  23. 5
      chapter01/data/hamming_distance.txt
  24. 4
      chapter01/data/minimum_skew.txt
  25. 5
      chapter01/data/pattern_count.txt
  26. 5
      chapter01/data/pattern_matching.txt
  27. 4
      chapter01/data/reverse_complement.txt
  28. 2
      chapter01/for_real/rosalind_ba1a.txt
  29. 2
      chapter01/for_real/rosalind_ba1b.txt
  30. 1
      chapter01/for_real/rosalind_ba1c.txt
  31. 2
      chapter01/for_real/rosalind_ba1d.txt
  32. 2
      chapter01/for_real/rosalind_ba1e.txt
  33. 1
      chapter01/for_real/rosalind_ba1f.txt
  34. 2
      chapter01/for_real/rosalind_ba1g.txt
  35. 3
      chapter01/for_real/rosalind_ba1h.txt
  36. 15
      chapter01/main.go
  37. 545
      chapter01/rosalind.go
  38. 21
      chapter01/todo.md
  39. 95
      chapter01/utils.go

13
.gitignore vendored

@ -0,0 +1,13 @@ @@ -0,0 +1,13 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out

41
Readme.md

@ -0,0 +1,41 @@ @@ -0,0 +1,41 @@
# Go-Rosalind
Solving problems from Rosalind.info using Go
## Organization
Each chapter has its own directory.
Within the chapter directory, each problem has
its own driver program, which prints info about
the problem, loads the input file from Rosalind,
and prints the solution. Each problem also has
its own test suite using the examples provided
on Rosalind.info.
For example, the function that loads the
input file for problem BA1A is in `ba1a.go`
and the code to test the functionality
of the solution to BA1A is in `ba1a_test.go`.
## Quick Start
To run all the tests in a chapter directory:
```
go test -v
```
To run only a particular problem:
1. Edit `main.go` to call the right method
for the right problem with the right input
file name.
2. Run `main.go` using `go run`, and point Go
to all the relevant Go files:
```
go run main.go utils.go rosalind.go <name-of-BA-file>
```

38
ba1a.go

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
package main
import "fmt"
// Rosalind: Problem BA1A
//
// To run:
//
//	$ go run ba1a.go
//
// pattern_count returns how many windows of input, each as long as
// pattern, are equal to pattern. Windows may overlap.
func pattern_count(input string, pattern string) int {
	width := len(pattern)
	hits := 0
	// Slide a window of len(pattern) bytes across input; the loop body
	// never runs when pattern is longer than input.
	for start := 0; start+width <= len(input); start++ {
		if input[start:start+width] == pattern {
			hits++
		}
	}
	return hits
}
// main prints a one-line header followed by the number of times
// "GCG" is found in "GCGCG" according to pattern_count.
func main() {
	const (
		text = "GCGCG"
		kmer = "GCG"
	)
	fmt.Println("Number of occurrences of GCG in GCGCG:")
	fmt.Println(pattern_count(text, kmer))
}

73
chapter01/Readme.md

@ -0,0 +1,73 @@ @@ -0,0 +1,73 @@
# Chapter 1
In this chapter we perform basic operations with
strings and data structures.
## How to run
* Each problem has its own function
* To run the code for a particular problem,
call the function for that problem in `main.go`
* Edit `main.go` to call the right function,
and pass in the name of the input file you
want to use: for example, `BA1A("input.txt")`
* The function you call is implemented in the
corresponding Go file (for example, `ba1a.go`).
It loads the inputs from the input file,
calls the right function with the inputs,
and prints the results.
* The functions that load data from input files
are tested along with the functions themselves,
since each problem has a sample input file
in `data/`
## Directory Layout
* Each problem has one Go file and one test
* The `data/` directory contains input files
for the tests (i.e., files that contain both
inputs and corresponding outputs)
* The `for_real/` directory contains sample
input files from Rosalind.info for each
problem (i.e., files that contain only the
inputs)
* The `main.go` file contains the `main()`
driver function and is the entrypoint for
`go run`
* The `rosalind.go` file contains most of the
computational functionality implemented
for the problems.
* The `utils.go` file contains utilities unrelated
to bioinformatics.
## Compiling and Running
To run all tests, `go test`:
```
go test -v
```
To run a specific problem, edit `main.go`
to call the corresponding problem's function
and then `go run`:
```
go run main.go utils.go rosalind.go <name of ba1 file.go>
```
## To Do
Add a Snakefile

54
chapter01/ba1a.go

@ -0,0 +1,54 @@ @@ -0,0 +1,54 @@
package main
import (
"fmt"
"log"
)
// Rosalind: Problem BA1A: Count occurrences of a pattern in a string
//
// BA1ADescription prints a short description of problem BA1A to
// standard output, one entry per line.
func BA1ADescription() {
	// The text previously shown here was copied from BA1B (most
	// frequent k-mers); BA1A is the pattern-counting problem that the
	// BA1A driver solves with PatternCount.
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1A:",
		"Count occurrences of a pattern in a string",
		"",
		"Given an input string and a pattern,",
		"report the number of times the pattern",
		"occurs in the input string (overlapping",
		"occurrences are counted).",
		"",
		"URL: http://rosalind.info/problems/ba1a/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1A prints the problem description, loads the lines of filename
// with readLines, treats the first line as the input text and the
// second as the pattern, and prints the value PatternCount returns
// for them.
//
// The program is terminated through log.Fatalf when readLines reports
// an error or when the file holds fewer than two lines.
func BA1A(filename string) {
	BA1ADescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic
	// with an index-out-of-range instead of a readable message.
	if len(lines) < 2 {
		log.Fatalf("Error: input file %s: expected 2 lines (input, pattern), found %d",
			filename, len(lines))
	}
	input, pattern := lines[0], lines[1]

	result := PatternCount(input, pattern)

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(result)
}

99
chapter01/ba1a_test.go

@ -0,0 +1,99 @@ @@ -0,0 +1,99 @@
package main
import (
"fmt"
"log"
"strconv"
"testing"
)
// To run this test:
//
//	$ go test -v -run TestPatternCount
//
// TestPatternCount checks PatternCount against a single hard-coded
// input/pattern pair.
func TestPatternCount(t *testing.T) {
	const (
		input   = "GCGCG"
		pattern = "GCG"
		gold    = 2
	)
	if result := PatternCount(input, pattern); result != gold {
		t.Errorf("Error testing PatternCount(): input = %s, pattern = %s, result = %d (should be %d)",
			input, pattern, result, gold)
	}
}
// TestMatrixPatternCount runs PatternCount over a table of
// (input, pattern, expected count) cases and reports every mismatch.
func TestMatrixPatternCount(t *testing.T) {
	type patternCase struct {
		input   string
		pattern string
		gold    int
	}
	cases := []patternCase{
		{"GCGCG", "GCG", 2},
		{"GAGGGGGGGAG", "AGG", 1},
		{"GCACGCACGCAC", "GCAC", 3},
		{"", "GC", 0},
		{"GCG", "GTACTCTC", 0},
		{"ACGTACGTACGT", "CG", 3},
		{"AAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAATAATTACAGAGTACACAACATCCA",
			"AAA", 4},
		{"AGCGTGCCGAAATATGCCGCCAGACCTGCTGCGGTGGCCTCGCCGACTTCACGGATGCCAAGTGCATAGAGGAAGCGAGCAAAGGTGGTTTCTTTCGCTTTATCCAGCGCGTTAACCACGTTCTGTGCCGACTTT",
			"TTT", 4},
		{"GGACTTACTGACGTACG", "ACT", 2},
		{"ATCCGATCCCATGCCCATG", "CC", 5},
		{"CTGTTTTTGATCCATGATATGTTATCTCTCCGTCATCAGAAGAACAGTGACGGATCGCCCTCTCTCTTGGTCAGGCGACCGTTTGCCATAATGCCCATGCTTTCCAGCCAGCTCTCAAACTCCGGTGACTCGCGCAGGTTGAGT",
			"CTC", 9},
	}

	for i := range cases {
		tc := cases[i]
		got := PatternCount(tc.input, tc.pattern)
		if got == tc.gold {
			continue
		}
		t.Errorf("Error testing PatternCount(): input = %s, pattern = %s, result = %d (should be %d)",
			tc.input, tc.pattern, got, tc.gold)
	}
}
// TestPatternCountFile loads a PatternCount test case (inputs and
// expected output) from data/pattern_count.txt, runs PatternCount on
// the inputs and verifies the result matches the output stored in the
// file.
//
// Layout used below: lines[0] is the input header, lines[1] the text,
// lines[2] the pattern, lines[3] the output header and lines[4] the
// expected count.
func TestPatternCountFile(t *testing.T) {
	filename := "data/pattern_count.txt"

	// Read the contents of the input file.
	// NOTE(review): log.Fatalf exits the whole test binary rather than
	// failing only this test; it is kept because this is the only use
	// of the log import in this file.
	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("readLines: %v", err)
	}

	// Guard the indexing below so a truncated data file fails the
	// test instead of panicking.
	if len(lines) < 5 {
		t.Fatalf("Error testing PatternCount using test case from file: %s has %d lines, expected at least 5",
			filename, len(lines))
	}

	input := lines[1]
	pattern := lines[2]
	output_str := lines[4]

	// Convert output to integer; without a valid expected value the
	// comparison below would be meaningless, so stop here on failure.
	output, err := strconv.Atoi(output_str)
	if err != nil {
		t.Fatal(err)
	}

	// Call the function with the given inputs
	result := PatternCount(input, pattern)

	// Verify answer
	if result != output {
		err := fmt.Sprintf("Error testing PatternCount using test case from file: results do not match:\ncomputed result = %d\nexpected output = %d", result, output)
		t.Error(err)
	}
}

58
chapter01/ba1b.go

@ -0,0 +1,58 @@ @@ -0,0 +1,58 @@
package main
import (
"fmt"
"log"
"strings"
"strconv"
)
// Rosalind: Problem BA1B: Most Frequent k-mers
//
// BA1BDescription prints a short description of problem BA1B to
// standard output, one entry per line.
func BA1BDescription() {
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1B:",
		"Most Frequent k-mers",
		"",
		"Given an input string and a length k,",
		"report the k-mer or k-mers that occur",
		"most frequently.",
		"",
		"URL: http://rosalind.info/problems/ba1b/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1B prints the problem description, loads the lines of filename
// with readLines, treats the first line as the input string and the
// second as the integer k, and prints the k-mers returned by
// MostFrequentKmers separated by single spaces.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, has fewer than two lines, k is not an integer, or
// MostFrequentKmers reports an error.
func BA1B(filename string) {
	BA1BDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic.
	if len(lines) < 2 {
		log.Fatalf("Error: input file %s: expected 2 lines (input, k), found %d",
			filename, len(lines))
	}
	input := lines[0]
	k_str := lines[1]

	k, err := strconv.Atoi(k_str)
	if err != nil {
		log.Fatalf("Error: string to int conversion: %v", err)
	}

	// The error used to be discarded, which printed an empty result
	// with no hint that the computation had failed.
	mfks, err := MostFrequentKmers(input, k)
	if err != nil {
		log.Fatalf("Error: MostFrequentKmers: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(mfks, " "))
}

82
chapter01/ba1b_test.go

@ -0,0 +1,82 @@ @@ -0,0 +1,82 @@
package main
import (
"fmt"
"sort"
"strconv"
"strings"
"log"
"testing"
)
// TestMostFrequentKmers checks MostFrequentKmers against a single
// hard-coded DNA string and k.
func TestMostFrequentKmers(t *testing.T) {
	input, k := "AAAATGCGCTAGTAAAAGTCACTGAAAA", 4
	gold := []string{"AAAA"}

	result, err := MostFrequentKmers(input, k)
	if err != nil {
		t.Error(err)
	}

	if EqualStringSlices(result, gold) {
		return
	}
	t.Errorf("Error testing MostFrequentKmers(): input = %s, k = %d, result = %s (should be %s)",
		input, k, result, gold)
}
// TestMostFrequentKmersFile runs MostFrequentKmers using inputs and
// expected outputs loaded from data/frequent_words.txt.
//
// Layout used below: lines[0] is the input header, lines[1] the DNA
// string, lines[2] the integer k, lines[3] the output header and
// lines[4] the expected k-mers separated by whitespace.
func TestMostFrequentKmersFile(t *testing.T) {
	filename := "data/frequent_words.txt"

	// Read the contents of the input file.
	// NOTE(review): log.Fatalf exits the whole test binary rather than
	// failing only this test; it is kept because this is the only use
	// of the log import in this file.
	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("readLines: %v", err)
	}

	// Guard the indexing below so a truncated data file fails the
	// test instead of panicking.
	if len(lines) < 5 {
		t.Fatalf("Error testing MostFrequentKmers using test case from file: %s has %d lines, expected at least 5",
			filename, len(lines))
	}

	dna := lines[1]
	k_str := lines[2]
	// Fields tolerates repeated or trailing blanks in the gold line.
	gold := strings.Fields(lines[4])

	// Convert k to integer; running the function with a bogus k would
	// only produce a misleading second failure, so stop here.
	k, err := strconv.Atoi(k_str)
	if err != nil {
		t.Fatal(err)
	}

	// Call the function with the given inputs
	result, err := MostFrequentKmers(dna, k)

	// Check if function threw error
	if err != nil {
		t.Error(err)
	}

	// Check that there _was_ a result
	if len(result) == 0 {
		err := fmt.Sprintf("Error testing MostFrequentKmers using test case from file: length of most frequent kmers found was 0: %q",
			result)
		t.Error(err)
	}

	// Sort before comparing
	sort.Strings(gold)
	sort.Strings(result)

	// These will only be unequal if something went wrong
	if !EqualStringSlices(gold, result) {
		err := fmt.Sprintf("Error testing MostFrequentKmers using test case from file: most frequent kmers mismatch.\ncomputed = %q\ngold = %q\n",
			result, gold)
		t.Error(err)
	}
}

50
chapter01/ba1c.go

@ -0,0 +1,50 @@ @@ -0,0 +1,50 @@
package main
import (
"fmt"
"log"
)
// Rosalind: Problem BA1C: Find the Reverse Complement of a String
//
// BA1CDescription writes the BA1C problem banner to standard output.
func BA1CDescription() {
	// One literal per output line, each carrying its own newline, so a
	// single Print emits the same text as printing line by line.
	const banner = "-----------------------------------------\n" +
		"Rosalind: Problem BA1C:\n" +
		"Find the Reverse Complement of a String\n" +
		"\n" +
		"Given a DNA input string,\n" +
		"find the reverse complement\n" +
		"of the DNA string.\n" +
		"\n" +
		"URL: http://rosalind.info/problems/ba1c/\n" +
		"\n"
	fmt.Print(banner)
}
// BA1C prints the problem description, loads the lines of filename
// with readLines, passes the first line to ReverseComplement and
// prints the returned string.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, is empty, or ReverseComplement reports an error.
func BA1C(filename string) {
	BA1CDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; an empty file would otherwise panic.
	if len(lines) < 1 {
		log.Fatalf("Error: input file %s: expected 1 line (DNA string), found none", filename)
	}
	input := lines[0]

	// The error used to be discarded, hiding failures from the user.
	result, err := ReverseComplement(input)
	if err != nil {
		log.Fatalf("Error: ReverseComplement: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(result)
}

123
chapter01/ba1c_test.go

@ -0,0 +1,123 @@ @@ -0,0 +1,123 @@
package main
import (
"fmt"
"testing"
)
// TestDNA2Bitmasks verifies that DNA2Bitmasks, given a fixed DNA
// string, yields for each of the keys "A", "T", "C" and "G" the bool
// slice written out below.
func TestDNA2Bitmasks(t *testing.T) {
	input := "AATCCGCT"

	// Expected bitvectors, one per key, position by position.
	gold := map[string][]bool{
		"A": {true, true, false, false, false, false, false, false},
		"T": {false, false, true, false, false, false, false, true},
		"C": {false, false, false, true, true, false, true, false},
		"G": {false, false, false, false, false, true, false, false},
	}

	result, func_err := DNA2Bitmasks(input)
	if func_err != nil {
		t.Error(fmt.Sprintf("Error in function DNA2Bitmasks(): input = %s", input))
	}

	// Compare key by key, in the same order as before: A, T, C, G.
	for _, base := range []string{"A", "T", "C", "G"} {
		if EqualBoolSlices(result[base], gold[base]) {
			continue
		}
		t.Error(fmt.Sprintf("Error testing DNA2Bitmasks(): input = %s, codon = %s, extracted = %v, gold = %v",
			input, base, result[base], gold[base]))
	}
}
// TestBitmasks2DNA verifies that Bitmasks2DNA turns the four bool
// slices below (keyed "A", "T", "C", "G") into the expected DNA
// string.
func TestBitmasks2DNA(t *testing.T) {
	// Input bitmasks, one per key.
	input := map[string][]bool{
		"A": {true, true, false, false, false, false, false, false},
		"T": {false, false, true, false, false, false, false, true},
		"C": {false, false, false, true, true, false, true, false},
		"G": {false, false, false, false, false, true, false, false},
	}
	gold := "AATCCGCT"

	result, func_err := Bitmasks2DNA(input)

	// Report an error returned by Bitmasks2DNA itself.
	if func_err != nil {
		t.Error("Error in function Bitmasks2DNA(): function returned error")
	}

	// Compare against the gold standard.
	if result == gold {
		return
	}
	t.Error(fmt.Sprintf("Error testing Bitmasks2DNA(): result = %s, gold = %s", result, gold))
}
// TestReverseComplement checks ReverseComplement against a single
// hard-coded DNA string and its expected reverse complement.
func TestReverseComplement(t *testing.T) {
	input := "AAAACCCGGT"
	gold := "ACCGGGTTTT"

	// The error used to be discarded; report it so a failure inside
	// ReverseComplement is visible alongside any mismatch.
	result, err := ReverseComplement(input)
	if err != nil {
		t.Error(err)
	}

	if result != gold {
		err := fmt.Sprintf("Error testing ReverseComplement(): input = %s, result = %s (should be %s)",
			input, result, gold)
		t.Error(err)
	}
}
// TestReverseComplementFile runs ReverseComplement using an input and
// expected output loaded from data/reverse_complement.txt.
//
// Layout used below: lines[0] is the input header, lines[1] the DNA
// string, lines[2] the output header and lines[3] the expected
// reverse complement.
func TestReverseComplementFile(t *testing.T) {
	filename := "data/reverse_complement.txt"

	// Read the contents of the input file. Fatal rather than Error:
	// continuing would index an empty slice and panic.
	lines, err := readLines(filename)
	if err != nil {
		t.Fatal(err)
	}

	// Guard the indexing below so a truncated data file fails the
	// test instead of panicking.
	if len(lines) < 4 {
		t.Fatalf("Error testing ReverseComplement using test case from file: %s has %d lines, expected at least 4",
			filename, len(lines))
	}

	input := lines[1]
	gold := lines[3]

	// Call the function with the given inputs; the returned error was
	// previously assigned but never examined.
	result, err := ReverseComplement(input)
	if err != nil {
		t.Error(err)
	}

	// Check that there _was_ a result
	if len(result) == 0 {
		t.Error("Error testing ReverseComplement using test case from file")
	}

	if result != gold {
		err := fmt.Sprintf("Error testing ReverseComplement(): input = %s, result = %s (should be %s)",
			input, result, gold)
		t.Error(err)
	}
}

61
chapter01/ba1d.go

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
package main
import (
"fmt"
"strconv"
"strings"
"log"
)
// Rosalind: Problem BA1D: Find all occurrences of pattern in string
//
// BA1DDescription writes the BA1D problem banner to standard output,
// one entry per line.
func BA1DDescription() {
	banner := [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1D:",
		"Find all occurrences of pattern in string",
		"",
		"Given a string input (genome) and a substring (pattern),",
		"return all starting positions in the genome where the",
		"pattern occurs in the genome.",
		"",
		"URL: http://rosalind.info/problems/ba1d/",
		"",
	}
	for i := 0; i < len(banner); i++ {
		fmt.Println(banner[i])
	}
}
// BA1D prints the problem description, loads the lines of filename
// with readLines, treats the first line as the pattern and the second
// as the genome, and prints the integers returned by FindOccurrences
// separated by single spaces.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, has fewer than two lines, or FindOccurrences reports an
// error.
func BA1D(filename string) {
	BA1DDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic.
	if len(lines) < 2 {
		log.Fatalf("Error: input file %s: expected 2 lines (pattern, genome), found %d",
			filename, len(lines))
	}
	pattern, genome := lines[0], lines[1]

	// Result is a slice of ints. The error used to be discarded.
	locs, err := FindOccurrences(pattern, genome)
	if err != nil {
		log.Fatalf("Error: FindOccurrences: %v", err)
	}

	// Convert to a slice of strings for easier printing
	locs_str := make([]string, len(locs))
	for i, loc := range locs {
		locs_str[i] = strconv.Itoa(loc)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(locs_str, " "))
}

97
chapter01/ba1d_test.go

@ -0,0 +1,97 @@ @@ -0,0 +1,97 @@
package main
import (
"fmt"
"log"
"strings"
"strconv"
"testing"
)
// TestFindOccurrences checks FindOccurrences against a single
// hard-coded pattern/genome pair.
func TestFindOccurrences(t *testing.T) {
	pattern := "ATAT"
	genome := "GATATATGCATATACTT"
	gold := []int{1, 3, 9}

	result, err := FindOccurrences(pattern, genome)

	// Report the error itself; it was previously folded into the
	// mismatch message and never shown.
	if err != nil {
		t.Error(err)
	}

	// %v, not %q: %q on an int slice prints each number as a quoted
	// character literal, which made failures unreadable.
	if !EqualIntSlices(result, gold) {
		t.Errorf("Error testing FindOccurrences(): result = %v, should be %v",
			result, gold)
	}
}
// TestFindOccurrencesDebug runs FindOccurrences over a table of
// (pattern, genome, expected positions) cases and reports every
// error and every mismatch.
func TestFindOccurrencesDebug(t *testing.T) {
	// Construct a test matrix
	var tests = []struct {
		pattern string
		genome  string
		gold    []int
	}{
		{"ACAC", "TTTTACACTTTTTTGTGTAAAAA",
			[]int{4}},
		{"AAA", "AAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAATAATTACAGAGTACACAACATCCAT",
			[]int{0, 46, 51, 74}},
		{"TTT", "AGCGTGCCGAAATATGCCGCCAGACCTGCTGCGGTGGCCTCGCCGACTTCACGGATGCCAAGTGCATAGAGGAAGCGAGCAAAGGTGGTTTCTTTCGCTTTATCCAGCGCGTTAACCACGTTCTGTGCCGACTTT",
			[]int{88, 92, 98, 132}},
		{"ATA", "ATATATA",
			[]int{0, 2, 4}},
	}

	for _, test := range tests {
		result, err := FindOccurrences(test.pattern, test.genome)
		if err != nil {
			t.Error(err)
		}
		// %v, not %q: %q on an int slice prints each number as a
		// quoted character literal, which made failures unreadable.
		if !EqualIntSlices(result, test.gold) {
			t.Errorf("Error testing FindOccurrences(): result = %v, should be %v",
				result, test.gold)
		}
	}
}
// TestFindOccurrencesFiles runs FindOccurrences using inputs and
// expected outputs loaded from data/pattern_matching.txt.
//
// Layout used below: lines[0] is the input header, lines[1] the
// pattern, lines[2] the genome, lines[3] the output header and
// lines[4] the expected positions separated by whitespace.
func TestFindOccurrencesFiles(t *testing.T) {
	filename := "data/pattern_matching.txt"

	// Read the contents of the input file.
	// NOTE(review): log.Fatalf exits the whole test binary rather than
	// failing only this test; it is kept because this is the only use
	// of the log import in this file.
	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below so a truncated data file fails the
	// test instead of panicking.
	if len(lines) < 5 {
		t.Fatalf("Error testing FindOccurrences(): %s has %d lines, expected at least 5",
			filename, len(lines))
	}

	pattern := lines[1]
	genome := lines[2]

	// Fields tolerates repeated or trailing blanks in the gold line.
	gold_slice := strings.Fields(lines[4])
	gold := make([]int, len(gold_slice))
	for i, g := range gold_slice {
		gold[i], err = strconv.Atoi(g)
		// A gold value that cannot be parsed makes the comparison
		// below meaningless, so stop instead of comparing against 0.
		if err != nil {
			t.Fatal(err)
		}
	}

	result, err := FindOccurrences(pattern, genome)
	if err != nil {
		t.Error(err)
	}

	if !EqualIntSlices(result, gold) {
		err := fmt.Sprintf("Error testing FindOccurrences():\nresult = %v\ngold = %v\n",
			result, gold)
		t.Error(err)
	}
}

58
chapter01/ba1e.go

@ -0,0 +1,58 @@ @@ -0,0 +1,58 @@
package main
import (
"fmt"
"log"
"strings"
"strconv"
)
// Rosalind: Problem BA1E: Find patterns forming clumps in a string
//
// BA1EDescription writes the BA1E problem banner to standard output.
func BA1EDescription() {
	// One literal per output line, each carrying its own newline, so a
	// single Print emits the same text as printing line by line.
	const banner = "-----------------------------------------\n" +
		"Rosalind: Problem BA1E:\n" +
		"Find patterns forming clumps in a string\n" +
		"\n" +
		"A clump is characterized by integers L and t\n" +
		"if there is an interval in the genome of length L\n" +
		"in which a given pattern occurs t or more times.\n" +
		"\n" +
		"URL: http://rosalind.info/problems/ba1e/\n" +
		"\n"
	fmt.Print(banner)
}
// BA1E prints the problem description, loads the lines of filename
// with readLines, treats the first line as the genome and the second
// as three whitespace-separated integers k, L and t, and prints the
// strings returned by FindClumps separated by single spaces.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, has fewer than two lines, the parameter line does not hold
// three integers, or FindClumps reports an error.
func BA1E(filename string) {
	BA1EDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic.
	if len(lines) < 2 {
		log.Fatalf("Error: input file %s: expected 2 lines (genome, \"k L t\"), found %d",
			filename, len(lines))
	}
	genome := lines[0]

	// Fields (rather than splitting on a single space) tolerates
	// repeated, leading or trailing blanks on the parameter line.
	params := strings.Fields(lines[1])
	if len(params) < 3 {
		log.Fatalf("Error: input file %s: expected 3 parameters (k L t), found %d",
			filename, len(params))
	}

	// Conversion failures used to be discarded, silently turning a
	// malformed parameter into 0.
	var values [3]int
	for i, name := range [...]string{"k", "L", "t"} {
		values[i], err = strconv.Atoi(params[i])
		if err != nil {
			log.Fatalf("Error: string to int conversion of %s: %v", name, err)
		}
	}

	patterns, err := FindClumps(genome, values[0], values[1], values[2])
	if err != nil {
		log.Fatalf("Error: FindClumps: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(patterns, " "))
}

42
chapter01/ba1e_test.go

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
package main
import (
"fmt"
"testing"
)
// TestMatrixFindClumps runs FindClumps over a table of
// (genome, k, L, t, expected patterns) cases and reports every error
// and every mismatch.
//
// NOTE(review): results are compared to gold without sorting, so the
// cases presumably rely on FindClumps returning patterns in the order
// listed here — confirm against the implementation.
func TestMatrixFindClumps(t *testing.T) {
	var tests = []struct {
		genome string
		k      int
		L      int
		t      int
		gold   []string
	}{
		{"CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA",
			5, 50, 4,
			[]string{"CGACA", "GAAGA"}},
		{"AAAACGTCGAAAAA",
			2, 4, 2,
			[]string{"AA"}},
		{"ACGTACGT",
			1, 5, 2,
			[]string{"A", "C", "G", "T"}},
		{"CCACGCGGTGTACGCTGCAAAAAGCCTTGCTGAATCAAATAAGGTTCCAGCACATCCTCAATGGTTTCACGTTCTTCGCCAATGGCTGCCGCCAGGTTATCCAGACCTACAGGTCCACCAAAGAACTTATCGATTACCGCCAGCAACAATTTGCGGTCCATATAATCGAAACCTTCAGCATCGACATTCAACATATCCAGCG",
			3, 25, 3,
			[]string{"AAA", "CAG", "CAT", "CCA", "GCC", "TTC"}},
	}

	for _, test := range tests {
		result, err := FindClumps(test.genome,
			test.k, test.L, test.t)
		if err != nil {
			t.Error(err)
		}
		if !EqualStringSlices(result, test.gold) {
			// Show what was computed and what was expected; the
			// parameters alone do not explain a failure.
			err := fmt.Sprintf("Error testing FindClumps(): k = %d, L = %d, t = %d\ncomputed = %q\ngold = %q",
				test.k, test.L, test.t, result, test.gold)
			t.Error(err)
		}
	}
}

60
chapter01/ba1f.go

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
package main
import (
"fmt"
"strings"
"strconv"
"log"
)
// Rosalind: Problem BA1F: Find positions in a genome that minimize skew
//
// BA1FDescription prints a short description of problem BA1F to
// standard output, one entry per line.
func BA1FDescription() {
	// Skew is stated as G minus C: the test cases for
	// MinSkewPositions expect the minimum right after runs of C.
	description := []string{
		"-----------------------------------------",
		"Rosalind: Problem BA1F:",
		"Find positions in a gene that minimize skew",
		"",
		"The skew of a genome is defined as the difference",
		"between the number of G nucleotides and the number",
		"of C nucleotides. Given a DNA string, this function",
		"should compute the cumulative skew for each position",
		"in the genome, and report the indices where the skew",
		"value is minimized.",
		"",
		"URL: http://rosalind.info/problems/ba1f/",
		"",
	}
	for _, line := range description {
		fmt.Println(line)
	}
}
// BA1F prints the problem description, loads the lines of filename
// with readLines, passes the first line to MinSkewPositions and
// prints the returned integers separated by single spaces.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, is empty, or MinSkewPositions reports an error.
func BA1F(filename string) {
	BA1FDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; an empty file would otherwise panic.
	if len(lines) < 1 {
		log.Fatalf("Error: input file %s: expected 1 line (genome), found none", filename)
	}
	genome := lines[0]

	// The error used to be discarded, hiding failures from the user.
	minskew, err := MinSkewPositions(genome)
	if err != nil {
		log.Fatalf("Error: MinSkewPositions: %v", err)
	}

	// Convert to a slice of strings for easier printing
	minskew_str := make([]string, len(minskew))
	for i, pos := range minskew {
		minskew_str[i] = strconv.Itoa(pos)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(minskew_str, " "))
}

53
chapter01/ba1f_test.go

@ -0,0 +1,53 @@ @@ -0,0 +1,53 @@
package main
import (
"fmt"
"sort"
"testing"
)
// TestMatrixMinSkewPosition runs MinSkewPositions over a table of
// (genome, expected positions) cases. For each case it reports any
// error, a length mismatch, and — after sorting both slices — any
// difference in content.
func TestMatrixMinSkewPosition(t *testing.T) {
	type skewCase struct {
		genome string
		gold   []int
	}
	cases := []skewCase{
		{"CCTATCGGTGGATTAGCATGTCCCTGTACGTTTCGCCGCGAACTAGTTCACACGGCTTGATGGCAAATGGTTTTTCCGGCGACCGTAATCGTCCACCGAG",
			[]int{53, 97}},
		{"TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGA",
			[]int{11, 24}},
		{"ACCG",
			[]int{3}},
		{"ACCC",
			[]int{4}},
		{"CCGGGT",
			[]int{2}},
		{"CCGGCCGG",
			[]int{2, 6}},
	}

	for i := range cases {
		tc := cases[i]

		found, err := MinSkewPositions(tc.genome)
		if err != nil {
			t.Error(err)
		}

		// Length check first; it is reported independently of the
		// content comparison that follows.
		if got, want := len(found), len(tc.gold); got != want {
			t.Error(fmt.Sprintf("Error testing MinSkewPositions():\nfor genome: %s\nlength of result (%d) did not match length of gold standard (%d).\nFound: %v\nShould be: %v",
				tc.genome, got, want,
				found, tc.gold))
		}

		// Order is not part of the contract being tested here.
		sort.Ints(found)
		sort.Ints(tc.gold)
		if EqualIntSlices(found, tc.gold) {
			continue
		}
		t.Error(fmt.Sprintf("Error testing MinSkewPositions():\nfor genome: %s\nfound: %v\nshould be: %v",
			tc.genome, found, tc.gold))
	}
}

52
chapter01/ba1g.go

@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
package main
import (
"fmt"
"log"
)
// Rosalind: Problem BA1G: Find Hamming distance between two DNA strings
//
// BA1GDescription writes the BA1G problem banner to standard output,
// one entry per line.
func BA1GDescription() {
	for _, entry := range [...]string{
		"-----------------------------------------",
		"Rosalind: Problem BA1G:",
		"Find Hamming distance between two DNA strings",
		"",
		"The Hamming distance between two strings HammingDistance(p,q)",
		"is the number of characters different between the two",
		"strands. This program computes the Hamming distance",
		"between two strings.",
		"",
		"URL: http://rosalind.info/problems/ba1g/",
		"",
	} {
		fmt.Println(entry)
	}
}
// BA1G prints the problem description, loads the lines of filename
// with readLines, passes the first two lines to HammingDistance and
// prints the returned value.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, has fewer than two lines, or HammingDistance reports an
// error.
func BA1G(filename string) {
	BA1GDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic.
	if len(lines) < 2 {
		log.Fatalf("Error: input file %s: expected 2 lines (p, q), found %d",
			filename, len(lines))
	}
	p, q := lines[0], lines[1]

	// The error used to be discarded, hiding failures from the user.
	hamm, err := HammingDistance(p, q)
	if err != nil {
		log.Fatalf("Error: HammingDistance: %v", err)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(hamm)
}

49
chapter01/ba1g_test.go

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
package main
import (
"fmt"
"testing"
)
// TestMatrixHammingDistance runs HammingDistance over a table of
// (p, q, expected distance) cases and reports every error and every
// mismatch.
func TestMatrixHammingDistance(t *testing.T) {
	type distCase struct {
		p    string
		q    string
		dist int
	}
	cases := []distCase{
		{"GGGCCGTTGGT",
			"GGACCGTTGAC",
			3},
		{"AAAA",
			"TTTT",
			4},
		{"ACGTACGT",
			"TACGTACG",
			8},
		{"ACGTACGT",
			"CCCCCCCC",
			6},
		{"ACGTACGT",
			"TGCATGCA",
			8},
		{"GATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATAC",
			"AATAGCAGCTTCTCAACTGGTTACCTCGTATGAGTAAATTAGGTCATTATTGACTCAGGTCACTAACGTC",
			15},
		{"AGAAACAGACCGCTATGTTCAACGATTTGTTTTATCTCGTCACCGGGATATTGCGGCCACTCATCGGTCAGTTGATTACGCAGGGCGTAAATCGCCAGAATCAGGCTG",
			"AGAAACCCACCGCTAAAAACAACGATTTGCGTAGTCAGGTCACCGGGATATTGCGGCCACTAAGGCCTTGGATGATTACGCAGAACGTATTGACCCAGAATCAGGCTC",
			28},
	}

	for i := range cases {
		tc := cases[i]

		got, err := HammingDistance(tc.p, tc.q)
		if err != nil {
			t.Error(err)
		}
		if got == tc.dist {
			continue
		}
		t.Error(fmt.Sprintf("Error testing HammingDistance(): computed dist = %d (should be %d)\np = %s\nq = %s\n",
			got, tc.dist,
			tc.p, tc.q))
	}
}

65
chapter01/ba1h.go

@ -0,0 +1,65 @@ @@ -0,0 +1,65 @@
package main
import (
"fmt"
"strconv"
"strings"
"log"
)
// Rosalind: Problem BA1H: Find approximate occurrences of pattern in string
//
// BA1HDescription writes the BA1H problem banner to standard output.
func BA1HDescription() {
	// One literal per output line, each carrying its own newline, so a
	// single Print emits the same text as printing line by line.
	const banner = "-----------------------------------------\n" +
		"Rosalind: Problem BA1H:\n" +
		"Find approximate occurrences of pattern in string\n" +
		"\n" +
		"Given a string Text and a string Pattern, and a maximum\n" +
		"Hamming distance d, return all locations in Text where\n" +
		"there is an approximate match with Pattern (i.e., a pattern\n" +
		"with a Hamming distance from Pattern of d or less).\n" +
		"\n" +
		"URL: http://rosalind.info/problems/ba1h/\n" +
		"\n"
	fmt.Print(banner)
}
// BA1H prints the problem description, loads the lines of filename
// with readLines, treats the first line as the pattern, the second as
// the text and the third as the integer d, and prints the integers
// returned by FindApproximateOccurrences separated by single spaces.
//
// The program is terminated through log.Fatalf when the file cannot
// be read, has fewer than three lines, d is not an integer, or
// FindApproximateOccurrences reports an error.
func BA1H(filename string) {
	BA1HDescription()

	lines, err := readLines(filename)
	if err != nil {
		log.Fatalf("Error: readLines: %v", err)
	}

	// Guard the indexing below; a short file would otherwise panic.
	if len(lines) < 3 {
		log.Fatalf("Error: input file %s: expected 3 lines (pattern, text, d), found %d",
			filename, len(lines))
	}
	pattern, text, d_str := lines[0], lines[1], lines[2]

	// A conversion failure used to be discarded, silently running
	// the search with d = 0.
	d, err := strconv.Atoi(d_str)
	if err != nil {
		log.Fatalf("Error: string to int conversion: %v", err)
	}

	approx, err := FindApproximateOccurrences(pattern, text, d)
	if err != nil {
		log.Fatalf("Error: FindApproximateOccurrences: %v", err)
	}

	// strconv.Itoa cannot fail, so the loop needs no error check (the
	// old one re-tested the already-handled readLines error).
	approx_str := make([]string, len(approx))
	for i, pos := range approx {
		approx_str[i] = strconv.Itoa(pos)
	}

	fmt.Println("")
	fmt.Printf("Computed result from input file: %s\n", filename)
	fmt.Println(strings.Join(approx_str, " "))
}

56
chapter01/ba1h_test.go

@ -0,0 +1,56 @@ @@ -0,0 +1,56 @@
package main
import (
"fmt"
"testing"
)
// TestMatrixApproximateOccurrences runs FindApproximateOccurrences
// over a table of (pattern, text, d, expected positions) cases and
// reports every error and every mismatch.
func TestMatrixApproximateOccurrences(t *testing.T) {
	type approxCase struct {
		pattern string
		text    string
		d       int
		gold    []int
	}
	cases := []approxCase{
		{"ATTCTGGA",
			"CGCCCGAATCCAGAACGCATTCCCATATTTCGGGACCACTGGCCTCCACGGTACGGACGTCAATCAAATGCCTAGCGGCTTGTGGTTTCTCCTACGCTCC",
			3,
			[]int{6, 7, 26, 27, 78}},
		{"AAA",
			"TTTTTTAAATTTTAAATTTTTT",
			2,
			[]int{4, 5, 6, 7, 8, 11, 12, 13, 14, 15}},
		{"GAGCGCTGG",
			"GAGCGCTGGGTTAACTCGCTACTTCCCGACGAGCGCTGTGGCGCAAATTGGCGATGAAACTGCAGAGAGAACTGGTCATCCAACTGAATTCTCCCCGCTATCGCATTTTGATGCGCGCCGCGTCGATT",
			2,
			[]int{0, 30, 66}},
		{"AATCCTTTCA",
			"CCAAATCCCCTCATGGCATGCATTCCCGCAGTATTTAATCCTTTCATTCTGCATATAAGTAGTGAAGGTATAGAAACCCGTTCAAGCCCGCAGCGGTAAAACCGAGAACCATGATGAATGCACGGCGATTGCGCCATAATCCAAACA",
			3,
			[]int{3, 36, 74, 137}},
		{"CCGTCATCC",
			"CCGTCATCCGTCATCCTCGCCACGTTGGCATGCATTCCGTCATCCCGTCAGGCATACTTCTGCATATAAGTACAAACATCCGTCATGTCAAAGGGAGCCCGCAGCGGTAAAACCGAGAACCATGATGAATGCACGGCGATTGC",
			3,
			[]int{0, 7, 36, 44, 48, 72, 79, 112}},
		{"TTT",
			"AAAAAA",
			3,
			[]int{0, 1, 2, 3}},
		{"CCA",
			"CCACCT",
			0,
			[]int{0}},
	}

	for i := range cases {
		tc := cases[i]

		got, err := FindApproximateOccurrences(tc.pattern, tc.text, tc.d)
		if err != nil {
			t.Error(err)
		}
		if EqualIntSlices(got, tc.gold) {
			continue
		}
		t.Error(fmt.Sprintf("Error testing FindApproximateOccurrences:\ncomputed = %v\ngold = %v",
			got, tc.gold))
	}
}

5
chapter01/data/clump_finding.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
GCGGTTATGCACCGTTCAAATTAGCAAACCACTAAGCGACGTAGTCTGGATTGATTTCTCCCTACCAGTGACCCAAGACGCGTTAGTGAGTTAAGTTCATATCCAGTACCTGCCGCCCTCTGTACTTGGGCGTCCGATTCGCATGCTTACTCAGGTGGAGGACACGATAATCTGATTAAACTGAGCTAAACCAGGTGGAACCAGAAACCAGGTGGGGAGTCTCGCTTCAAGCCGTTCTTGCGATCAAACCAGGTGGTCCATTATGAAACCAGGTGGCTAAACCAGGTGGTCCAGATCCTCGAATGATGTCGGTGCACATCAAAACCAGGTGGGGTGGTGGAACGTAAAACCAGGTGGCATAAACCAGGTGGGCCGGTTCGTAAACCAGGTGAAACCAGGTGGGGTGGAAACCAGGTGGGTTACAAATTACGTTGAGATGGCCCAAACCAGGTGGTGGGCTTCACCCATGTCAACAAACCACCCTATGGAACTAAACCAGGTGGAACCAGGTGGTGAAGGCTTATCCTCAGGAAAAACCAGGTGGAGGTGGTGAAATAAAACCAGGTGGACCAGGTGGATAACCCTCGCCTCGCTTCTCAACCGAGACCTGGATAAACCAGGTGGGGTGGTCCACCGATTTTTGAGACACTAGAAACCAGGTGGGCGGGGAAACCAGGTGGCAAACCAGGTGGGGTGGACGGAAACCAGGTGGATATGTCATAAAACCAAACCAGGTGGTGCACCCCCATGGTGTGTCTTATCCGTGCGTATAAACCAGGTGGTCGCACGGCTTCCACTTGCTGAGAATAGGCCCGCAGGGTCAGTGCCATGCCCTCCGTCACTCGATATGTGTTGTAAGAGTGGTTACCCCTTCATTGAAGTCGCCCACAGCCCCACCTGCATTGCTAGACTATCACCCTACAGTAGGCCTTTTCGCCTTCTTCAAGCAGCAATCTCTTATCCGCGGATGGGCGCGGCGAGCGTGGCGTCCCCGAACATTTTTACCTAACGTGTTTTGTTGGCCGCAAGCCTTCCCTCTAGTCCACCTCAGCCATTCAGCCTAGTAGCTTTCAAGCCGAGCCTTCCATATCTAATGGACCGTCCAGAATTTCACACGTTTCACAGGGCTGTGTTCGACCGCCCGTAATGCTGTTTCACAGGCGATCGCCTTGCGGTTTTTTCACAGATCGCAGCCGATGGACATGCCAACTCGATTTTCACAGAGTTTTTCACAGCGGTTTCACAGCACAGCAGTGATTGTTTCACAGCAATTTTCACTTTCACAGGGGCCCTTTTCACAGCTCAGGGCTCTTTTCACTTTCACAGTTTCACAGCGCTCCTTTCACAGAGCGGGGAAATTTAAGGGAACACTCAAGGGAACAAGGGAACACACAAAGGGAACACAACACAACACATAAGGGAACACTTTCACAGAACACAAAAGTCCGAAATCATCAGCGGCGAAGGGATTTCACAGACAGACACTTTCACAGCGCATTTCACAGATACGTACTTTCACAGGCGTACTTTCACAGACTTTCACAGAGGACAAGCTCAATTTTCACAGACAGGCTGGATAAATTTCACAGCGGTAAGGGTTTCACAGCACACATAAGGGAACACGAATTTCACAGCAGGGAACACCTCTACGAGTAATCTATTACTCTACCTACTGAAGGGAACACACCGAAGACCTACTATTACCTATTACTCTTAAAGGGAACACATTACAAGGGAACACACTCTCTCGTCATATCTCACCTCTCTATTACTCTTAAGGGAACACCTTCTCGATCAACCTATTACTCTATGGAGATAGAGATATTCCAGACATATGGAGATAACATGGAGATATGGAGATAATGGAGATGGAGATAGCTCTTATATTTATCCTATGGAGATATGATACTATTAATGGAGATAATTCTAATGGAGATATAATTACTCTAAGAGGATGGGATCTCGGGCTATTACTCTAATGGAGATAAGCACTATTACTC
TAGGAAATGGAGATATGTCAATGGAGATATGTAATGGAGATAGAGGGAGATGGAGTCGCCATTTCATAATCGCCATTTCATAGTTCAGGAATCGCCATTTCCGCCATTTCTAAGATGGAGTCGCCATTTCTACGTATGGAGATAGGATCGCCATTTCATACGACCCGTTGGATATCGCCATTTCCTCGCCATTTCTGGTGACATTTCTCGCCATTTCATTTCTGGAGATAGATGGATCTCGCCATTTCATAGGAATCGCCATTTCCACGTAGGGGGGGCCACAATCCGTAGGTCGGAATTCAGACTCGCCATTTCCCATCGCCATTTCTTCACCTGTATGCCGATCCCTTCGCCATTTCTCATGGAGATAACTCTCTCTCGCCATTTCTCGCCATTTCCATTTCACTCTCATTCGCCATCGCCATTTCCATTCGCCATTTCATCGCCATTTCTTCAGGATAAGATATCGCCATTTCGACTCTCATTCGCATACTGACTCTCATTCTCATCTCGCCATTTCTCATCTGACTCTCATCCTGGGGGAAACTTGCGACTCTCATCACACTTCCGTCGACTCTCATACTGGCGGATAGCATAGGAGCCATTTAAAGACTCTCATTCTCATTCGAGACTCTCATTCAAATCCTACGAGGACTCTCATATAGACTCTCATATCATTACGAGGACTCTCATATACGAGCCATGCATGTGGCGACGACTCTCATCTACGAGCCATGCAAGCAGAATCTACGAGCGACTCTCATTACGAGCCATGTGACCGTACGAGCCATGCATGCATGCCATGCTGACTCTCATCGAGTACGAGCCATGGAAGTTCTTGTTGGTTCGTAGCCCAAGAGCTGAAGTTACGAGCCTACGAGCCATGAAGTTACTTTTACGAGCCATGAAGCTTACGATACGAGCCATGCGAGCCATGCATCCGCGCTACGAGCCATGTTCCAGTACGAGCCATGTTAGTTGCTGAAGTTAAGTTTGGCGCTGAAGTTTGTACGAGCCATGTGCCCGCTGAAGTTTGTTGTACGAGCCATGCATGCTGAAGTTAATGGCTGAAGTTAGCGTTTGCGGGCAGATCCTCATTCTACGATACGAGCCATGCCATGCAGCTGAAGTTAAGTTGGGTTACGAGCCATGCGAGCCATGTGAAGTACGAGCCATGCTGGCTGAAGTTGTTTGTGCTGCTGAAGTTGCTCTTGTCTCTAGCTGAAGTTGCCAACAGGGCTGAAGCTGAAGTTTAAGCTGAAGTTGCGAGCAGGCTGAAGTTATCGGATTGGGGCTGAAGTTCAACCTCCCGTCCCCCCACACTATATTCCCGTCCCCCCCCGCGCACGCGCCGTCTCCCGTCCCCCCTATCCCGTGCGCACGCGACGCGATCCCGTCCCCCCAGAGTGCGCGCACGCGTCCCCCTTCCCGTCCCCCTCTCCCGGGCGCACGCGTCGCTCAACATTTCCGCGCACGCGTCGCGCACGCGGGCGCACGCGGGTCCCGTCCCCCCCCCTCTTCGGCGCACGCGGAATTCCCGTCGCGCACGCGTCCCGTCCCGCGCACGCGTCGCGCACGCGACTGCCCTAACCAACAGTGCGCACGCGCCGGTAACCCGGTAACCCGGTAACCGCGCACGCGGGCGCACGCGCGTAACCCGCGCACGCGCCGCGCACGCGGCCCGGTTCCCGTCCCCCCCGGTAACCCGGTAACTCCCGTCCCCCGTAACCCGGTGCGCACGCGCCCGGCGCACGCGGAGCGCACGCGCCCCCCCCGGTAATAGCGCACGCGCCCGGGCGCACGCGCCCGGTAACCCGGTAACCCGGGCGCGCGCACGCGGCGGCGCACGCGGCGCACGCGGCGCACGCG
11 566 18
Output
AAACCAGGTGG

5
chapter01/data/frequent_words.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
CGGAAGCGAGATTCGCGTGGCGTGATTCCGGCGGGCGTGGAGAAGCGAGATTCATTCAAGCCGGGAGGCGTGGCGTGGCGTGGCGTGCGGATTCAAGCCGGCGGGCGTGATTCGAGCGGCGGATTCGAGATTCCGGGCGTGCGGGCGTGAAGCGCGTGGAGGAGGCGTGGCGTGCGGGAGGAGAAGCGAGAAGCCGGATTCAAGCAAGCATTCCGGCGGGAGATTCGCGTGGAGGCGTGGAGGCGTGGAGGCGTGCGGCGGGAGATTCAAGCCGGATTCGCGTGGAGAAGCGAGAAGCGCGTGCGGAAGCGAGGAGGAGAAGCATTCGCGTGATTCCGGGAGATTCAAGCATTCGCGTGCGGCGGGAGATTCAAGCGAGGAGGCGTGAAGCAAGCAAGCAAGCGCGTGGCGTGCGGCGGGAGAAGCAAGCGCGTGATTCGAGCGGGCGTGCGGAAGCGAGCGG
12
Output
CGGCGGGAGATT CGGGAGATTCAA CGTGCGGCGGGA CGTGGAGGCGTG CGTGGCGTGCGG GCGTGCGGCGGG GCGTGGAGGCGT GCGTGGCGTGCG GGAGAAGCGAGA GGAGATTCAAGC GGCGGGAGATTC GGGAGATTCAAG GTGCGGCGGGAG TGCGGCGGGAGA

5
chapter01/data/hamming_distance.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
CCGAAGCAATTGAAACCCCCCCGGCCTGGGAGGCGCAAAAATCTGACCTCTTTGTGAGTTGACCACTTAATTTATGTCTGACCACGAGAAGGGCTACTGATTTGGTACGTCGGGTCATGACCCCCAGTTCTTAGCCGCCTGCTCCAATCTCTGACTTGTTTATCGAGGGGATGGAGTAACGAAATGCGATTCGCCCGCTCAGGCCAAGGTATATATTTGAGTAGCGGAAGGTTGCACTACCTACAACCACGGCACACCGGCACGTTGTCGTGCCCTGGCGGCCTGCGCACTTTCGCCACTGTCAAGTACGACTTCCCAAGCTCAACCAACATTCATAATCCGGTGCAATTCATACCGTATCATCGTGCTATAAGCGACGCCGATTCTCGGGGCCTGATAATTGAGACTGGACTACATAGTGGGTGCCCTCTCTGCGAGTAAGTGACGGAACAACGGAGATCAGGGACCAAATGGTAGCAAAACAGATCGAGGTACACGCAGGTAGCTGTCCGTGGAGTAGACCGCGCTTAGCGTCTGTTAGAGTATCATCGGGGTATTAGACACAGGAACCTCTATGCTGTTAAAAGGCCATACCCCGTAATTGTGCAAATTTGTTACGTTCAAATCTACGCAGTGAGGGTCCTAAGGTGATGGCAGGGATTGGAACTTCTCCGCTGGCTCTTAGATTACTTAGCCAGTCTACCCTCGAAGATACAAATCCTTCCACCAGAGGGAGCTCATTGAAATTCATTCCATGCTACTCGACCGCGCGTATGGGTGCGGGGCTCTATGGGATCTAACTCGATCCTTCAGAGTCCTTATTCAAATGCATTTCCGTCCCCGTATGTTTCGACGAAGCCGAAGCCCAAACCCTGGGATGGACGAATTAAGGACAGTACAGGCAATAGTGTTCTCCCATACTCGGAACAGACGCCTCATTTTTTCGCGAAATCGATCTGGGTTGGAAGAAGTTCCAGTGCAGAGTTCCTATCACACAATTCGTTCTCGGGGCTTCCGGCCCATAAGCGATACTACTGTCTTTGCGAGCTAACGATTACATTCGGGGGAACTTAGCTCGGACTGGACCAGGTACATGATCCAAAGCGCGATGTCTGTCTGTTACCCTCACCGCCGCTCTTTTATCGGGTA
GCGTAGTAGGTTCGCGTACCTAGTTCCGCCGAAAAGACAAAGGAGAAGGGAATGCTCCTAGTAGTTTCAGTCTAGCAAACATGTTATAACGCTAACTGTGTGCTGCAAAAAGGATTTGAACCCAAATTTTAAAGCGCTGATCGACAGAACGCTGTTGAAGAGGCGATGGTACTGAGATTCCCCAGAAACCACCTCCGCGCTATGTGCTCAAGACAACCCGCATTCGTTTTTACTAGATTTGGAGCCGAGTTGTGATTTGGATATTTTCACATAAGACCGAGCAGGAAATATACCTTGTTGCAGCTATTGACCCCGTTCTCTCGGAAATCCATGGAATAGTCTTCGGATATTCGTACCAATGGGCGCGATGTTGCGATAAGAGAGCACATTTCATTAAGTGGTGCTCCGCCGCTAAGATGGGAAGGGGCGAGTCTATCGCAGCATCGAAGGCTGAGTTGGCCATTGCCGAGAGTATACATATTTACGATCACACTCGCATAGTCCCACGCATTACGTCCGAGATAGTATGTCCCAATGCAACCTAAAGCCGCGAGATTCCCTAAGGAGAAAATTAAACACTGGAAATTAGGTGATGCTACATCCCATGGACACTTTCGGAACAATATCGGTGACACACATCATCCGTGATCCCGTGATATTTCATCCATGGAGAGAGTATGGTTTTACTACACCTGGTCTAGGCCAAGCCTAACCCCCTGTTCATCCGTTTTATACGAGTATTACCTTGACGACCATAGAGGATAGACTCGGTATCCCGCACACTCTACACACACGACTTAATCCGCTCCACGACCTTCCTAGCGATCTTTGGCGCAGCCGGTTCGCGTATTTTACGACCAACTCGATGGATCCCAATTATCCCCCTGGTAGTGCCCCTCCGCCTGAGAATTCGACGGGCGAGGTCCGGGGGACCGACATAGAGTGGAATGCTTCTTTCCGGGATAACACGTGATTGACATAAAAATGTAGGGCAGATAGGCATCGTTAGCACCTCTCTCCTTGCTGCACTGCGTTTATCGATCGAATTCAAGACTTGTGCATGTTGAAAACAACCTCGCGTTATCCCTGCTATTTGCTTCAGAGCCGTAGGAGGGGACCATGCGTGAGTCCTCCTGAGCAACCTCAATT
Output
844

4
chapter01/data/minimum_skew.txt

File diff suppressed because one or more lines are too long

5
chapter01/data/pattern_count.txt

File diff suppressed because one or more lines are too long

5
chapter01/data/pattern_matching.txt

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
Input
ACACCA
CCGAACACCCGTACACCGAACACCACACCACACCTTGCACACCACACCTACACCACACACCACACCGGACACCCACACCCACACCACGAACACCGAGAGTACACCTACACCTGACACCGGGGATCGTCACACCAAGTGGTGATACACCCACACCCTTTACACCTACACCACACCCGTACACCCTGAACACCACACCTAGAGAGTTGCACACCTCACACCGAAGGCACACCACACCATCCACACCATAAACACCGTTAACACCGTAGAACACCCAGCACACCCTTACCGCATACACCGACGTTAGACACCCACACCGGCAGTCACACCGTACACCCATTCGGTCCACACCCTACACCGCCTGCCACACCTACTGAGTTACACCGCATGACACCATTATCCGAACACACCAATATACACCAACACCATACACCATTTAACACCCCAAAACACCGACACCGACACCGCAAGCCCACACCACACCCACACCACAGACACCTACACCGTTTAGACACCAACACCGACACCACACCCCACACCCAAGACACCGCTACACCCTGCTGGACACCGACACCTACACCTCACACCGGACACCGCACACACCGCCACACCAATCACACCACACCACACCAGTACAACACCGACACCTACACCACACCACACCCAGATACACCCACACCGGACACCACACCAAACACCATTACACCCACACCGGTACACCACACCTCGTACACCAAGTAGACACCCAACACACCACACCTTGATGACACCTGACACCATACACCAAACACCACACCGAGGTAGACACCACACCGCCATCGACCACACCCTGACACCATACACCACACCACACCTAGTCGACACCCACACCCTCACACCTGACACCCGCGGCATACACCCACACCACTTACACCTACACCGGGGGAAACACCGAAACACCTCAACACCGGACACCACACCTAAGACACCGGGCGATACACCTGACCCTGACACCACACCACACCCAACACCCGAACACCACACCCAAACCTTGACACCCACACCAAAACACCCTTTATTAAAACACCCCGACCACCAAACACCACACCCCACACCGAACACCCACACCGCATACACCGGTCACACCTTATCTCGCCCACACCCTACACCCCACACCACACCACACCACACCGTACCACACCACACCCCCACACCAAAACACCACACCACACCGGTTACACCCCACACCAACACCCACACCATTACACCTACACCGCAACACCTGCACACCACACCAAGACTGGAGACACCTACCACACCCTCGTTTACACCACCTGACACCTTACACCTCCGACACCAAAAACCCGTTGGGTCATCGGATCAGGACACCTTTACACCACACCTTCGAGGACACCACGGACACCACACCCCACACCACACCGGTACACCGCGTTCACACCTCACACCGACACCACACCCCCTGAACTGTATACACCACACCACACCAACCCAACACCCTAGAAGACACCTGCCACACCTTACACCACACCACCGACACCAACACCCAAACACCTTTGACACACCACACCAACACCGTACACCGCAACACCCGCATTACACCTTACACCACACCACACCCCCCTACACCCACACCACACCCTCGGACACCAGTACACCACACCACAGATAGACACCATACACCTTACACCACATACACCTTTCACACCACACCCACACCCCGCTTAGACACCGACACCACACCACACCTGACACCACACCTCGCACACCGCCCTTACACCACACCCCAGCAGAAAACGAACACCCACACCACACCACACACCACACCACACCACACCGACACCTGACACCTAAACACCCCCACACCACACCTCTCCAACACCACACCAACACCTACACCAGAAAGACACCGACACCCGACACCCGCTGTTGTACACCCACACCATCGACACCACACCACACCACACCCTACACCGGCACACCATGCAAACACCACACACCTGGAC
ACCCACACCACACCGCACACCACACCACACCTACACCACCGACACCACACCACACACCTACTCCACAACACCTACACCAAACACCCTACACCTACACCTACACCTACATACACCTACACCTAATATTATGGACACCACACCTTCAGACACCGTACACCACACACCCTATGTTACACCACAGGCAGAATTTGACACCTCACACCCACACCCACACCCGCACACCACACCAACACCACACCACACCCCCAACACCGCTCTTACACCTTACACCGACACCAACACCGACACCGACACCACACCCCAATATCCCTCACACCACACCTAACCAGTATACACCGTTGACAACACCCCAATTTACACCCCATACACCTCAGACCACACACCGGACGGGCAACACCTACACCGATGTTACTTTACACCGGGCTCGCGGACACCACTCGACACCAACACCCGACACCTTACACCACACCAGCTGCGTGAACACCTACACCATCCCAACACCACACCGACACCGTATGGACACCTACACCTCGAGAGTTCCGCTAGAACACCACACCCATACACCATACACCGCGTACACCGAACACCGACACCCACACCACACCCAATGACACCGATGACACCGGCTCGATACACCTACACCGAACACCATCAGACACCGCGTACACCCAACACCTGACACCAACACCGCGGCACACCTAGTGACACCTACACCTACACCACACCATACACCCTACACCGATGAACACCAACACCACTCTAAACACCCAGGACACCAACACACCTAGACACCACACCAACGACAGAGACACCCTACACCTGCCAAGCTTTACACCATTGGTGAATCACACACCACACCAACACCACACCACACCGCTTACACCCGACCCGAAAACACCCACACCACACCAACACCACACCACATTACTCCCGTTACACCTACACCAACACCACACCTTTACACCACACCCAGCAACACCACACCAAATGGACACCACACCACACCACACCTTAGCCGATGTGCCGACACCGCTGTCGTCACACCAGTGACACCTTAGCGTACACACCACACCCAACACCTACACCACACCCGAAACACCTGACACCACACCACACCACACCCTACACCACACCATGACCACACACCAGCCGACACCACACCATACACCTACACCGAAACACCTTTCTACACCACACCACACCTGAACACCTAGTCACACCACGACACCAACACCTGACCACACCGGGGGACACCTTTGGAACGACACCTAACACCGCCACACCACACCACACCCGACACCTATAACACCACACCACACCACACCAAAGGCACACCTTAACACCCACACCAAGGGCTACACCACACCACACCTCCAAAACAAGGGACACCACACCCAACACCACACCACACCGCGTGGACACCACACCTTGACACCAAATTGTGCACACCACACCTGCACACCTTAAGAACGACACCGTCAGTACACCGAAACCCTATGACACCTGGGACACCTGGCACACCAACTACACCACACCCACACCACACACCTGGACACCGTTTCGCGAGTGTGGGTTGCTTGACACCACACCACACCGCGGCCTTACACCGCACACCGTAAACACCGTTGACACCTCATTACTCGACACCACACCGCACACCCACACCCGACACCGAACACCACACCTGGGCATACACACCACACCGTACACCTACACCACACCTGTGCTACACCAGGGGTACACCACACCTAGTACACCACACCGATACACCCACACCACACCACACCCACCAACACCACACCATCAAGAACACCCTATACACCCACACCACACCTACACCACACCCTACACCACACCACACCACACCATCGACACCTACACCACACCAACACCACACCAAACACCACACCCACACCCGGACACCACACCCACACCACACCATAACACCTAACACCACACACCTACACCTACTCTGCTAAACACCCAACACCTCTACACCC
TGCCGACACCGCGACACCGGCGACACCCTGTTACACCACACCTCACACCTTCGACACCAGCCAGAGACACCGGACACCGACACCCCGAACACCAACACACCCGA
Output
19 24 38 49 56 80 128 164 186 225 230 239 387 403 413 419 426 471 482 508 520 604 613 618 623 646 651 679 684 691 713 727 747 770 777 784 801 829 836 841 897 947 986 991 1011 1036 1075 1148 1153 1158 1173 1186 1194 1199 1220 1232 1262 1267 1303 1329 1369 1386 1395 1407 1444 1467 1472 1477 1516 1521 1530 1555 1560 1599 1604 1625 1640 1648 1653 1666 1680 1698 1728 1733 1745 1770 1800 1805 1812 1817 1822 1856 1872 1877 1889 1933 1942 1947 1952 1972 1983 2004 2016 2021 2032 2041 2046 2073 2131 2153 2172 2218 2223 2229 2234 2272 2290 2312 2430 2440 2460 2465 2486 2497 2547 2560 2595 2645 2678 2716 2721 2745 2751 2772 2788 2793 2831 2849 2854 2860 2865 2900 2905 2911 2916 2941 2947 2960 2975 2980 2991 2996 3001 3040 3063 3081 3102 3107 3112 3124 3129 3142 3152 3157 3188 3193 3216 3224 3279 3284 3305 3310 3315 3320 3345 3357 3362 3385 3397 3402 3418 3431 3445 3517 3526 3537 3580 3585 3643 3675 3694 3712 3728 3739 3753 3772 3777 3792 3797 3824 3835 3847 3852 3857 3862 3877 3882 3888 3893 3900 3919 3930 3935 3950 4032 4053 4088

4
chapter01/data/reverse_complement.txt

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
Input
GCACTAAAGCACCAGCGAGACTAGACAGTGCCTTACGCTGTATAGGGATAAAAGTTGTCAAGATGACTTGCGGGAATCGTTAGGCTGACACGCACTAATGCTCGCCTTCCGGGTGTTCTGTGAGTACGGTTGATCACGGTCGCCCTGCGGATGTACTACCATGAAAGTTGATCACGTGCCGCGCGCTCCCTAAGCTTAGAAGTTTGCACAATCTGCATTCTATCCTGCCACGCCTTCAATAATAAGTGGTGTATGCAATTTGGAGTCGATCTGGGAACCAACGATTAACTTGGGAAGTGGCTATATCAAAATACGATGTCTTCAGCGTCGCGGTCGACGCTGCGCAACGAACGAAAAGTCCGATGGACCCGAACTCTGATTATACCGAATCTCCGCTTTTACGACTCGCCACATACCGGCATAAGCCATTCTGGGGCTTTGCCCCCTTAGGTCTAGCCCACCCCCGACCTAGCTTGAGCGTGTCACACCCCAACAGCCGCATTACGCCCGCTCACCGACACTTGGCGGTCGTATAAGAAATCCAAAACCGAGACGAAAACTGAAGAATAAGGTTCATTCAGCATTGTGGAGTTGACAACATCAGTATGAGGGTGAGTTGCGTCAAAGTCGAAGAATATGGAGGGTCAAATCACGAGATGTAACATCCACGCGAACACTTAGCTAGTAATCATTTTTCCGTAAAGAGTCGTTGAGTCCGACCAGTTGAAGCTCAGTGTTTATCCGGTAGGGAATTGTAGGATCAACGATAGGGTCGCGGAACCGCCGTATTATAGAAAGAGATAGTCCCAACGTTCTTTATGCACTTCGCTGAGAGAGGGTGACCGGGCACGCAGAGACTTTGGCTTTGTAGCCCCATTCCGCGGCTCTTCGGATACTGACTGAGCTGTAGTCGGCACATCCTTTACAACAAAAAAGCTCATGTCCGAGATTTTAATGGCGGCGCACGGTCACTCGGAGTTGACGAATGCGCAGCGAATCGTTGGTTCCAGATAAAGGCAAGGCTGTGTTACTGTTTCGGAGGGCAATCGTCAACGAGCAAAGATGTTAGAATAGAAATCGGAGCGAGGCTCCCAGCAAATATGAGTTAGGATCTTTTTTGCGAAAGGGTTGGTCTCCATCTCCTCTCGCCTGCGAGCGAGTCCCCGAAGCACGTTCAACCTATTTGATTCGGTGCAGGACACCCTAGATTAGCATACAGGTATAATATCAGGAAGAGTCACCTTTCATTCCCGACCAGTAGGATGTATAGGAATGAGACTATCCAGTTCTTTGTCAGCTCAAGACAGCGTTGGCAATACGGCCGAGTATTGGGGGGAATACCCCGGAACATAGTATTGTGCCTTAGCTATTGCCCTAGATACCACGCGGCCCTTGAGCATTTGTCTACACTTTGGTGATCCTAGGCACCCCGCGCTCGTGGCAACGTCAGCATCTTGTGATAGCAAAGCGTATGTACCTGTAATGTAACATCAAAGTATATCGGCACCCTAGTGGGGGCGAAGGTTGGATCGCTTATCACTCGGGACGACGGTGGTATCCAGCCACAGTGTTGCTCATTAACGACCACACAGCTCTTGGAATCGAGCCATGGACAGGGGACGCCCCAGGATACATGATGTTCCTGTGAGCACAAGCACTATGGCAGGCTTAGAGCTAATTCTTCCATTGGGCCGGTAAGACGCCAGAGAAAGTCACCGGTGTGAGAAAGGGTTTCGTGTGGGGGAGGCGTCAAACAACAAGGATTTACGTCGAACCGATCAGCCCTTGTCTGATTCATTCCAGGTTTAAGCGAGCCCTGGCGGTGACCTCCCGGGGATTCTTGGTGACGATAAGTGTAGACTGGTTTATGACTGTCTATAAGTGCAAGCAGTCCGCGACTCGGCCGCTCCTCAGATCTCGTCCTCCCAATCCTTACGAGGCACTATTCCGGCCCTAAAAACTTACCTACCAACCGGACATAGCGAACGGTCTAAGTTTTC
GGAAATTGAATAACACTCGAACAAAGGAGCCCAATACATGGCACAAGCACACATAAAGCTTGGCGCTGCTGACGGCCGGCCCCCACAGCAGGTGGGTATATCAGGATAATGCTCTACCTCCTCGGGGATGACCAGAGACGAACGTTCGGACGCTATTAGTTAGTGGTCGCCCAGATATTCTCCTAATCAAGCCCTCGAAGGCTAGTCTAAATTTTAGCAAAAACTCGTATAGCAGCACATGCGGTAGACTGGGCCTCAGCCAGGTAGAGCTGTGGCTGCACTCGAGCAATCACTACCGTATAGAGTGGTGTTATTTCGGGGTGAATGTCAGGGGTGGTCCAAAATCACAAACACGTCTATTCGCACCCGGGAATGCTCATGTTCCCACGGCGGGCCTGTACAGATGTGAGAGGCAGCGATCATACAAAGTTGCCTGGCCTCCCCACGAACACACGGCGGCCCATTAGGTCTGAACAGGTTTATCGTTAATATATTTTGCGGTGG
Output
CCACCGCAAAATATATTAACGATAAACCTGTTCAGACCTAATGGGCCGCCGTGTGTTCGTGGGGAGGCCAGGCAACTTTGTATGATCGCTGCCTCTCACATCTGTACAGGCCCGCCGTGGGAACATGAGCATTCCCGGGTGCGAATAGACGTGTTTGTGATTTTGGACCACCCCTGACATTCACCCCGAAATAACACCACTCTATACGGTAGTGATTGCTCGAGTGCAGCCACAGCTCTACCTGGCTGAGGCCCAGTCTACCGCATGTGCTGCTATACGAGTTTTTGCTAAAATTTAGACTAGCCTTCGAGGGCTTGATTAGGAGAATATCTGGGCGACCACTAACTAATAGCGTCCGAACGTTCGTCTCTGGTCATCCCCGAGGAGGTAGAGCATTATCCTGATATACCCACCTGCTGTGGGGGCCGGCCGTCAGCAGCGCCAAGCTTTATGTGTGCTTGTGCCATGTATTGGGCTCCTTTGTTCGAGTGTTATTCAATTTCCGAAAACTTAGACCGTTCGCTATGTCCGGTTGGTAGGTAAGTTTTTAGGGCCGGAATAGTGCCTCGTAAGGATTGGGAGGACGAGATCTGAGGAGCGGCCGAGTCGCGGACTGCTTGCACTTATAGACAGTCATAAACCAGTCTACACTTATCGTCACCAAGAATCCCCGGGAGGTCACCGCCAGGGCTCGCTTAAACCTGGAATGAATCAGACAAGGGCTGATCGGTTCGACGTAAATCCTTGTTGTTTGACGCCTCCCCCACACGAAACCCTTTCTCACACCGGTGACTTTCTCTGGCGTCTTACCGGCCCAATGGAAGAATTAGCTCTAAGCCTGCCATAGTGCTTGTGCTCACAGGAACATCATGTATCCTGGGGCGTCCCCTGTCCATGGCTCGATTCCAAGAGCTGTGTGGTCGTTAATGAGCAACACTGTGGCTGGATACCACCGTCGTCCCGAGTGATAAGCGATCCAACCTTCGCCCCCACTAGGGTGCCGATATACTTTGATGTTACATTACAGGTACATACGCTTTGCTATCACAAGATGCTGACGTTGCCACGAGCGCGGGGTGCCTAGGATCACCAAAGTGTAGACAAATGCTCAAGGGCCGCGTGGTATCTAGGGCAATAGCTAAGGCACAATACTATGTTCCGGGGTATTCCCCCCAATACTCGGCCGTATTGCCAACGCTGTCTTGAGCTGACAAAGAACTGGATAGTCTCATTCCTATACATCCTACTGGTCGGGAATGAAAGGTGACTCTTCCTGATATTATACCTGTATGCTAATCTAGGGTGTCCTGCACCGAATCAAATAGGTTGAACGTGCTTCGGGGACTCGCTCGCAGGCGAGAGGAGATGGAGACCAACCCTTTCGCAAAAAAGATCCTAACTCATATTTGCTGGGAGCCTCGCTCCGATTTCTATTCTAACATCTTTGCTCGTTGACGATTGCCCTCCGAAACAGTAACACAGCCTTGCCTTTATCTGGAACCAACGATTCGCTGCGCATTCGTCAACTCCGAGTGACCGTGCGCCGCCATTAAAATCTCGGACATGAGCTTTTTTGTTGTAAAGGATGTGCCGACTACAGCTCAGTCAGTATCCGAAGAGCCGCGGAATGGGGCTACAAAGCCAAAGTCTCTGCGTGCCCGGTCACCCTCTCTCAGCGAAGTGCATAAAGAACGTTGGGACTATCTCTTTCTATAATACGGCGGTTCCGCGACCCTATCGTTGATCCTACAATTCCCTACCGGATAAACACTGAGCTTCAACTGGTCGGACTCAACGACTCTTTACGGAAAAATGATTACTAGCTAAGTGTTCGCGTGGATGTTACATCTCGTGATTTGACCCTCCATATTCTTCGACTTTGACGCAACTCACCCTCATACTGATGTTGTCAACTCCACAATGCTGAATGAACCTTATTCTTCAGTTTTCGTCTCGGTTTTGGATTTCTTATACGACCGCCAAGTGTCGGTGAGCGGGCG
TAATGCGGCTGTTGGGGTGTGACACGCTCAAGCTAGGTCGGGGGTGGGCTAGACCTAAGGGGGCAAAGCCCCAGAATGGCTTATGCCGGTATGTGGCGAGTCGTAAAAGCGGAGATTCGGTATAATCAGAGTTCGGGTCCATCGGACTTTTCGTTCGTTGCGCAGCGTCGACCGCGACGCTGAAGACATCGTATTTTGATATAGCCACTTCCCAAGTTAATCGTTGGTTCCCAGATCGACTCCAAATTGCATACACCACTTATTATTGAAGGCGTGGCAGGATAGAATGCAGATTGTGCAAACTTCTAAGCTTAGGGAGCGCGCGGCACGTGATCAACTTTCATGGTAGTACATCCGCAGGGCGACCGTGATCAACCGTACTCACAGAACACCCGGAAGGCGAGCATTAGTGCGTGTCAGCCTAACGATTCCCGCAAGTCATCTTGACAACTTTTATCCCTATACAGCGTAAGGCACTGTCTAGTCTCGCTGGTGCTTTAGTGC

2
chapter01/for_real/rosalind_ba1a.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
GTCCGGGGTCCGGGGTCCCGGGGTTACCCGGGGTCCCGGGGTCCGGGGTTACCCGGGGTAACCGGGGTCACCGGGGTCTCCGCCGGGGTGGCCGGGGTACCCGGGGTCCCGGGGTCCGGGGTCCGGGGTGCACGGGCCGGGGTGTACCGGGGTCCGGGGTCGGCGTCCGGGGTCCGGGGTCTGGAAGTTGAACTGACACCGGGGTTCCGGGGTCCGGGGTCCGGGGTCCGGGGTGGAACCCGGGGTGTCCCGGGGTCCGGGGTCCGGGGTCCGGGGTTCCGGGGTAGGCCGGGGTCCCGGGGTAGTGTGTGCCGGGGTCCTCGCCGGGGTAGCGCAAAACCGGGGTGCGGTAACTACCGGGGTCCGGGGTGCCGGGGTCCGGGGTTCCGGGGTCCGGGGTTCTCCGGGGTCCCGGGGTAGCCCGGGGTATCCGGGGTCCGGGGTCCGGGGTCCGGGGTACCGGGGTCAGGGCCGGGGTGACCGGGGTTCCGGGGTATCTGTTTATCCCGGGGTCCGGGGTCGGTAAACCGTCCGGGGTTTACCCCGGGGTCCGGGGTCTCGATCAAACCGGGGTTATGAGAATCCGGGGTCCGGGGTCCCGGGGTAGACCGGGGTACATCCCGGGGTGTCCGGGGTTACAAGCCGGGGTCCAAACGATTCCCGGGGTCCGGGGTTGCCCCGGGGTCCGGGGTGATGCACCGGGGTAAGCCGGGGTTGACGACCCCGGGGTCGCCGGGGTCTGCACTCCGGGGTTCCGGGGTAGCCGGGGTCAACCGGGGTAACCGGGGTTTGCCGGGGTCCCGGGGTTTGTCCGGGGTCCACCGGGGTCCGGGGTGCCGGGGTTCTACCGGGGTGCCGGGGTACACCGGGGTAGCCGGGGTATCCGGGGTACCGGGGTAAACCGGGGTGCCGGGGTCCGGGGTCCGGGGTTCCCGGGGTTTCTACCGGGGTGGGACCGGGGTCCGGGGTCCGGGGTATTAACCACCGGGGTGCGACCGGGGTGGCCGGGGTCCGGGGTATCCGGGGTACATCCGGGGTACGG
CCGGGGTCC

2
chapter01/for_real/rosalind_ba1b.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
AGTAGGTTCAGGGCGTTTAATAGCGAAAACAAATAATAGCAGTAGGTTGTACCACGTACCACTAATAGCGAAAACAAAAGTAGGTTCAGGGCGTTGTACCACGTACCACGAAAACAAATAATAGCTAATAGCGAAAACAAAGTACCACGAAAACAAATAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCAGTAGGTTCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACTAATAGCCAGGGCGTTAGTAGGTTGAAAACAAACAGGGCGTTAGTAGGTTCAGGGCGTTGTACCACTAATAGCTAATAGCGAAAACAAATAATAGCTAATAGCTAATAGCCAGGGCGTTGTACCACGAAAACAAACAGGGCGTTTAATAGCAGTAGGTTGAAAACAAATAATAGCCAGGGCGTTGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTAGTAGGTTGTACCACGTACCACGAAAACAAAGTACCACGAAAACAAATAATAGCGTACCACGAAAACAAAAGTAGGTTTAATAGCGAAAACAAAAGTAGGTTAGTAGGTTCAGGGCGTTTAATAGCTAATAGCGAAAACAAAGTACCACCAGGGCGTTTAATAGCTAATAGCGTACCACCAGGGCGTTGTACCACGTACCACGAAAACAAAGAAAACAAAAGTAGGTTTAATAGCAGTAGGTTAGTAGGTTAGTAGGTTTAATAGCGAAAACAAACAGGGCGTTTAATAGCGTACCACGTACCACGAAAACAAAGAAAACAAAGTACCACCAGGGCGTTTAATAGCGAAAACAAACAGGGCGTTGAAAACAAA
12

1
chapter01/for_real/rosalind_ba1c.txt

File diff suppressed because one or more lines are too long

2
chapter01/for_real/rosalind_ba1d.txt

File diff suppressed because one or more lines are too long

2
chapter01/for_real/rosalind_ba1e.txt

File diff suppressed because one or more lines are too long

1
chapter01/for_real/rosalind_ba1f.txt

File diff suppressed because one or more lines are too long

2
chapter01/for_real/rosalind_ba1g.txt

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
ATGACTAGTTATGCGACACGTGTTCCTTAAACAAACCGCTGATTGCGGAGGGATCATGTTGAAACGCAGTCAGTTGGCGCTTTACAAGAATTTAAGTGTCCCTCGGAGATGCTCCACTACACGCCATGGCGAAACGGTTCAGTCTCTTAGAAGAAGAAAGATATAGGAGTTGCGCCACCGTGATATAAGCACCGCAGTATCTGAAGGGAGCACAACTTGCTGCGAACAGACTGGTACGGTTACGTCGGGGCTTCAGGCATCGTTGGCGAGGTAGGAATCCTTATGTTAATTTTAAATCGAAGCAAAACAGAACTGTTGATCACTCATGTGTCGTTAACCGGAAGACTGCGGGTGCTCAGCCCCAATCGACGGCTGTTAGGAATGGCACACTACTGTATTTGTGACGACTAACTTGACATTCGAAGGTATCTGCGGTTGTTAAACGCCGATAATCGCCACCGCAGTTCTGAAAGGCTATATGTATCACGGTGATTTACGGCATTGTAAGCCCACTCAGAGTGCGTCGTAGGTTACGCGTTCTGAGTTGAAATAATCCAGTCGAACACGGTTGGTATCATGAATTCAGACTACCGTTTCTTGACTCCCGTCCTATACGAGTCTAGAGCGAACTTCGGGGTAAGAAATCACAATTAATCTCTTCCTTGTGTGATCCGCAAGGAAGCTGAGCTCAATTTGCAAGTACAGGTAGGTGGCAATCGAGAGCTACTAACACTCTTGTGTCGTTCGTAATTCATAAATAAAAAGACACGCCCTTATGATTGAAGCCTGAACTGCGGCAACGGTAGGTTTCCAAAGAGGATCGAGTCAGCGATACCCCCTGTACGCAGACAGATTATTACCCCCACTCTGCAATGTAGAAGTCTTAAAAACGCACTCTAGGCCAGTAACCAACCAGCTGGGTGGTGCGTTACCTAGTGCTATACAACAGTACCACCAGATTAGAAGCATGCCAGGTGTCTCGACACCTCCAATTCGTCATTTGGTGTGAGAAAAAGATATACCGCCAAGTTGCCATACCTGCAC
ATGGACTATTTTGCTATACGATTACTAGGAATAAGTTGAACAACCCTTGCTTTTCTTTTTAACACAGCCAGAGGCTCGGGATGGAACGCGTCATCTCGCGGACTCAGAGATGCCAGATGGTAGGCCTCTTCCAACGAGTAACTTACGATAATTTGATAGATTCTTGAACGTAGTGTGTCGACCTCCGTACCGGAAAATTTTCTTATCTCTAGTGAACCGTCGAGCTGTACTTTAGACCCCTGTGCGACGATAGGTCTCCTGCGTTAGGTATTTTACATATTCCGCTGGGACCCAAATTTTTCCCGCGAACGGGATAGAGGTAGTATCTAACTTCGTTTACACAACGTAACATCCCGCCATGGTCGTTACGGGCGTACCCGCGCGGCGAAGGGCGCGGACCCGCGAATCATAAACTAAGAAAAGAGTATGTTGAAGCGCACCCGCCATGTCGCTCGACATCTGCCTGGCATGCTATAATACCTGCTGAGCAGTACATCCACGGCGTCTATGAGCGCCACGTCAATCGGATCAGCCGGAATGCTGATTCTGTTGTGTCGCCGTTATGAATTTGGAGGTGGCACGCAAGGTTCCAGCCCTGTATAGTGTGTTAAAGTCCACTTTTCATCATTGCTTAATGTTTAATCGGGTCCTCACCCGAAACTGTGATTGCGTTCTTATGTAAAGCTCTCGTTAGCAGACACCAATCTATGAAACTTCCGCCTCGGGCAACTTTCATGAGGCACTGTAACATTTGTTGCATAGAGCCGTACTATGGCCACCGTATTTTATATGGCTGACGTAAAGAGCCTGTTAATGTGTAATTCGAAGGTCCCTTTAGATGAGTCTCATGCCAGACCCAGAAGAGTGACGGCTGTCTCGGAGTGGGTATACGTTAGCCCCTGCCAATAGTAAAGCGTACACCTTGTCTTCAAGACTGTCACTGACACAAATTCCCCGACCCATATTCCGTTCCGGGTTGGTCTACCTTACGGCGGGAATCCAGAGGCCTAATGCGCTGGTTATATACCACCGGATCCCGATATA

3
chapter01/for_real/rosalind_ba1h.txt

File diff suppressed because one or more lines are too long

15
chapter01/main.go

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
package main
import (
)
// main is the driver entry point. It runs exactly one problem driver
// against an input file under for_real/; at present only the BA1H call
// is active and the calls for BA1A through BA1G are commented out.
// To run a different problem, uncomment its line.
func main() {
//BA1A("for_real/rosalind_ba1a.txt")
//BA1B("for_real/rosalind_ba1b.txt")
//BA1C("for_real/rosalind_ba1c.txt")
//BA1D("for_real/rosalind_ba1d.txt")
//BA1E("for_real/rosalind_ba1e.txt")
//BA1F("for_real/rosalind_ba1f.txt")
//BA1G("for_real/rosalind_ba1g.txt")
BA1H("for_real/rosalind_ba1h.txt")
}

545
chapter01/rosalind.go

@ -0,0 +1,545 @@ @@ -0,0 +1,545 @@
package main
import (
"fmt"
"sort"
"errors"
s "strings"
)
/*
rosalind.go:
This file contains core functions that
are used to solve Rosalind problems.
*/
////////////////////////////////
// BA1A
// PatternCount returns how many positions of input are the start of an
// occurrence of pattern. Occurrences may overlap. A pattern longer than
// input yields 0.
func PatternCount(input string, pattern string) int {
	width := len(pattern)
	// Last start index at which a window of len(pattern) still fits.
	// It is negative when pattern is longer than input, in which case
	// the loop below never executes.
	last := len(input) - width
	hits := 0
	for pos := 0; pos <= last; pos++ {
		if input[pos:pos+width] == pattern {
			hits++
		}
	}
	return hits
}
////////////////////////////////
// BA1B
// KmerHistogram returns a map from each kmer (substring of length k)
// found in input to the number of times it occurs. Overlapping
// occurrences are all counted.
//
// An error is returned when input is empty or when k is not a natural
// number (k < 1). When k is longer than input the histogram is empty
// and no error is returned.
func KmerHistogram(input string, k int) (map[string]int, error) {
	result := map[string]int{}
	if len(input) < 1 {
		return result, errors.New("Error: KmerHistogram received an empty input string")
	}
	// Reject k < 1 up front: a negative k would make input[i:i+k] an
	// invalid slice expression and panic, and k == 0 would produce a
	// meaningless histogram keyed by the empty string.
	if k < 1 {
		return result, fmt.Errorf("Error: KmerHistogram received a kmer size that was not a natural number: k = %d", k)
	}
	// Number of start positions at which a kmer fits; zero or negative
	// when k is longer than input, so the loop is skipped.
	windows := len(input) - k + 1
	for i := 0; i < windows; i++ {
		// A missing key reads as 0, so this both inserts and increments.
		result[input[i:i+k]]++
	}
	return result, nil
}
// Find the most frequent kmer(s) in the kmer histogram,
// and return as a string array slice
func MostFrequentKmers(input string, k int) ([]string,error) {
max := 0
// most frequent kmers
mfks := []string{}
if k<1 {
err := fmt.Sprintf("Error: MostFrequentKmers received a kmer size that was not a natural number: k = %d",k)
return mfks, errors.New(err)
}
khist,err := KmerHistogram(input,k)
if err != nil {
err := fmt.Sprintf("Error: MostFrequentKmers failed when calling KmerHistogram()")
return mfks, errors.New(err)
}
for kmer,freq := range khist {
if freq > max {
// We have a new maximum, and a new set of kmers
max = freq
mfks = []string{kmer}
} else if freq==max {
// We have another maximum
mfks = append(mfks,kmer)
}
}
return mfks,nil
}
// Find the kmer(s) in the kmer histogram
// exceeding a count of N, and return as
// a string array slice
func MoreFrequentThanNKmers(input string, k, N int) ([]string,error) {
// more frequent than n kmers
mftnks := []string{}
if k<1 || N<1 {
err := fmt.Sprintf("Error: MoreFrequentThanNKmers received a kmer or frequency size that was not a natural number: k = %d, N = %d",k,N)
return mftnks, errors.New(err)
}
khist,err := KmerHistogram(input,k)
if err != nil {
err := fmt.Sprintf("Error: MoreFrequentThanNKmers failed when calling KmerHistogram()")
return mftnks, errors.New(err)
}
for kmer,freq := range khist {
if freq >= N {
// Add another more frequent than n
mftnks = append(mftnks,kmer)
}
}
return mftnks,nil
}
////////////////////////////////
// BA1C
// ReverseString returns s with its runes in reverse order, so multi-byte
// characters stay intact rather than being reversed byte-wise.
// Adapted from:
// https://github.com/golang/example/blob/master/stringutil/reverse.go
func ReverseString(s string) string {
	src := []rune(s)
	out := make([]rune, len(src))
	// Write each rune into its mirrored slot of the output.
	for i, r := range src {
		out[len(src)-1-i] = r
	}
	return string(out)
}
// CheckIsDNA reports whether input consists solely of the characters
// A, T, C and G. The check is case-insensitive: input is upper-cased
// before being examined. An empty string is reported as valid DNA.
func CheckIsDNA(input string) bool {
	for _, base := range s.ToUpper(input) {
		switch base {
		case 'A', 'T', 'C', 'G':
			// Valid nucleotide; keep scanning.
			continue
		default:
			return false
		}
	}
	return true
}
// DNA2Bitmasks converts a DNA string into four boolean masks, keyed by
// "A", "T", "C" and "G". Each mask has one entry per character of the
// (upper-cased) input, set to true where that nucleotide occurs.
// For example, AATCCGCT becomes:
//
// bitmask[A] = 11000000
// bitmask[T] = 00100001
// bitmask[C] = 00011010
// bitmask[G] = 00000100
//
// If the input contains anything other than ATCG, an empty map and an
// error are returned.
func DNA2Bitmasks(input string) (map[string][]bool, error) {
	input = s.ToUpper(input)
	masks := make(map[string][]bool)
	if !CheckIsDNA(input) {
		msg := fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", input)
		return masks, errors.New(msg)
	}
	// Allocate every mask up front (all false) so the input only has to
	// be walked once below.
	n := len(input)
	for _, base := range []string{"A", "T", "C", "G"} {
		masks[base] = make([]bool, n)
	}
	// Single pass: flip the bit of the matching mask at each position.
	for pos, base := range input {
		masks[string(base)][pos] = true
	}
	return masks, nil
}
// Bitmasks2DNA converts four boolean masks (keys "A", "T", "G", "C")
// back into a DNA string. Position i of the result is the nucleotide
// whose mask is true at i; if several masks are true at the same
// position the first of A, T, G, C wins.
//
// An error (and an empty string) is returned when:
//   - any of the four keys is missing,
//   - the masks do not all have the same length, or
//   - some position has no mask set.
func Bitmasks2DNA(bitmasks map[string][]bool) (string, error) {
	// Verify ATGC keys are all present
	a, Aok := bitmasks["A"]
	t, Tok := bitmasks["T"]
	g, Gok := bitmasks["G"]
	c, Cok := bitmasks["C"]
	if !(Aok && Tok && Gok && Cok) {
		return "", fmt.Errorf("Error: input bitmask was missing one of: ATGC (Keys present? A: %t, T: %t, G: %t, C: %t", Aok, Tok, Gok, Cok)
	}
	// All masks must be the same size; indexing a shorter mask with the
	// length of the A mask would otherwise panic.
	size := len(a)
	if len(t) != size || len(g) != size || len(c) != size {
		return "", fmt.Errorf("Error: input bitmasks have different lengths (A: %d, T: %d, G: %d, C: %d)", len(a), len(t), len(g), len(c))
	}
	// The output is pure ASCII, so a byte buffer is sufficient.
	dna := make([]byte, size)
	for i := range dna {
		switch {
		case a[i]:
			dna[i] = 'A'
		case t[i]:
			dna[i] = 'T'
		case g[i]:
			dna[i] = 'G'
		case c[i]:
			dna[i] = 'C'
		default:
			// Without this, the position would silently become a NUL
			// character in the returned string.
			return "", fmt.Errorf("Error: input bitmasks have no nucleotide set at position %d", i)
		}
	}
	return string(dna), nil
}
// Complement returns the complement of a DNA string: every A is swapped
// with T and every C with G. The input is upper-cased first; anything
// other than ATCG produces an error and an empty result.
func Complement(input string) (string, error) {
	input = s.ToUpper(input)
	if !CheckIsDNA(input) {
		return "", errors.New(fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", input))
	}
	// The input has already been validated and the masks below come
	// straight from DNA2Bitmasks, so both conversion errors are discarded.
	masks, _ := DNA2Bitmasks(input)
	// Exchanging the masks exchanges the nucleotides at every position.
	masks["A"], masks["T"] = masks["T"], masks["A"]
	masks["C"], masks["G"] = masks["G"], masks["C"]
	output, _ := Bitmasks2DNA(masks)
	return output, nil
}
// ReverseComplement returns the reverse complement of a DNA string:
// the complement (A<->T, C<->G) read back to front. The input is
// upper-cased first; anything other than ATCG produces an error and an
// empty result.
func ReverseComplement(input string) (string, error) {
	input = s.ToUpper(input)
	if CheckIsDNA(input) {
		// Input is already validated, so Complement's error is discarded.
		complement, _ := Complement(input)
		return ReverseString(complement), nil
	}
	msg := fmt.Sprintf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", input)
	return "", errors.New(msg)
}
////////////////////////////////
// BA1D
// FindOccurrences returns the zero-based indices at which pattern occurs
// in genome, in increasing order. Overlapping occurrences are all
// reported. If pattern is longer than genome the result is empty.
// The returned error is always nil.
func FindOccurrences(pattern, genome string) ([]int, error) {
	hits := []int{}
	width := len(pattern)
	// Slide a window of len(pattern) across genome; the loop condition
	// stops as soon as the window would run past the end.
	for start := 0; start+width <= len(genome); start++ {
		if genome[start:start+width] == pattern {
			hits = append(hits, start)
		}
	}
	return hits, nil
}
////////////////////////////////
// BA1E
// FindClumps finds the k-mers (patterns of length k) that occur at least
// t times within some window of length L of genome. The distinct k-mers
// found across all windows are returned sorted lexicographically.
//
// Algorithm:
//   - for each possible position of the length-L window,
//     ask MoreFrequentThanNKmers for the k-mers with frequency >= t;
//   - collect them in a set to remove duplicates;
//   - return the set as a sorted list.
//
// An error is returned when L < 1 or when MoreFrequentThanNKmers rejects
// its arguments; in that case the returned slice is empty. If L is longer
// than genome there are no windows and the result is empty.
func FindClumps(genome string, k, L, t int) ([]string, error) {
	kmersList := []string{}
	// A negative L would make genome[start:start+L] an invalid slice
	// expression and panic, so reject it explicitly.
	if L < 1 {
		return kmersList, fmt.Errorf("Error: FindClumps received a window length that was not a natural number: L = %d", L)
	}
	// Set of k-mers seen in at least one window.
	seen := map[string]bool{}
	// Loop over each possible window of length L.
	for start := 0; start+L <= len(genome); start++ {
		found, err := MoreFrequentThanNKmers(genome[start:start+L], k, t)
		if err != nil {
			return kmersList, err
		}
		for _, kmer := range found {
			seen[kmer] = true
		}
	}
	for kmer := range seen {
		kmersList = append(kmersList, kmer)
	}
	sort.Strings(kmersList)
	return kmersList, nil
}
////////////////////////////////
// BA1F
// MinSkewPositions returns the positions at which the cumulative GC skew
// of genome is minimal.
//
// The skew at position i (0 <= i <= len(genome)) is the number of G
// nucleotides minus the number of C nucleotides among the first i
// characters of genome; the skew at position 0 is therefore 0 and is a
// candidate for the minimum like any other position. Positions are
// returned in increasing order.
//
// The input is upper-cased first. If it contains anything other than
// ATCG, a nil slice and an error are returned.
func MinSkewPositions(genome string) ([]int, error) {
	genome = s.ToUpper(genome)
	// Running skew and the lowest value seen so far. Position 0 (empty
	// prefix, skew 0) seeds the minimum so it is not overlooked.
	skew := 0
	lowest := 0
	minSkewIx := []int{0}
	for i := 0; i < len(genome); i++ {
		switch genome[i] {
		case 'C':
			skew--
		case 'G':
			skew++
		case 'A', 'T':
			// No contribution to the skew.
		default:
			return nil, fmt.Errorf("Error: input string was not DNA. Only characters ATCG are allowed, you had %s", genome)
		}
		// Skew positions are offset by one from character indices:
		// position i+1 covers genome[0..i].
		pos := i + 1
		switch {
		case skew < lowest:
			// New minimum: restart the list of positions.
			lowest = skew
			minSkewIx = []int{pos}
		case skew == lowest:
			// Another position that ties the current minimum.
			minSkewIx = append(minSkewIx, pos)
		}
	}
	return minSkewIx, nil
}
////////////////////////////////
// BA1G
//
// HammingDistance counts the byte positions at which p and q differ.
//
// When the strings have different lengths only the leading
// min(len(p), len(q)) bytes are compared; the surplus tail of the
// longer string is ignored.
//
// An empty p or q is rejected: the function returns -1 and a non-nil
// error instead of a distance.
func HammingDistance(p, q string) (int, error) {
	if len(p) == 0 || len(q) == 0 {
		msg := fmt.Sprintf("Error: HammingDistance: one or more arguments had length 0. len(p) = %d, len(q) = %d", len(p), len(q))
		return -1, errors.New(msg)
	}

	// Number of leading bytes the two strings have in common.
	overlap := len(p)
	if len(q) < overlap {
		overlap = len(q)
	}

	mismatches := 0
	for i := 0; i != overlap; i++ {
		if p[i] == q[i] {
			continue
		}
		mismatches++
	}
	return mismatches, nil
}
////////////////////////////////
// BA1H
//
// FindApproximateOccurrences returns the zero-based start indices of
// every window of text, len(pattern) bytes long, that differs from
// pattern in at most d byte positions (Hamming distance <= d).
// Overlapping windows are all reported, in increasing order.
//
// If pattern is longer than text the result is an empty, non-nil
// slice. A negative d can never be satisfied and yields no matches.
// The returned error is always nil.
//
// Mismatches are counted in place rather than through
// HammingDistance: that function rejects empty strings with a -1
// sentinel, which must not be compared against d, and counting here
// lets a window be abandoned as soon as it exceeds d.
func FindApproximateOccurrences(pattern, text string, d int) ([]int, error) {
	locations := []int{}
	width := len(pattern)

	// Slide a window of len(pattern) bytes along text; the loop
	// condition keeps the window's end inside text.
	for start := 0; start+width <= len(text); start++ {
		mismatches := 0
		// Stop comparing once the window can no longer qualify.
		for j := 0; j < width && mismatches <= d; j++ {
			if text[start+j] != pattern[j] {
				mismatches++
			}
		}
		if mismatches <= d {
			locations = append(locations, start)
		}
	}
	return locations, nil
}

21
chapter01/todo.md

@ -0,0 +1,21 @@ @@ -0,0 +1,21 @@
https://github.com/moul/euler
- use snakemake
main.go is a cli:
- given a problem...
- print url for problem
- duration
- answer
- awesome go
ba1c test
- not testing everything
- finish
code coverage
- https://mlafeldt.github.io/blog/test-coverage-in-go/
- go lint
- go test

95
chapter01/utils.go

@ -0,0 +1,95 @@ @@ -0,0 +1,95 @@
package main
import (
"bufio"
"fmt"
"os"
)
// readLines reads the whole file at path into memory and returns its
// lines, without their line terminators.
//
// An error opening the file is returned with a nil slice. An error
// during scanning is returned together with the lines read so far.
// A single line longer than maxLineSize bytes fails with
// bufio.ErrTooLong.
func readLines(path string) ([]string, error) {
	const (
		// Starting capacity of the scan buffer; it grows on demand.
		initialBufSize = 64 * 1024
		// Upper bound on a single line. Genome inputs are often one
		// very long line, so this is far above bufio's 64 KiB default.
		maxLineSize = 64 * 1024 * 1024
	)

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, scanner.Err()
}
// writeLines writes lines to the file at path, one per line, each
// followed by a newline. The file is created, or truncated if it
// already exists.
//
// The first error encountered while creating, writing, flushing, or
// closing the file is returned. The close error matters here because
// some write failures are only reported when the file is closed.
func writeLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// Always close the file, but surface the close error only when
	// nothing earlier has already failed.
	defer func() {
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, writeErr := fmt.Fprintln(w, line); writeErr != nil {
			return writeErr
		}
	}
	return w.Flush()
}
// EqualStringSlices reports whether a and b have the same length and
// hold equal strings at every index. Go slices cannot be compared
// with ==, so the comparison is done element by element. A nil slice
// and an empty slice compare equal.
func EqualStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for idx, s := range a {
		if s != b[idx] {
			return false
		}
	}
	return true
}
// EqualBoolSlices reports whether a and b have the same length and
// hold the same boolean at every index. Go slices cannot be compared
// with ==, so the comparison is done element by element. A nil slice
// and an empty slice compare equal.
func EqualBoolSlices(a, b []bool) bool {
	if len(a) != len(b) {
		return false
	}
	for idx, flag := range a {
		if flag != b[idx] {
			return false
		}
	}
	return true
}
// EqualIntSlices reports whether a and b have the same length and
// hold the same integer at every index. A nil slice and an empty
// slice compare equal.
func EqualIntSlices(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	for idx, v := range a {
		if v != b[idx] {
			return false
		}
	}
	return true
}
Loading…
Cancel
Save