Browse Source

remove old stuff

fix-instructions
Charles Reid 7 years ago
parent
commit
8787ac9862
  1. 38
      scripts/README.md
  2. 41
      scripts/calculate_signatures.py
  3. 58
      scripts/calculate_signatures.sh
  4. 27
      scripts/compare_components.py
  5. 24
      scripts/compare_components.sh
  6. 85
      scripts/filter_taxa.py
  7. 38
      scripts/filter_taxa.sh
  8. 13
      scripts/get_prefix.py
  9. 30
      scripts/get_sbt.py
  10. 22
      scripts/get_trimmed_data.py
  11. 23
      scripts/install_pyenv.py
  12. 47
      scripts/install_singularity.py
  13. 50
      scripts/install_snakemake.py
  14. 82
      scripts/kaiju2krona.py
  15. 33
      scripts/kaiju2krona.sh
  16. 12
      scripts/prepare_biocontainers.py
  17. 8
      scripts/run_all_part1.sh
  18. 24
      scripts/run_all_part2.sh
  19. 58
      scripts/run_kaiju.py
  20. 45
      scripts/run_kaiju.sh
  21. 16
      scripts/trimmed_data.dat
  22. 14
      scripts/unpack_kaiju.py
  23. 12
      scripts/unpack_kaiju.sh
  24. 57
      scripts/visualize_krona.py
  25. 40
      scripts/visualize_krona.sh

38
scripts/README.md

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
# dahak yeti scripts
This contains scripts to prepare for Dahak workflows.
## setup for dahak
To get dahak set up:
```
./install_pyenv.py
./install_snakemake.py
sudo ./install_singularity.py
```
## run taxonomic classification workflow
`prepare_biocontainers.sh` - download Docker containers from biocontainers
`get_sbt.sh` - get sequence bloom trees for microbial genomes
`get_trimmed_data.sh` - get trimmed data from OSF from URLs in `trimmed_data.dat`
`calculate_signatures.sh` - run sourmash biocontainer to compute signatures
`compare_components.sh` - run sourmash gather to compare each signature against the sequence bloom trees (will take a really long time)
`unpack_kaiju.py` - download and unpack tarballs from the NR Euk. database
(Will take a really long time) `run_kaiju.py` - run kaiju through docker
`kaiju2krona.py` - convert kaiju output to krona output
`filter_taxa.py` - generate the same kaiju reports as above, but filtered to genera comprising at least 1 percent of the reads
`visualize_krona.py` - generate krona html output.

41
scripts/calculate_signatures.py

@ -1,41 +0,0 @@ @@ -1,41 +0,0 @@
#!/usr/bin/python3
import glob, os
import subprocess

from get_prefix import get_prefix


def calc_signatures():
    """
    Compute sourmash signatures for every paired-end trimmed read set.

    For each *_1.trim2.fq.gz file in the current directory, run the
    sourmash biocontainer via docker and write a merged signature
    (k = 21,31,51, --scaled 10000) named <prefix>.trim2.scaled10k.k21_31_51.sig.
    Signature files that already exist are skipped.
    """
    suffix1 = "_1.trim2.fq.gz"
    #suffix2 = "_2.trim2.fq.gz"
    sigsuffix = ".trim2.scaled10k.k21_31_51.sig"
    for filename in glob.glob("*"+suffix1):
        prefix = get_prefix(filename,suffix1)
        targetfile = prefix+sigsuffix
        if(os.path.isfile(targetfile)):
            print("Skipping file %s, file exists."%(targetfile))
        else:
            datadir = "/data"
            pwd = os.getcwd()
            # Mount the working directory at /data inside the container.
            cmd = ["docker","run"]
            cmd += ["-v","%s:%s"%(pwd,datadir)]
            cmd += ["quay.io/biocontainers/sourmash:2.0.0a3--py36_0"]
            cmd += ["sourmash","compute"]
            cmd += ["--merge","%s/%s.trim2.fq.gz"%(datadir,prefix)]
            cmd += ["--track-abundance"]
            # BUG FIX: "--scaled 10000" was a single argv entry, which
            # sourmash would reject as one unrecognized option string.
            cmd += ["--scaled","10000"]
            cmd += ["-k","21,31,51"]
            cmd += ["%s/%s_%d.trim2.fq.gz"%(datadir,prefix,i+1) for i in range(2)]
            cmd += ["-o","%s/%s"%(datadir, targetfile)]
            print("Running this docker command from dir %s:"%(pwd))
            print(" ".join(cmd))
            print()
            # BUG FIX: subprocess was never imported, so this line raised
            # NameError in the original.
            subprocess.call(cmd, cwd=pwd)


if __name__=="__main__":
    calc_signatures()

58
scripts/calculate_signatures.sh

@ -1,58 +0,0 @@ @@ -1,58 +0,0 @@
#!/bin/bash
#
# Calculate signatures from the trimmed data using sourmash
#
# For each paired-end read set (*_1/_2.trim2.fq.gz, then *_1/_2.trim30.fq.gz)
# run the sourmash biocontainer and compute a merged signature at
# k = 21,31,51 with --scaled 10000, skipping outputs that already exist.
sourmashurl="quay.io/biocontainers/sourmash:2.0.0a3--py36_0"
# --- reads quality-trimmed at 2 ---
for filename in *_1.trim2.fq.gz
do
#Remove _1.trim.fq from file name to create base
base=$(basename $filename _1.trim2.fq.gz)
sigsuffix=".trim2.scaled10k.k21_31_51.sig"
echo $base
if [ -f ${base}${sigsuffix} ]
then
# skip
echo "Skipping file ${base}${sigsuffix}, file exists."
else
# Mount the working directory at /data inside the container.
docker run \
-v ${PWD}:/data \
${sourmashurl} \
sourmash compute \
--merge /data/${base}.trim2.fq.gz \
--track-abundance \
--scaled 10000 \
-k 21,31,51 \
/data/${base}_1.trim2.fq.gz \
/data/${base}_2.trim2.fq.gz \
-o /data/${base}${sigsuffix}
fi
done
# --- reads quality-trimmed at 30 ---
for filename in *_1.trim30.fq.gz
do
#Remove _1.trim.fq from file name to create base
base=$(basename $filename _1.trim30.fq.gz)
sigsuffix=".trim30.scaled10k.k21_31_51.sig"
echo $base
if [ -f ${base}${sigsuffix} ]
then
# skip
echo "Skipping file ${base}${sigsuffix}, file exists."
else
docker run \
-v ${PWD}:/data \
${sourmashurl} \
sourmash compute \
--merge /data/${base}.trim30.fq.gz \
--track-abundance \
--scaled 10000 \
-k 21,31,51 \
/data/${base}_1.trim30.fq.gz \
/data/${base}_2.trim30.fq.gz \
-o /data/${base}${sigsuffix}
fi
done

27
scripts/compare_components.py

@ -1,27 +0,0 @@ @@ -1,27 +0,0 @@
#!/usr/bin/python3
#
# Run `sourmash gather` (via the sourmash biocontainer) for every *.sig
# file in the current directory against the genbank and refseq SBT
# indices, once per k-mer size.
import glob
import os
import subprocess

kmers = [21,31,51]
datadir = "/data"
pwd = os.getcwd()
for kmer_len in kmers:
    for sig in glob.glob("*sig"):
        # Use the same image tag (2.0.0a3) as the other scripts in this
        # directory; the original pinned a different (a2) tag here.
        sourmashurl = "quay.io/biocontainers/sourmash:2.0.0a3--py36_0"
        cmd = ["docker","run"]
        cmd += ["-v","%s:%s"%(pwd,datadir)]
        cmd += [sourmashurl]
        # BUG FIX: the subcommand was missing entirely in the original,
        # so docker ran the image with bare "-k ..." arguments.
        cmd += ["sourmash","gather"]
        cmd += ["-k",str(kmer_len)]
        cmd += [sig]
        cmd += ["genbank-k%d.sbt.json"%(kmer_len)]
        # BUG FIX: the refseq filename was never %-formatted.
        cmd += ["refseq-k%d.sbt.json"%(kmer_len)]
        cmd += ["-o","%s.k%d.gather.output.csv"%(sig,kmer_len)]
        cmd += ["--output-unassigned","%s.k%dgather_unassigned.output.csv"%(sig,kmer_len)]
        cmd += ["--save-matches","%s.k%d.gather_matches"%(sig,kmer_len)]
        print("Running docker command from %s:"%(pwd))
        print(" ".join(cmd))
        # BUG FIX: os and subprocess were never imported in the original
        # (NameError on os.getcwd / subprocess.call), and datadir was
        # never defined.
        subprocess.call(cmd, cwd=pwd)

24
scripts/compare_components.sh

@ -1,24 +0,0 @@ @@ -1,24 +0,0 @@
#!/bin/bash
#
# Skipping this script for now,
# due to missing JSON files.
#
# Runs `sourmash gather` for every *.sig file in the current directory
# against the genbank and refseq SBT indices, once per k-mer size.
sourmashurl="quay.io/biocontainers/sourmash:2.0.0a3--py36_0"
for kmer_len in 21 31 51
do
for sig in *sig
do
docker run \
-v ${PWD}:/data \
${sourmashurl} \
sourmash gather \
-k ${kmer_len} \
${sig} \
genbank-k${kmer_len}.sbt.json \
refseq-k${kmer_len}.sbt.json \
-o ${sig}.k${kmer_len}.gather.output.csv \
--output-unassigned ${sig}.k${kmer_len}gather_unassigned.output.csv \
--save-matches ${sig}.k${kmer_len}.gather_matches
done
done

85
scripts/filter_taxa.py

@ -1,85 +0,0 @@ @@ -1,85 +0,0 @@
#!/usr/bin/python3
import glob, os, re
import subprocess
def get_prefix(filename, suffix):
    """Return *filename* with the trailing *suffix* stripped.

    The suffix is interpolated into a regex, so metacharacters in it
    (like ".") match loosely; callers pass literal file suffixes.
    """
    match = re.search("^(.*)%s" % (suffix), filename)
    return match.group(1)
def filter_taxa():
    """
    Generate filtered kaiju genus-level reports via the kaiju biocontainer.

    For each kaiju output file (*.trim2.out / *.trim30.out) in the current
    directory, write two kaijuReport summaries: genera comprising at least
    1 percent of the total reads, and (with -u) at least 1 percent of the
    classified reads.  Existing outputs are skipped independently.
    """
    kaijudirname = "kaijudb"
    kaijuurl = "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
    suffixes = [".trim2.out",".trim30.out"]
    for suffix in suffixes:
        filenames = glob.glob("*"+suffix)
        for filename in filenames:
            prefix = get_prefix(filename,suffix)
            reportoutfile = prefix + ".kaiju_out_krona.1percenttotal.summary"
            classifiedreportoutfile = prefix + ".kaiju_out_krona.1percentclassified.summary"
            datadir = "/data"
            pwd = os.getcwd()
            # ------------------------
            # Report: genera comprising >= 1% of total reads
            if(os.path.isfile(reportoutfile)):
                print("Skipping file %s, file exists."%(reportoutfile))
            else:
                cmd = ["docker","run"]
                cmd += ["-v","%s:%s"%(pwd,datadir)]
                cmd += [kaijuurl]
                cmd += ["kaijuReport"]
                cmd += ["-v"]
                cmd += ["-t","%s/%s/nodes.dmp"%(datadir,kaijudirname)]
                cmd += ["-n","%s/%s/names.dmp"%(datadir,kaijudirname)]
                cmd += ["-i","%s/%s"%(datadir, filename)]
                cmd += ["-r","genus"]
                cmd += ["-m","1"]
                cmd += ["-o","%s/%s"%(datadir, reportoutfile)]
                print("Running this docker command from dir %s:"%(pwd))
                print(" ".join(cmd))
                print()
                subprocess.call(cmd, cwd=pwd)
            # ------------------------
            # Report: genera comprising >= 1% of *classified* reads (-u).
            # BUG FIX: the original wrote this report to reportoutfile,
            # clobbering the total-reads report; classifiedreportoutfile
            # was computed but never used, and only the first report's
            # existence was checked before running both commands.
            if(os.path.isfile(classifiedreportoutfile)):
                print("Skipping file %s, file exists."%(classifiedreportoutfile))
            else:
                cmd = ["docker","run"]
                cmd += ["-v","%s:%s"%(pwd,datadir)]
                cmd += [kaijuurl]
                cmd += ["kaijuReport"]
                cmd += ["-v"]
                cmd += ["-t","%s/%s/nodes.dmp"%(datadir,kaijudirname)]
                cmd += ["-n","%s/%s/names.dmp"%(datadir,kaijudirname)]
                cmd += ["-i","%s/%s"%(datadir, filename)]
                cmd += ["-r","genus"]
                cmd += ["-m","1"]
                cmd += ["-u"]
                cmd += ["-o","%s/%s"%(datadir, classifiedreportoutfile)]
                print("Running this docker command from dir %s:"%(pwd))
                print(" ".join(cmd))
                print()
                subprocess.call(cmd, cwd=pwd)


if __name__=="__main__":
    filter_taxa()

38
scripts/filter_taxa.sh

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
#!/bin/bash
# Now let's filter out taxa with low abundances by obtaining genera that comprise at least 1 percent of the total reads:
#
# BUG FIX: the first loop's "kaijuReport" line was missing its trailing
# backslash, so the docker command ended there and the remaining options
# were run as a separate (broken) shell command.
kaijuurl="quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
for i in *trim{"2","30"}.out
do
docker run \
-v ${PWD}:/data \
${kaijuurl} \
kaijuReport \
-v \
-t /data/kaijudb/nodes.dmp \
-n /data/kaijudb/names.dmp \
-i /data/${i} \
-r genus \
-m 1 \
-o /data/${i}.kaiju_out_krona.1percenttotal.summary
done
# Now for comparison, let's take the genera that comprise at least 1 percent of all of the classified reads:
for i in *trim{"2","30"}.out
do
docker run \
-v ${PWD}:/data \
${kaijuurl} \
kaijuReport \
-v \
-t /data/kaijudb/nodes.dmp \
-n /data/kaijudb/names.dmp \
-i /data/${i} \
-r genus \
-m 1 \
-u \
-o /data/${i}.kaiju_out_krona.1percentclassified.summary
done

13
scripts/get_prefix.py

@ -1,13 +0,0 @@ @@ -1,13 +0,0 @@
import re


def get_prefix(filename, suffix):
    """Strip *suffix* from the end of *filename* and return the prefix.

    The suffix is substituted into a regular expression, so regex
    metacharacters in it (e.g. ".") match any character; callers pass
    literal file suffixes.
    """
    return re.search("^(.*)%s" % (suffix), filename).group(1)


if __name__=="__main__":
    print("Do not run this script directly")

30
scripts/get_sbt.py

@ -1,30 +0,0 @@ @@ -1,30 +0,0 @@
#!/usr/bin/python3
#
# Download the precomputed microbial sequence bloom trees (SBTs) from S3:
# one tarball per database (refseq, genbank) and k-mer size (21, 31, 51).
import os
import subprocess

pwd = os.getcwd()
datadir = pwd+"/data"
subprocess.call(["mkdir","-p",datadir], cwd=pwd)

for prefix in ["refseq","genbank"]:
    for kmers in [21,31,51]:
        tarball = "microbe-%s-sbt-k%d-2017.05.09.tar.gz"%(prefix,kmers)
        remote = "https://s3-us-west-1.amazonaws.com/spacegraphcats.ucdavis.edu/%s"%(tarball)
        subprocess.call(["curl","-O",remote])

# The individual signatures for the above SBTs were calculated as follows:
#
# sourmash compute -k 4,5 \
# -n 2000 \
# --track-abundance \
# --name-from-first \
# -o {output} \
# {input}
#
# sourmash compute -k 21,31,51 \
# --scaled 2000 \
# --track-abundance \
# --name-from-first \
# -o {output} \
# {input}

22
scripts/get_trimmed_data.py

@ -1,22 +0,0 @@ @@ -1,22 +0,0 @@
#!/usr/bin/python3
#
# If you have not made trimmed data,
# you can download it.
#
# This gets the data from OSF using
# URLs and filenames in trimmed_data.dat.
import subprocess


def get_trimmed_data():
    """Download every file listed in trimmed_data.dat (columns: filename url)."""
    with open('trimmed_data.dat','r') as datfile:
        for raw in datfile.readlines():
            fields = raw.split()
            # Column 0 is the local filename, column 1 is the OSF URL.
            cmd = ["wget", fields[1], "-O", fields[0]]
            print("Calling command %s"%(" ".join(cmd)))
            subprocess.call(cmd)


if __name__=="__main__":
    get_trimmed_data()

23
scripts/install_pyenv.py

@ -1,23 +0,0 @@ @@ -1,23 +0,0 @@
#!/usr/bin/env python
import getpass
import subprocess


def install_pyenv():
    """
    Install pyenv for the current (non-root) user by piping the official
    pyenv-installer script into bash.

    Raises:
        Exception: if run as root.
    """
    user = getpass.getuser()
    if(user=="root"):
        raise Exception("You are root - you should run this script as a normal user.")
    else:
        # Install pyenv
        curlcmd = ["curl","-L","https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer"]
        curlproc = subprocess.Popen(curlcmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        bashproc = subprocess.Popen(["/bin/bash"], stdin=curlproc.stdout, stdout=subprocess.DEVNULL)
        # BUG FIX: close our copy of the pipe (so bash sees EOF when curl
        # exits) and wait for the installer to finish -- the original
        # returned immediately, so the script could exit before pyenv was
        # actually installed.
        curlproc.stdout.close()
        bashproc.wait()
        # We don't need to add ~/.pyenv/bin to $PATH,
        # it is already done.


if __name__=="__main__":
    install_pyenv()

47
scripts/install_singularity.py

@ -1,47 +0,0 @@ @@ -1,47 +0,0 @@
#!/usr/bin/env python
import getpass
import subprocess


def install_singularity():
    """
    Install docker and singularity-container on Ubuntu.

    Raises:
        Exception: if not run as root (apt-get commands require root).
    """
    user = getpass.getuser()
    if(user!="root"):
        raise Exception("You are not root - this script requires root (apt-get commands).")
    else:
        # -----------------------
        # Update aptitude and install dependencies
        aptupdatecmd = ["apt-get","-y","update"]
        subprocess.call(aptupdatecmd)
        aptinstallcmd = ["apt-get","-y","install"]
        subprocess.call(aptinstallcmd+["zlib1g-dev"])
        subprocess.call(aptinstallcmd+["ncurses-dev"])
        # -----------------------
        # Install docker: pipe the get.docker.com script into bash,
        # and wait for it to finish before continuing.
        wgetproc = subprocess.Popen(["wget","-qO-","https://get.docker.com"], stdout=subprocess.PIPE)
        bashproc = subprocess.Popen(["/bin/bash"], stdin=wgetproc.stdout, stdout=subprocess.PIPE)
        wgetproc.stdout.close()
        bashproc.wait()
        subprocess.call(["usermod","-aG","docker","ubuntu"])
        # -----------------------
        # Install singularity from neurodebian.
        # BUG FIX: the original passed "url | tee file" as a *single* wget
        # argument (subprocess runs no shell, so wget saw a garbage URL),
        # and then piped the apt sources list into /bin/bash, which would
        # have tried to *execute* it.  Write the list with tee instead.
        wgetproc = subprocess.Popen(["wget","-O-","http://neuro.debian.net/lists/xenial.us-ca.full"], stdout=subprocess.PIPE)
        teeproc = subprocess.Popen(["tee","/etc/apt/sources.list.d/neurodebian.sources.list"], stdin=wgetproc.stdout, stdout=subprocess.DEVNULL)
        wgetproc.stdout.close()
        teeproc.wait()
        keyupdatecmd = ["apt-key","adv","--recv-keys","--keyserver","hkp://pool.sks-keyservers.net:80","0xA5D32F012649A5A9"]
        subprocess.call(keyupdatecmd)
        subprocess.call(aptupdatecmd)
        subprocess.call(aptinstallcmd+["singularity-container"])
        print("-"*40)
        print()
        print("Singularity is now installed. Log out and log back in for docker group to take effect.")
        print()
        print("-"*40)


if __name__=="__main__":
    install_singularity()

50
scripts/install_snakemake.py

@ -1,50 +0,0 @@ @@ -1,50 +0,0 @@
#!/usr/bin/env python
import os
import getpass
import tempfile
import subprocess


def install_snakemake():
    """
    Install miniconda (via pyenv), snakemake, and the OSF CLI client
    for the current (non-root) user.

    Raises:
        Exception: if run as root.
    """
    user = getpass.getuser()
    if(user=="root"):
        raise Exception("You are root - you should run this script as a normal user.")
    else:
        # ---------------------------
        # Install miniconda through pyenv and make it the global python
        conda_version = "miniconda3-4.3.30"
        installcmd = ["pyenv","install",conda_version]
        subprocess.call(installcmd)
        globalcmd = ["pyenv","global",conda_version]
        subprocess.call(globalcmd)
        # ---------------------------
        # Install snakemake
        pyenvbin = os.environ['HOME']
        condabin = pyenvbin+"/.pyenv/shims/conda"
        # BUG FIX: pass --yes so conda does not block on an interactive
        # prompt in this unattended script, and use conda's real channel
        # name "defaults" (the original added a nonexistent "default").
        subprocess.call([condabin,"update","--yes"])
        subprocess.call([condabin,"config","--add","channels","r"])
        subprocess.call([condabin,"config","--add","channels","defaults"])
        subprocess.call([condabin,"config","--add","channels","conda-forge"])
        subprocess.call([condabin,"config","--add","channels","bioconda"])
        subprocess.call([condabin,"install","--yes","-c","bioconda","snakemake"])
        # ---------------------------
        # Install osf cli client
        pyenvbin = os.environ['HOME']
        pipbin = pyenvbin+"/.pyenv/shims/pip"
        subprocess.call([pipbin,"install","--upgrade","pip"])
        subprocess.call([pipbin,"install","--user","osfclient"])


# Backward-compatible alias: the original function was (misleadingly)
# named install_pyenv even though it installs conda/snakemake.
install_pyenv = install_snakemake

if __name__=="__main__":
    install_snakemake()

82
scripts/kaiju2krona.py

@ -1,82 +0,0 @@ @@ -1,82 +0,0 @@
#!/usr/bin/python3
import glob, os
import subprocess
from get_prefix import get_prefix
def kaiju2krona():
    """
    Convert kaiju results to krona input files and generate a genus-level
    summary report for each, via the kaiju biocontainer.

    For every *.trim2.out / *.trim30.out file in the current directory,
    write <prefix>.trim{2,30}.kaiju.out.krona and
    <prefix>.trim{2,30}.kaiju_out_krona.summary, skipping outputs that
    already exist.
    """
    kaijudirname = "kaijudb"
    kaijuurl = "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
    trims = [2,30]
    suffixes = [".trim2.out",".trim30.out"]
    # BUG FIX: the original looped `for suffix,ntrim in suffixes,trims`,
    # which iterates over the two *lists* themselves (first iteration:
    # suffix=".trim2.out", ntrim=".trim30.out"; second: suffix=2,
    # ntrim=30).  zip() pairs each suffix with its trim level.
    for suffix,ntrim in zip(suffixes,trims):
        filenames = glob.glob("*"+suffix)
        for filename in filenames:
            prefix = get_prefix(filename,suffix)
            # ------------------------
            # Kaiju to krona
            kronaoutfile = prefix + ".trim" + str(ntrim) + ".kaiju.out.krona"
            if(os.path.isfile(kronaoutfile)):
                print("Skipping file %s, file exists."%(kronaoutfile))
            else:
                datadir = "/data"
                pwd = os.getcwd()
                cmd = ["docker","run"]
                cmd += ["-v","%s:%s"%(pwd,datadir)]
                cmd += [kaijuurl]
                cmd += ["kaiju2krona"]
                cmd += ["-v"]
                cmd += ["-i","%s/%s"%(datadir, filename)]
                cmd += ["-o","%s/%s"%(datadir, kronaoutfile)]
                cmd += ["-t","%s/%s/nodes.dmp"%(datadir,kaijudirname)]
                cmd += ["-n","%s/%s/names.dmp"%(datadir,kaijudirname)]
                print("Running this docker command from dir %s:"%(pwd))
                print(" ".join(cmd))
                print()
                subprocess.call(cmd, cwd=pwd)
            # ------------------------
            # Kaiju Report
            reportoutfile = prefix + ".trim" + str(ntrim) + ".kaiju_out_krona.summary"
            if(os.path.isfile(reportoutfile)):
                print("Skipping file %s, file exists."%(reportoutfile))
            else:
                datadir = "/data"
                pwd = os.getcwd()
                cmd = ["docker","run"]
                cmd += ["-v","%s:%s"%(pwd,datadir)]
                cmd += [kaijuurl]
                cmd += ["kaijuReport"]
                cmd += ["-v"]
                cmd += ["-t","%s/%s/nodes.dmp"%(datadir,kaijudirname)]
                cmd += ["-n","%s/%s/names.dmp"%(datadir,kaijudirname)]
                cmd += ["-i","%s/%s"%(datadir, filename)]
                cmd += ["-r","genus"]
                cmd += ["-o","%s/%s"%(datadir, reportoutfile)]
                print("Running this docker command from dir %s:"%(pwd))
                print(" ".join(cmd))
                print()
                subprocess.call(cmd, cwd=pwd)


if __name__=="__main__":
    kaiju2krona()

33
scripts/kaiju2krona.sh

@ -1,33 +0,0 @@ @@ -1,33 +0,0 @@
#!/bin/bash
# Convert kaiju file to format readable by krona:
kaijuurl="quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
kaijudir="kaijudb"
# Pass each kaiju output file through kaiju2krona.
for i in *trim{"2","30"}.out
do
docker run \
-v ${PWD}:/data \
${kaijuurl} \
kaiju2krona \
-v \
-t /data/${kaijudir}/nodes.dmp \
-n /data/${kaijudir}/names.dmp \
-i /data/${i} \
-o /data/${i}.kaiju.out.krona
done
# Also generate a genus-level kaijuReport summary for each output file.
for i in *trim{"2","30"}.out
do
docker run \
-v ${PWD}:/data \
${kaijuurl} \
kaijuReport \
-v \
-t /data/${kaijudir}/nodes.dmp \
-n /data/${kaijudir}/names.dmp \
-i /data/${i} \
-r genus \
-o /data/${i}.kaiju_out_krona.summary
done

12
scripts/prepare_biocontainers.py

@ -1,12 +0,0 @@ @@ -1,12 +0,0 @@
#!/usr/bin/python3
#
# Pull the biocontainer images used by the workflow up front so the
# later steps do not pause on their first docker run.
import os
import subprocess

quayurls = [
    "quay.io/biocontainers/sourmash:2.0.0a3--py36_0",
    "quay.io/biocontainers/krona:2.7--pl5.22.0_1",
    "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0",
]

pwd = os.getcwd()
for image in quayurls:
    subprocess.call(["docker","pull",image], cwd=pwd)

8
scripts/run_all_part1.sh

@ -1,8 +0,0 @@ @@ -1,8 +0,0 @@
#!/bin/bash
# Part 1 of the dahak setup: install pyenv and snakemake as the normal
# user, then singularity (which needs root for apt-get) via sudo.
./install_pyenv.py
./install_snakemake.py
sudo ./install_singularity.py

24
scripts/run_all_part2.sh

@ -1,24 +0,0 @@ @@ -1,24 +0,0 @@
#!/bin/bash
# Part 2: run the taxonomic classification workflow end to end.
./prepare_biocontainers.py
./get_sbt.py
./get_trimmed_data.py
./calculate_signatures.py
## This script takes a really long time
#./compare_components.sh
./unpack_kaiju.py
# This script takes a long time
./run_kaiju.py
./kaiju2krona.py
./filter_taxa.py
# BUG FIX: was "./visualize_korona.py" (typo); the script in this
# directory is visualize_krona.py.
./visualize_krona.py

58
scripts/run_kaiju.py

@ -1,58 +0,0 @@ @@ -1,58 +0,0 @@
#!/usr/bin/python3
import glob, os, re
import subprocess
def get_prefix(filename, suffix):
    """Return the leading part of *filename* once *suffix* is removed.

    Note: the suffix is interpolated into a regex, so "." in it matches
    any character; callers pass literal file suffixes.
    """
    found = re.search("^(.*)%s" % (suffix), filename)
    return found.group(1)
def run_kaiju():
    """
    Run kaiju (via its biocontainer) on every pair of trimmed read files.

    For each *_1.trim{2,30}.fq.gz / *_2.trim{2,30}.fq.gz pair in the
    current directory, write <prefix>.kaiju_output.trim{2,30}.out,
    skipping pairs whose output file already exists.
    """
    kaijudirname = "kaijudb"
    suffixes = [("_1.trim2.fq.gz", "_2.trim2.fq.gz", ".kaiju_output.trim2.out"),
                ("_1.trim30.fq.gz","_2.trim30.fq.gz",".kaiju_output.trim30.out")]
    for suffix1, suffix2, kaijusuffix in suffixes:
        for fwd_reads in glob.glob("*" + suffix1):
            base = get_prefix(fwd_reads, suffix1)
            outfile = base + kaijusuffix
            if os.path.isfile(outfile):
                print("Skipping file %s, file exists."%(outfile))
                continue
            mount = "/data"
            workdir = os.getcwd()
            # Assemble the docker invocation in one go: mount the working
            # directory at /data and hand kaiju the paired read files.
            cmd = ["docker","run",
                   "-v","%s:%s"%(workdir,mount),
                   "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0",
                   "kaiju",
                   "-x","-v",
                   "-t","%s/%s/nodes.dmp"%(mount,kaijudirname),
                   "-f","%s/%s/kaiju_db_nr_euk.fmi"%(mount,kaijudirname),
                   "-i","%s/%s%s"%(mount,base,suffix1),
                   "-j","%s/%s%s"%(mount,base,suffix2),
                   "-o","%s/%s"%(mount,outfile),
                   "-z","4"]
            print("Running this docker command from dir %s:"%(workdir))
            print(" ".join(cmd))
            print()
            subprocess.call(cmd, cwd=workdir)


if __name__=="__main__":
    run_kaiju()

45
scripts/run_kaiju.sh

@ -1,45 +0,0 @@ @@ -1,45 +0,0 @@
#!/bin/bash
# Run kaiju (via its biocontainer) on each pair of trimmed read files.
for filename in *1.trim2.fq.gz
do
#Remove _1.trim2.fq from file name to create base
base=$(basename $filename _1.trim2.fq.gz)
echo $base
# Command to run container interactively:
#docker run -it --rm -v ${PWD}:/data quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0 /bin/bash
docker run \
-v ${PWD}:/data \
quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0 \
kaiju \
-x \
-v \
-t /data/kaijudb/nodes.dmp \
-f /data/kaijudb/kaiju_db_nr_euk.fmi \
-i /data/${base}_1.trim2.fq.gz \
-j /data/${base}_2.trim2.fq.gz \
-o /data/${base}.kaiju_output.trim2.out \
-z 4
done
for filename in *1.trim30.fq.gz
do
#Remove _1.trim30.fq from file name to create base
base=$(basename $filename _1.trim30.fq.gz)
echo $base
# BUG FIX: the database path below was "data/kaijudb/..." -- missing the
# leading slash on the /data container mount, so kaiju could not find
# the .fmi index.
docker run \
-v ${PWD}:/data \
quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0 \
kaiju \
-x \
-v \
-t /data/kaijudb/nodes.dmp \
-f /data/kaijudb/kaiju_db_nr_euk.fmi \
-i /data/${base}_1.trim30.fq.gz \
-j /data/${base}_2.trim30.fq.gz \
-o /data/${base}.kaiju_output.trim30.out \
-z 4
done

16
scripts/trimmed_data.dat

@ -1,16 +0,0 @@ @@ -1,16 +0,0 @@
SRR606249_1.trim2.fq.gz https://osf.io/tzkjr/download
SRR606249_2.trim2.fq.gz https://osf.io/sd968/download
SRR606249_subset50_1.trim2.fq.gz https://osf.io/acs5k/download
SRR606249_subset50_2.trim2.fq.gz https://osf.io/bem28/download
SRR606249_subset25_1.trim2.fq.gz https://osf.io/syf3m/download
SRR606249_subset25_2.trim2.fq.gz https://osf.io/zbcrx/download
SRR606249_subset10_1.trim2.fq.gz https://osf.io/ksu3e/download
SRR606249_subset10_2.trim2.fq.gz https://osf.io/k9tqn/download
SRR606249_1.trim30.fq.gz https://osf.io/qtzyk/download
SRR606249_2.trim30.fq.gz https://osf.io/dumz6/download
SRR606249_subset50_1.trim30.fq.gz https://osf.io/v5jhs/download
SRR606249_subset50_2.trim30.fq.gz https://osf.io/q4cfa/download
SRR606249_subset25_1.trim30.fq.gz https://osf.io/jcp5n/download
SRR606249_subset25_2.trim30.fq.gz https://osf.io/qevh9/download
SRR606249_subset10_1.trim30.fq.gz https://osf.io/rtvuz/download
SRR606249_subset10_2.trim30.fq.gz https://osf.io/zq4f9/download

14
scripts/unpack_kaiju.py

@ -1,14 +0,0 @@ @@ -1,14 +0,0 @@
#!/usr/bin/python3
#
# Download the kaiju NR-euk database index and unpack it into ./kaijudb.
import os
import subprocess

pwd = os.getcwd()
kaijudir = pwd+"/kaijudb"
tarfile = "kaiju_index_nr_euk.tgz"
url = "http://kaiju.binf.ku.dk/database/kaiju_index_nr_euk.tgz"

# Create the target directory, then download, extract, and clean up the
# tarball from inside it.
subprocess.call(["mkdir",kaijudir], cwd=pwd)
for step in (["curl","-LO",url],
             ["tar","zxvf",tarfile],
             ["rm","-f",tarfile]):
    subprocess.call(step, cwd=kaijudir)

12
scripts/unpack_kaiju.sh

@ -1,12 +0,0 @@ @@ -1,12 +0,0 @@
#!/bin/bash
#
# Unpack the tgz files from/for kaiju steps
#
# BUG FIX: the original created ${kaijudir} but then downloaded and
# extracted the tarball in $PWD; the companion unpack_kaiju.py runs
# every step with cwd=kaijudir, so do the same here by cd-ing first.
kaijudir="${PWD}/kaijudb"
tarfile="kaiju_index_nr_euk.tgz"
mkdir -p ${kaijudir}
cd ${kaijudir}
curl -LO "http://kaiju.binf.ku.dk/database/${tarfile}"
tar xzf ${tarfile}
rm -f ${tarfile}

57
scripts/visualize_krona.py

@ -1,57 +0,0 @@ @@ -1,57 +0,0 @@
#!/usr/bin/python3
import glob, os, re
import subprocess
from get_prefix import get_prefix
def pull_krona():
    """Pull the krona biocontainer image used to visualize kaiju results."""
    kronaurl = "quay.io/biocontainers/krona:2.7--pl5.22.0_1"
    subprocess.call(["docker","pull",kronaurl])


def visualize_krona():
    """Render each kaiju/krona summary file to an HTML page via ktImportText."""
    kaijudirname = "kaijudb"  # kept from the original; not used below
    kronaurl = "quay.io/biocontainers/krona:2.7--pl5.22.0_1"
    cases = ["kaiju_out_krona",
             "kaiju_out_krona.1percenttotal",
             "kaiju_out_krona.1percentclassified"]
    for case in cases:
        summary_suffix = case + ".summary"
        html_suffix = case + ".html"
        for summary in glob.glob("*" + summary_suffix):
            stem = get_prefix(summary, summary_suffix)
            html_out = stem + html_suffix
            if os.path.isfile(html_out):
                print("Skipping file %s, file exists."%(html_out))
                continue
            mount = "/data"
            workdir = os.getcwd()
            cmd = ["docker","run",
                   "-v","%s:%s"%(workdir,mount),
                   kronaurl,
                   "ktImportText",
                   "-o","%s/%s"%(mount,html_out),
                   "%s/%s"%(mount,summary)]
            print("Running this docker command from dir %s:"%(workdir))
            print(" ".join(cmd))
            print()
            subprocess.call(cmd, cwd=workdir)


if __name__=="__main__":
    pull_krona()
    visualize_krona()

40
scripts/visualize_krona.sh

@ -1,40 +0,0 @@ @@ -1,40 +0,0 @@
#!/bin/bash
# Generate krona HTML visualizations from the kaiju summary files.
kaijudir="${PWD}/kaijudb"
kronaurl="quay.io/biocontainers/krona:2.7--pl5.22.0_1"
docker pull ${kronaurl}
# BUG FIX: the summary files are globbed from ${PWD} below (and are
# written there by filter_taxa.sh / kaiju2krona.sh), so mount ${PWD}
# -- not ${kaijudir} -- at /data inside the container, matching
# visualize_krona.py.
suffix="kaiju_out_krona"
for i in *${suffix}.summary
do
docker run \
-v ${PWD}:/data \
${kronaurl} \
ktImportText \
-o /data/${i}.${suffix}.html \
/data/${i}
done
suffix="kaiju_out_krona.1percenttotal"
for i in *${suffix}.summary
do
docker run \
-v ${PWD}:/data \
${kronaurl} \
ktImportText \
-o /data/${i}.${suffix}.html \
/data/${i}
done
suffix="kaiju_out_krona.1percentclassified"
for i in *${suffix}.summary
do
docker run \
-v ${PWD}:/data \
${kronaurl} \
ktImportText \
-o /data/${i}.${suffix}.html \
/data/${i}
done
Loading…
Cancel
Save