25 changed files with 0 additions and 897 deletions
@ -1,38 +0,0 @@
@@ -1,38 +0,0 @@
|
||||
# dahak yeti scripts |
||||
|
||||
This contains scripts to prepare for Dahak workflows. |
||||
|
||||
## setup for dahak |
||||
|
||||
To get dahak set up: |
||||
|
||||
``` |
||||
./install_pyenv.py |
||||
|
||||
./install_snakemake.py |
||||
|
||||
sudo ./install_singularity.py |
||||
``` |
||||
|
||||
## run taxonomic classification workflow |
||||
|
||||
`prepare_biocontainers.sh` - download Docker containers from biocontainers |
||||
|
||||
`get_sbt.sh` - get sequence bloom trees for microbial genomes |
||||
|
||||
`get_trimmed_data.sh` - get trimmed data from OSF from URLs in `trimmed_data.dat` |
||||
|
||||
`calculate_signatures.sh` - run sourmash biocontainer to compute signatures |
||||
|
||||
(Will take a really long time) `compare_components.sh` - run `sourmash gather` on each signature against the GenBank and RefSeq sequence bloom trees |
||||
|
||||
`unpack_kaiju.py` - download and unpack tarballs from the NR Euk. database |
||||
|
||||
(Will take a really long time) `run_kaiju.py` - run kaiju through docker |
||||
|
||||
`kaiju2krona.py` - convert kaiju output to krona output |
||||
|
||||
`filter_taxa.py` - generate the same kaiju reports as above, but keep only the genera that make up at least 1 percent of the reads |
||||
|
||||
`visualize_krona.py` - generate krona html output. |
||||
|
@ -1,41 +0,0 @@
@@ -1,41 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
import glob, os |
||||
from get_prefix import get_prefix |
||||
|
||||
|
||||
def calc_signatures():
    """
    Calculate sourmash signatures for the paired-end trim2 reads.

    For every ``*_1.trim2.fq.gz`` file in the current directory, run
    ``sourmash compute`` inside the sourmash biocontainer on the
    ``_1``/``_2`` read pair and write
    ``<prefix>.trim2.scaled10k.k21_31_51.sig`` next to the reads.
    Read sets whose signature file already exists are skipped.
    """
    # Local import: this script's import line only brings in glob and os.
    import subprocess

    suffix1 = "_1.trim2.fq.gz"
    sigsuffix = ".trim2.scaled10k.k21_31_51.sig"
    datadir = "/data"  # where the working directory is mounted in the container
    pwd = os.getcwd()

    for filename in glob.glob("*" + suffix1):
        prefix = get_prefix(filename, suffix1)
        targetfile = prefix + sigsuffix

        if os.path.isfile(targetfile):
            print("Skipping file %s, file exists." % (targetfile))
            continue

        cmd = ["docker", "run"]
        cmd += ["-v", "%s:%s" % (pwd, datadir)]
        cmd += ["quay.io/biocontainers/sourmash:2.0.0a3--py36_0"]
        cmd += ["sourmash", "compute"]
        cmd += ["--merge", "%s/%s.trim2.fq.gz" % (datadir, prefix)]
        cmd += ["--track-abundance"]
        # Option and value must be separate argv entries; there is no shell
        # to split "--scaled 10000" into two words.
        cmd += ["--scaled", "10000"]
        cmd += ["-k", "21,31,51"]
        # Both mates of the pair: <prefix>_1 and <prefix>_2.
        cmd += ["%s/%s_%d.trim2.fq.gz" % (datadir, prefix, mate) for mate in (1, 2)]
        cmd += ["-o", "%s/%s" % (datadir, targetfile)]

        print("Running this docker command from dir %s:" % (pwd))
        print(" ".join(cmd))
        print()

        subprocess.call(cmd, cwd=pwd)
||||
|
||||
if __name__=="__main__": |
||||
calc_signatures() |
||||
|
@ -1,58 +0,0 @@
@@ -1,58 +0,0 @@
|
||||
#!/bin/bash
#
# Calculate signatures from the trimmed data using sourmash.
#
# For each trim level (trim2, trim30) and each *_1.<trim>.fq.gz file in the
# current directory, run "sourmash compute" in the sourmash biocontainer on
# the _1/_2 read pair, unless the signature file already exists.

sourmashurl="quay.io/biocontainers/sourmash:2.0.0a3--py36_0"

for trim in trim2 trim30
do
    sigsuffix=".${trim}.scaled10k.k21_31_51.sig"

    for filename in *_1."${trim}".fq.gz
    do
        # With no matching files the glob stays a literal pattern; skip it.
        [ -e "${filename}" ] || continue

        # Remove _1.<trim>.fq.gz from the file name to create the base
        base=$(basename "${filename}" "_1.${trim}.fq.gz")
        echo "${base}"

        if [ -f "${base}${sigsuffix}" ]
        then
            # skip
            echo "Skipping file ${base}${sigsuffix}, file exists."
        else
            docker run \
                -v "${PWD}":/data \
                "${sourmashurl}" \
                sourmash compute \
                --merge "/data/${base}.${trim}.fq.gz" \
                --track-abundance \
                --scaled 10000 \
                -k 21,31,51 \
                "/data/${base}_1.${trim}.fq.gz" \
                "/data/${base}_2.${trim}.fq.gz" \
                -o "/data/${base}${sigsuffix}"
        fi
    done
done
||||
|
@ -1,27 +0,0 @@
@@ -1,27 +0,0 @@
|
||||
#!/usr/bin/python3
import glob
import os
import subprocess

kmers = [21, 31, 51]


def compare_components():
    """
    Run ``sourmash gather`` on every signature in the current directory.

    For each k-mer length in ``kmers`` and each ``*sig`` file, the
    signature is searched against ``genbank-k<k>.sbt.json`` and
    ``refseq-k<k>.sbt.json`` inside the sourmash biocontainer.  The
    current directory is mounted at /data, and all inputs and outputs are
    addressed under that mount.
    """
    datadir = "/data"
    pwd = os.getcwd()
    # Same image tag that the other scripts in this directory use.
    sourmashurl = "quay.io/biocontainers/sourmash:2.0.0a3--py36_0"

    for kmer_len in kmers:
        for sig in glob.glob("*sig"):
            cmd = ["docker", "run"]
            cmd += ["-v", "%s:%s" % (pwd, datadir)]
            cmd += [sourmashurl]
            cmd += ["sourmash", "gather"]
            cmd += ["-k", str(kmer_len)]
            cmd += ["%s/%s" % (datadir, sig)]
            cmd += ["%s/genbank-k%d.sbt.json" % (datadir, kmer_len)]
            cmd += ["%s/refseq-k%d.sbt.json" % (datadir, kmer_len)]
            cmd += ["-o", "%s/%s.k%d.gather.output.csv" % (datadir, sig, kmer_len)]
            cmd += ["--output-unassigned",
                    "%s/%s.k%dgather_unassigned.output.csv" % (datadir, sig, kmer_len)]
            cmd += ["--save-matches",
                    "%s/%s.k%d.gather_matches" % (datadir, sig, kmer_len)]

            print("Running docker command from %s:" % (pwd))
            print(" ".join(cmd))

            subprocess.call(cmd, cwd=pwd)


if __name__ == "__main__":
    compare_components()
||||
|
@ -1,24 +0,0 @@
@@ -1,24 +0,0 @@
|
||||
#!/bin/bash
#
# Skipping this script for now,
# due to missing JSON files.
#
# Runs "sourmash gather" for each k-mer length on every *sig file in the
# current directory, against the genbank and refseq SBT JSON files.
# The current directory is mounted at /data, so every path handed to
# sourmash is given under /data.

sourmashurl="quay.io/biocontainers/sourmash:2.0.0a3--py36_0"
for kmer_len in 21 31 51
do
    for sig in *sig
    do
        # With no matching files the glob stays a literal pattern; skip it.
        [ -e "${sig}" ] || continue

        docker run \
            -v "${PWD}":/data \
            "${sourmashurl}" \
            sourmash gather \
            -k "${kmer_len}" \
            "/data/${sig}" \
            "/data/genbank-k${kmer_len}.sbt.json" \
            "/data/refseq-k${kmer_len}.sbt.json" \
            -o "/data/${sig}.k${kmer_len}.gather.output.csv" \
            --output-unassigned "/data/${sig}.k${kmer_len}gather_unassigned.output.csv" \
            --save-matches "/data/${sig}.k${kmer_len}.gather_matches"
    done
done
||||
|
@ -1,85 +0,0 @@
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
import glob, os, re |
||||
import subprocess |
||||
|
||||
|
||||
def get_prefix(filename, suffix):
    """
    Strip the suffix from a filename.

    The suffix is compared literally and must be at the very end of the
    name.  It is not treated as a regular expression, so a "." in the
    suffix only matches a real dot.

    Returns the part of ``filename`` that precedes ``suffix``.
    Raises ValueError if ``filename`` does not end with ``suffix``.
    """
    if not filename.endswith(suffix):
        raise ValueError(
            "Filename %r does not end with suffix %r" % (filename, suffix))
    # Slice by length instead of [:-len(suffix)] so an empty suffix
    # returns the whole filename.
    return filename[:len(filename) - len(suffix)]
||||
|
||||
|
||||
def filter_taxa():
    """
    Filter taxa.

    For every kaiju output file (``*.trim2.out`` / ``*.trim30.out``) in the
    current directory, run ``kaijuReport`` in the kaiju biocontainer twice
    at genus rank with ``-m 1``:

    * once over all reads      -> ``...kaiju_out_krona.1percenttotal.summary``
    * once with ``-u`` added   -> ``...kaiju_out_krona.1percentclassified.summary``

    A report whose output file already exists is skipped.
    """
    kaijudirname = "kaijudb"
    kaijuurl = "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
    suffixes = [".trim2.out", ".trim30.out"]
    datadir = "/data"  # where the working directory is mounted in the container
    pwd = os.getcwd()

    for suffix in suffixes:
        # ".trim2.out" -> ".trim2".  Kept in the output name so the trim2
        # and trim30 reports of one sample do not share a file name.
        trimtag = suffix[:-len(".out")]

        for filename in glob.glob("*" + suffix):
            prefix = get_prefix(filename, suffix)

            reportoutfile = prefix + trimtag + ".kaiju_out_krona.1percenttotal.summary"
            classifiedreportoutfile = prefix + trimtag + ".kaiju_out_krona.1percentclassified.summary"

            # (output file, extra kaijuReport flags)
            reports = [
                (reportoutfile, []),
                (classifiedreportoutfile, ["-u"]),
            ]

            for outfile, extraflags in reports:
                if os.path.isfile(outfile):
                    print("Skipping file %s, file exists." % (outfile))
                    continue

                cmd = ["docker", "run"]
                cmd += ["-v", "%s:%s" % (pwd, datadir)]
                cmd += [kaijuurl]
                cmd += ["kaijuReport"]
                cmd += ["-v"]
                cmd += ["-t", "%s/%s/nodes.dmp" % (datadir, kaijudirname)]
                cmd += ["-n", "%s/%s/names.dmp" % (datadir, kaijudirname)]
                cmd += ["-i", "%s/%s" % (datadir, filename)]
                cmd += ["-r", "genus"]
                cmd += ["-m", "1"]
                cmd += extraflags
                # Each report goes to its own file; writing both to the
                # same path would overwrite the first report.
                cmd += ["-o", "%s/%s" % (datadir, outfile)]

                print("Running this docker command from dir %s:" % (pwd))
                print(" ".join(cmd))
                print()

                subprocess.call(cmd, cwd=pwd)
||||
|
||||
|
||||
|
||||
if __name__=="__main__": |
||||
filter_taxa() |
||||
|
@ -1,38 +0,0 @@
@@ -1,38 +0,0 @@
|
||||
#!/bin/bash

# Now let's filter out taxa with low abundances by obtaining genera that comprise at least 1 percent of the total reads:

kaijuurl="quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
for i in *trim{"2","30"}.out
do
    # With no matching files the glob stays a literal pattern; skip it.
    [ -e "${i}" ] || continue

    # Every line of the command needs a trailing backslash; without one
    # after "kaijuReport" the options would run as a separate command.
    docker run \
        -v "${PWD}":/data \
        "${kaijuurl}" \
        kaijuReport \
        -v \
        -t /data/kaijudb/nodes.dmp \
        -n /data/kaijudb/names.dmp \
        -i "/data/${i}" \
        -r genus \
        -m 1 \
        -o "/data/${i}.kaiju_out_krona.1percenttotal.summary"
done

# Now for comparison, let's take the genera that comprise at least 1 percent of all of the classified reads:

for i in *trim{"2","30"}.out
do
    [ -e "${i}" ] || continue

    docker run \
        -v "${PWD}":/data \
        "${kaijuurl}" \
        kaijuReport \
        -v \
        -t /data/kaijudb/nodes.dmp \
        -n /data/kaijudb/names.dmp \
        -i "/data/${i}" \
        -r genus \
        -m 1 \
        -u \
        -o "/data/${i}.kaiju_out_krona.1percentclassified.summary"
done
||||
|
@ -1,13 +0,0 @@
@@ -1,13 +0,0 @@
|
||||
import re |
||||
|
||||
def get_prefix(filename, suffix):
    """
    Strip the suffix from a filename.

    The suffix is matched as a literal string at the end of ``filename``.
    It is not interpreted as a regular expression, so "." only matches a
    real dot and the suffix cannot match in the middle of the name.

    Returns ``filename`` with the trailing ``suffix`` removed.
    Raises ValueError if ``filename`` does not end with ``suffix``.
    """
    if not filename.endswith(suffix):
        raise ValueError(
            "Filename %r does not end with suffix %r" % (filename, suffix))
    # Length-based slice keeps the whole name when the suffix is empty.
    return filename[:len(filename) - len(suffix)]
||||
|
||||
if __name__=="__main__": |
||||
print("Do not run this script directly") |
||||
|
@ -1,30 +0,0 @@
@@ -1,30 +0,0 @@
|
||||
#!/usr/bin/python3
"""Download the microbial RefSeq/GenBank sequence bloom tree tarballs."""
import os
import subprocess

pwd = os.getcwd()
datadir = pwd + "/data"

# Create ./data; "-p" makes this a no-op when it already exists.
subprocess.call(["mkdir", "-p", datadir], cwd=pwd)

# One tarball per (database, k-mer length) combination.
tarballs = [
    "microbe-%s-sbt-k%d-2017.05.09.tar.gz" % (database, ksize)
    for database in ["refseq", "genbank"]
    for ksize in [21, 31, 51]
]

# NOTE(review): curl runs in the current directory, so the tarballs are
# saved there and not in datadir - confirm this is intended.
for tarball in tarballs:
    location = "https://s3-us-west-1.amazonaws.com/spacegraphcats.ucdavis.edu/%s" % (tarball)
    subprocess.call(["curl", "-O", location])
||||
|
||||
# The individual signatures for the above SBTs were calculated as follows: |
||||
# |
||||
# sourmash compute -k 4,5 \ |
||||
# -n 2000 \ |
||||
# --track-abundance \ |
||||
# --name-from-first \ |
||||
# -o {output} \ |
||||
# {input} |
||||
# |
||||
# sourmash compute -k 21,31,51 \ |
||||
# --scaled 2000 \ |
||||
# --track-abundance \ |
||||
# --name-from-first \ |
||||
# -o {output} \ |
||||
# {input} |
@ -1,22 +0,0 @@
@@ -1,22 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
# |
||||
# If you have not made trimmed data, |
||||
# you can download it. |
||||
# |
||||
# This gets the data from OSF using |
||||
# URLs and filenames in trimmed_data.dat. |
||||
|
||||
import subprocess |
||||
|
||||
def get_trimmed_data():
    """
    Download the trimmed read files listed in ``trimmed_data.dat``.

    Each non-empty line of ``trimmed_data.dat`` (read from the current
    directory) holds a target file name followed by its URL, separated by
    whitespace.  Every entry is fetched with ``wget <url> -O <fname>``.
    Lines with fewer than two fields, such as a blank line at the end of
    the file, are skipped.
    """
    with open('trimmed_data.dat', 'r') as f:
        for ln in f:
            fields = ln.split()
            if len(fields) < 2:
                # Blank or incomplete line: nothing to download.
                continue
            fname, url = fields[0], fields[1]
            cmd = ["wget", url, "-O", fname]
            print("Calling command %s" % (" ".join(cmd)))
            subprocess.call(cmd)
||||
|
||||
if __name__=="__main__": |
||||
get_trimmed_data() |
@ -1,23 +0,0 @@
@@ -1,23 +0,0 @@
|
||||
#!/usr/bin/env python |
||||
import getpass |
||||
import subprocess |
||||
|
||||
|
||||
def install_pyenv():
    """
    Install pyenv for the current (non-root) user.

    Downloads the pyenv-installer script with curl and pipes it into
    /bin/bash, then waits for both processes to finish so the installation
    is complete when this function returns.

    Raises Exception if run as root.
    """
    user = getpass.getuser()
    if user == "root":
        raise Exception("You are root - you should run this script as a normal user.")

    # Install pyenv: equivalent of "curl -L <installer> | bash"
    curlcmd = ["curl", "-L", "https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer"]
    curlproc = subprocess.Popen(curlcmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    bashproc = subprocess.Popen(["/bin/bash"], stdin=curlproc.stdout, stdout=subprocess.DEVNULL)

    # Close our copy of the pipe so bash holds the only read end; curl
    # then gets SIGPIPE if bash exits early.
    curlproc.stdout.close()

    # Without these waits the script returns while the installer is still
    # running in the background.
    bashproc.wait()
    curlproc.wait()

    if curlproc.returncode != 0 or bashproc.returncode != 0:
        print("pyenv installation may have failed (curl exit code %s, bash exit code %s)."
              % (curlproc.returncode, bashproc.returncode))

    # We don't need to add ~/.pyenv/bin to $PATH,
    # it is already done.
||||
|
||||
# We don't need to add ~/.pyenv/bin to $PATH, |
||||
# it is already done. |
||||
|
||||
|
||||
if __name__=="__main__": |
||||
install_pyenv() |
||||
|
||||
|
@ -1,47 +0,0 @@
@@ -1,47 +0,0 @@
|
||||
#!/usr/bin/env python |
||||
import getpass |
||||
import subprocess |
||||
|
||||
|
||||
def install_singularity():
    """
    Install docker and singularity on a Debian/Ubuntu machine.

    Steps, all run through subprocess:
      1. apt-get update, then install zlib1g-dev and ncurses-dev
      2. install docker with the get.docker.com script and add the
         "ubuntu" user to the docker group
      3. add the NeuroDebian apt source and key, then install
         singularity-container

    Raises Exception if not run as root.
    """
    user = getpass.getuser()
    if user != "root":
        raise Exception("You are not root - this script requires root (apt-get commands).")

    # -----------------------
    # Update aptitude and install dependencies
    aptupdatecmd = ["apt-get", "-y", "update"]
    subprocess.call(aptupdatecmd)

    aptinstallcmd = ["apt-get", "-y", "install"]
    subprocess.call(aptinstallcmd + ["zlib1g-dev"])
    subprocess.call(aptinstallcmd + ["ncurses-dev"])

    # -----------------------
    # Install docker: equivalent of "wget -qO- https://get.docker.com | bash"
    wgetproc = subprocess.Popen(["wget", "-qO-", "https://get.docker.com"], stdout=subprocess.PIPE)
    # bash inherits our stdout: an unread PIPE would block the installer
    # once the pipe buffer fills.
    bashproc = subprocess.Popen(["/bin/bash"], stdin=wgetproc.stdout)
    wgetproc.stdout.close()
    # Wait for the installer so the docker group exists before usermod runs.
    bashproc.wait()
    wgetproc.wait()

    # NOTE(review): the user name "ubuntu" is hard-coded - confirm it
    # exists on the target machine.
    subprocess.call(["usermod", "-aG", "docker", "ubuntu"])

    # -----------------------
    # Install singularity:
    # Save the NeuroDebian source list straight into sources.list.d.
    # There is no shell here, so a "| tee <file>" inside the argument list
    # would be handed to wget as part of the URL, and the downloaded list
    # must not be piped into bash.
    sourceslist = "/etc/apt/sources.list.d/neurodebian.sources.list"
    subprocess.call(["wget", "-O", sourceslist, "http://neuro.debian.net/lists/xenial.us-ca.full"])

    keyupdatecmd = ["apt-key", "adv", "--recv-keys", "--keyserver", "hkp://pool.sks-keyservers.net:80", "0xA5D32F012649A5A9"]
    subprocess.call(keyupdatecmd)

    subprocess.call(aptupdatecmd)
    subprocess.call(aptinstallcmd + ["singularity-container"])

    print("-" * 40)
    print()
    print("Singularity is now installed. Log out and log back in for docker group to take effect.")
    print()
    print("-" * 40)
||||
|
||||
if __name__=="__main__": |
||||
install_singularity() |
||||
|
@ -1,50 +0,0 @@
@@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python |
||||
import os |
||||
import getpass |
||||
import tempfile |
||||
import subprocess |
||||
|
||||
|
||||
def install_pyenv():
    """
    Install miniconda (through pyenv), snakemake and the OSF client.

    Steps, all run through subprocess as the current (non-root) user:
      1. ``pyenv install`` / ``pyenv global`` for miniconda3-4.3.30
      2. update conda, add the r, defaults, conda-forge and bioconda
         channels, and install snakemake from bioconda
      3. upgrade pip and install osfclient with ``pip install --user``

    Raises Exception if run as root.
    """
    user = getpass.getuser()
    if user == "root":
        raise Exception("You are root - you should run this script as a normal user.")

    # ---------------------------
    # Install miniconda and make it the global pyenv version
    conda_version = "miniconda3-4.3.30"

    subprocess.call(["pyenv", "install", conda_version])
    subprocess.call(["pyenv", "global", conda_version])

    # pyenv shims live under ~/.pyenv/shims
    shimsdir = os.environ['HOME'] + "/.pyenv/shims"
    condabin = shimsdir + "/conda"
    pipbin = shimsdir + "/pip"

    # ---------------------------
    # Install snakemake

    # "conda update" needs a package name; update conda itself, with
    # --yes so it does not wait for confirmation.
    subprocess.call([condabin, "update", "--yes", "conda"])

    # Each --add puts the channel at the top of the list, so the last one
    # added (bioconda) ends up with the highest priority.  The default
    # channel set is named "defaults".
    for channel in ["r", "defaults", "conda-forge", "bioconda"]:
        subprocess.call([condabin, "config", "--add", "channels", channel])

    subprocess.call([condabin, "install", "--yes", "-c", "bioconda", "snakemake"])

    # ---------------------------
    # Install osf cli client

    subprocess.call([pipbin, "install", "--upgrade", "pip"])
    subprocess.call([pipbin, "install", "--user", "osfclient"])
||||
|
||||
|
||||
|
||||
if __name__=="__main__": |
||||
install_pyenv() |
||||
|
@ -1,82 +0,0 @@
@@ -1,82 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
import glob, os |
||||
import subprocess |
||||
from get_prefix import get_prefix |
||||
|
||||
|
||||
def kaiju2krona():
    """
    Convert kaiju results to krona results,
    and generate a report.

    For every kaiju output file (``*.trim2.out`` / ``*.trim30.out``) in the
    current directory, run two tools from the kaiju biocontainer:

    * ``kaiju2krona``  -> ``<prefix>.trim<N>.kaiju.out.krona``
    * ``kaijuReport`` at genus rank -> ``<prefix>.trim<N>.kaiju_out_krona.summary``

    A step whose output file already exists is skipped.
    """
    kaijudirname = "kaijudb"
    kaijuurl = "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
    trims = [2, 30]
    suffixes = [".trim2.out", ".trim30.out"]
    datadir = "/data"  # where the working directory is mounted in the container
    pwd = os.getcwd()

    nodes = "%s/%s/nodes.dmp" % (datadir, kaijudirname)
    names = "%s/%s/names.dmp" % (datadir, kaijudirname)

    # zip pairs each suffix with its trim value.  Iterating over the bare
    # tuple "suffixes,trims" would unpack the two lists themselves.
    for suffix, ntrim in zip(suffixes, trims):

        for filename in glob.glob("*" + suffix):
            prefix = get_prefix(filename, suffix)
            infile = "%s/%s" % (datadir, filename)

            kronaoutfile = prefix + ".trim" + str(ntrim) + ".kaiju.out.krona"
            reportoutfile = prefix + ".trim" + str(ntrim) + ".kaiju_out_krona.summary"

            # (output file, command to run inside the container)
            steps = [
                # Kaiju to krona
                (kronaoutfile,
                 ["kaiju2krona", "-v",
                  "-i", infile,
                  "-o", "%s/%s" % (datadir, kronaoutfile),
                  "-t", nodes,
                  "-n", names]),
                # Kaiju Report
                (reportoutfile,
                 ["kaijuReport", "-v",
                  "-t", nodes,
                  "-n", names,
                  "-i", infile,
                  "-r", "genus",
                  "-o", "%s/%s" % (datadir, reportoutfile)]),
            ]

            for outfile, toolcmd in steps:
                if os.path.isfile(outfile):
                    print("Skipping file %s, file exists." % (outfile))
                    continue

                cmd = ["docker", "run"]
                cmd += ["-v", "%s:%s" % (pwd, datadir)]
                cmd += [kaijuurl]
                cmd += toolcmd

                print("Running this docker command from dir %s:" % (pwd))
                print(" ".join(cmd))
                print()

                subprocess.call(cmd, cwd=pwd)
||||
|
||||
if __name__=="__main__": |
||||
kaiju2krona() |
@ -1,33 +0,0 @@
@@ -1,33 +0,0 @@
|
||||
#!/bin/bash

# Convert kaiju file to format readable by krona:

kaijuurl="quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"
kaijudir="kaijudb"
for i in *trim{"2","30"}.out
do
    # With no matching files the glob stays a literal pattern; skip it.
    [ -e "${i}" ] || continue

    docker run \
        -v "${PWD}":/data \
        "${kaijuurl}" \
        kaiju2krona \
        -v \
        -t "/data/${kaijudir}/nodes.dmp" \
        -n "/data/${kaijudir}/names.dmp" \
        -i "/data/${i}" \
        -o "/data/${i}.kaiju.out.krona"
done

# Generate a genus-level summary report for each kaiju output file:

for i in *trim{"2","30"}.out
do
    [ -e "${i}" ] || continue

    docker run \
        -v "${PWD}":/data \
        "${kaijuurl}" \
        kaijuReport \
        -v \
        -t "/data/${kaijudir}/nodes.dmp" \
        -n "/data/${kaijudir}/names.dmp" \
        -i "/data/${i}" \
        -r genus \
        -o "/data/${i}.kaiju_out_krona.summary"
done
||||
|
@ -1,12 +0,0 @@
@@ -1,12 +0,0 @@
|
||||
#!/usr/bin/python3
"""Pull the biocontainer images listed in quayurls."""
import os
import subprocess

# Images to pull from quay.io.
quayurls = [
    "quay.io/biocontainers/sourmash:2.0.0a3--py36_0",
    "quay.io/biocontainers/krona:2.7--pl5.22.0_1",
    "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0",
]

pwd = os.getcwd()

# Pull the images one at a time, running docker from the current directory.
for quayurl in quayurls:
    pullcmd = ["docker", "pull", quayurl]
    subprocess.call(pullcmd, cwd=pwd)
||||
|
@ -1,8 +0,0 @@
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
#
# Set up the tools needed for dahak, in order: pyenv, then
# miniconda/snakemake, then docker/singularity (the last one needs root).

# Stop at the first failing step: install_snakemake.py calls pyenv, so it
# cannot succeed if install_pyenv.py failed.
set -e

./install_pyenv.py

./install_snakemake.py

sudo ./install_singularity.py
||||
|
@ -1,24 +0,0 @@
@@ -1,24 +0,0 @@
|
||||
#!/bin/bash
#
# Run the taxonomic classification workflow steps in order.

./prepare_biocontainers.py

./get_sbt.py

./get_trimmed_data.py

./calculate_signatures.py

## This script takes a really long time
#./compare_components.sh

./unpack_kaiju.py

# This script takes a long time
./run_kaiju.py

./kaiju2krona.py

./filter_taxa.py

# The script is named visualize_krona.py (not "korona").
./visualize_krona.py
||||
|
@ -1,58 +0,0 @@
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
import glob, os, re |
||||
import subprocess |
||||
|
||||
|
||||
def get_prefix(filename, suffix):
    """
    Strip the suffix from a filename.

    ``suffix`` is taken literally and has to be the final part of
    ``filename``.  It is not used as a regular expression, so a "." in the
    suffix matches only a dot.

    Returns the leading part of ``filename`` with ``suffix`` removed.
    Raises ValueError if ``filename`` does not end with ``suffix``.
    """
    if not filename.endswith(suffix):
        raise ValueError(
            "Filename %r does not end with suffix %r" % (filename, suffix))
    # Slice by length so that an empty suffix leaves the name untouched.
    return filename[:len(filename) - len(suffix)]
||||
|
||||
|
||||
def run_kaiju():
    """
    Run kaiju on each pair of trimmed read files.

    For both trim levels (trim2, trim30), every ``*_1.<trim>.fq.gz`` file
    in the current directory is classified together with its ``_2`` mate
    by ``kaiju`` in the kaiju biocontainer.  Output goes to
    ``<prefix>.kaiju_output.<trim>.out``; pairs whose output file already
    exists are skipped.
    """
    kaijudirname = "kaijudb"

    # (forward-read suffix, reverse-read suffix, kaiju output suffix)
    suffixes = [
        ("_1.trim2.fq.gz", "_2.trim2.fq.gz", ".kaiju_output.trim2.out"),
        ("_1.trim30.fq.gz", "_2.trim30.fq.gz", ".kaiju_output.trim30.out"),
    ]

    for fwd_suffix, rev_suffix, out_suffix in suffixes:
        for fwd_file in glob.glob("*" + fwd_suffix):
            prefix = get_prefix(fwd_file, fwd_suffix)
            targetfile = prefix + out_suffix

            if os.path.isfile(targetfile):
                print("Skipping file %s, file exists." % (targetfile))
                continue

            datadir = "/data"
            pwd = os.getcwd()
            dbdir = "%s/%s" % (datadir, kaijudirname)

            cmd = [
                "docker", "run",
                "-v", "%s:%s" % (pwd, datadir),
                "quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0",
                "kaiju",
                "-x", "-v",
                "-t", "%s/nodes.dmp" % (dbdir),
                "-f", "%s/kaiju_db_nr_euk.fmi" % (dbdir),
                "-i", "%s/%s%s" % (datadir, prefix, fwd_suffix),
                "-j", "%s/%s%s" % (datadir, prefix, rev_suffix),
                "-o", "%s/%s" % (datadir, targetfile),
                "-z", "4",
            ]

            print("Running this docker command from dir %s:" % (pwd))
            print(" ".join(cmd))
            print()

            subprocess.call(cmd, cwd=pwd)
||||
|
||||
|
||||
if __name__=="__main__": |
||||
run_kaiju() |
||||
|
@ -1,45 +0,0 @@
@@ -1,45 +0,0 @@
|
||||
#!/bin/bash
#
# Run kaiju on every pair of trimmed read files (trim2 and trim30) in the
# current directory, using the kaiju biocontainer.

kaijuurl="quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0"

# Command to run container interactively:
#docker run -it --rm -v ${PWD}:/data quay.io/biocontainers/kaiju:1.6.1--pl5.22.0_0 /bin/bash

for trim in trim2 trim30
do
    # The glob includes the underscore so it matches exactly the suffix
    # that basename strips below.
    for filename in *_1."${trim}".fq.gz
    do
        # With no matching files the glob stays a literal pattern; skip it.
        [ -e "${filename}" ] || continue

        # Remove _1.<trim>.fq.gz from the file name to create the base
        base=$(basename "${filename}" "_1.${trim}.fq.gz")
        echo "${base}"

        # All paths are absolute inside the container, including the -f
        # database path.
        docker run \
            -v "${PWD}":/data \
            "${kaijuurl}" \
            kaiju \
            -x \
            -v \
            -t /data/kaijudb/nodes.dmp \
            -f /data/kaijudb/kaiju_db_nr_euk.fmi \
            -i "/data/${base}_1.${trim}.fq.gz" \
            -j "/data/${base}_2.${trim}.fq.gz" \
            -o "/data/${base}.kaiju_output.${trim}.out" \
            -z 4
    done
done
||||
|
@ -1,16 +0,0 @@
@@ -1,16 +0,0 @@
|
||||
SRR606249_1.trim2.fq.gz https://osf.io/tzkjr/download |
||||
SRR606249_2.trim2.fq.gz https://osf.io/sd968/download |
||||
SRR606249_subset50_1.trim2.fq.gz https://osf.io/acs5k/download |
||||
SRR606249_subset50_2.trim2.fq.gz https://osf.io/bem28/download |
||||
SRR606249_subset25_1.trim2.fq.gz https://osf.io/syf3m/download |
||||
SRR606249_subset25_2.trim2.fq.gz https://osf.io/zbcrx/download |
||||
SRR606249_subset10_1.trim2.fq.gz https://osf.io/ksu3e/download |
||||
SRR606249_subset10_2.trim2.fq.gz https://osf.io/k9tqn/download |
||||
SRR606249_1.trim30.fq.gz https://osf.io/qtzyk/download |
||||
SRR606249_2.trim30.fq.gz https://osf.io/dumz6/download |
||||
SRR606249_subset50_1.trim30.fq.gz https://osf.io/v5jhs/download |
||||
SRR606249_subset50_2.trim30.fq.gz https://osf.io/q4cfa/download |
||||
SRR606249_subset25_1.trim30.fq.gz https://osf.io/jcp5n/download |
||||
SRR606249_subset25_2.trim30.fq.gz https://osf.io/qevh9/download |
||||
SRR606249_subset10_1.trim30.fq.gz https://osf.io/rtvuz/download |
||||
SRR606249_subset10_2.trim30.fq.gz https://osf.io/zq4f9/download |
@ -1,14 +0,0 @@
@@ -1,14 +0,0 @@
|
||||
#!/usr/bin/python3
"""Download and unpack the kaiju NR Euk index tarball into ./kaijudb."""
import os
import subprocess

pwd = os.getcwd()
kaijudir = pwd + "/kaijudb"
tarfile = "kaiju_index_nr_euk.tgz"
url = "http://kaiju.binf.ku.dk/database/kaiju_index_nr_euk.tgz"

# (command, directory to run it in), executed in this order:
# create kaijudb, download the tarball into it, unpack it, remove it.
steps = [
    (["mkdir", kaijudir], pwd),
    (["curl", "-LO", url], kaijudir),
    (["tar", "zxvf", tarfile], kaijudir),
    (["rm", "-f", tarfile], kaijudir),
]

for command, workdir in steps:
    subprocess.call(command, cwd=workdir)
||||
|
@ -1,12 +0,0 @@
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
#
# Unpack the tgz files from/for kaiju steps
#
# Downloads the kaiju NR Euk index tarball into ./kaijudb, unpacks it
# there and removes the tarball.

kaijudir="${PWD}/kaijudb"
tarfile="kaiju_index_nr_euk.tgz"

mkdir -p "${kaijudir}"

# Work inside kaijudb: the other scripts read nodes.dmp, names.dmp and the
# .fmi index from that directory, so the tarball must be unpacked there.
cd "${kaijudir}" || exit 1

curl -LO "http://kaiju.binf.ku.dk/database/${tarfile}"
tar xzf "${tarfile}"
rm -f "${tarfile}"
||||
|
@ -1,57 +0,0 @@
@@ -1,57 +0,0 @@
|
||||
#!/usr/bin/python3 |
||||
import glob, os, re |
||||
import subprocess |
||||
from get_prefix import get_prefix |
||||
|
||||
|
||||
def pull_krona():
    """
    Pull the krona biocontainer image from quay.io so that the kaiju
    results can be visualized.
    """
    pullcmd = ["docker", "pull", "quay.io/biocontainers/krona:2.7--pl5.22.0_1"]
    subprocess.call(pullcmd)
||||
|
||||
|
||||
def visualize_krona():
    """
    Generate krona HTML output from the kaiju summary reports.

    For each report type (all genera, 1 percent of total reads, 1 percent
    of classified reads), every matching ``*<case>.summary`` file in the
    current directory is converted with ``ktImportText`` in the krona
    biocontainer.  Summaries whose HTML file already exists are skipped.
    """
    kaijudirname = "kaijudb"
    kronaurl = "quay.io/biocontainers/krona:2.7--pl5.22.0_1"
    cases = [
        "kaiju_out_krona",
        "kaiju_out_krona.1percenttotal",
        "kaiju_out_krona.1percentclassified",
    ]

    for case in cases:
        summary_suffix = case + ".summary"
        html_suffix = case + ".html"

        for summary in glob.glob("*" + summary_suffix):
            # Swap the ".summary" ending for ".html", keeping the case name.
            htmlname = get_prefix(summary, summary_suffix) + html_suffix

            if os.path.isfile(htmlname):
                print("Skipping file %s, file exists." % (htmlname))
                continue

            datadir = "/data"
            pwd = os.getcwd()

            cmd = [
                "docker", "run",
                "-v", "%s:%s" % (pwd, datadir),
                kronaurl,
                "ktImportText",
                "-o", "%s/%s" % (datadir, htmlname),
                "%s/%s" % (datadir, summary),
            ]

            print("Running this docker command from dir %s:" % (pwd))
            print(" ".join(cmd))
            print()

            subprocess.call(cmd, cwd=pwd)
||||
|
||||
|
||||
if __name__=="__main__": |
||||
pull_krona() |
||||
visualize_krona() |
||||
|
@ -1,40 +0,0 @@
@@ -1,40 +0,0 @@
|
||||
#!/bin/bash
#
# Generate krona HTML output from the kaiju summary reports in the
# current directory, using the krona biocontainer.

kronaurl="quay.io/biocontainers/krona:2.7--pl5.22.0_1"

docker pull "${kronaurl}"

for suffix in \
    "kaiju_out_krona" \
    "kaiju_out_krona.1percenttotal" \
    "kaiju_out_krona.1percentclassified"
do
    for i in *"${suffix}".summary
    do
        # With no matching files the glob stays a literal pattern; skip it.
        [ -e "${i}" ] || continue

        # Mount the current directory, not kaijudb: the summary files are
        # globbed from here, so this is where /data must point.
        docker run \
            -v "${PWD}":/data \
            "${kronaurl}" \
            ktImportText \
            -o "/data/${i}.${suffix}.html" \
            "/data/${i}"
    done
done
||||
|
Loading…
Reference in new issue