Compare commits
49 Commits
reset-trav
...
master
Author | SHA1 | Date | |
---|---|---|---|
ebbba235a1 | |||
949119a0dd | |||
e7cde484d5 | |||
924b8a83f5 | |||
06dfc9c901 | |||
0b860534e2 | |||
9c909a2fb8 | |||
08a3250203 | |||
b381654c71 | |||
31c31544ba | |||
266a4d00ac | |||
95b79ef15f | |||
b2ab9ed8a2 | |||
7debeb24d6 | |||
c23f0656de | |||
064d98c6ea | |||
4cb9d64f25 | |||
38fa8a624e | |||
41e1a23009 | |||
58cfb0cb5a | |||
312621a9cd | |||
b8ba8ba860 | |||
1934a04bce | |||
249b9f05cd | |||
a2b2c0ddd4 | |||
ac9b351d4c | |||
2b8b7855d6 | |||
01a7647aae | |||
6515a67225 | |||
2d54c50e32 | |||
abd0fc8a6a | |||
beecef4b41 | |||
104d0fe868 | |||
9bbc217d32 | |||
86751c36a0 | |||
cd86326b8b | |||
7793f9cc32 | |||
6bb04d44d7 | |||
5b3c10d2dd | |||
47849200d0 | |||
c2de3e8567 | |||
3b86a9ebe2 | |||
f669ff6951 | |||
3c93c1b236 | |||
1095efc568 | |||
46108c379e | |||
e47cfe66c0 | |||
d3f295d7da | |||
e99286a4e0 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -5,3 +5,6 @@ vp/
|
|||||||
build/
|
build/
|
||||||
dist/
|
dist/
|
||||||
test/*.txt
|
test/*.txt
|
||||||
|
|
||||||
|
# mkdocs
|
||||||
|
site/
|
||||||
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[submodule "mkdocs-material-dib"]
|
||||||
|
path = mkdocs-material-dib
|
||||||
|
url = https://github.com/dib-lab/mkdocs-material-dib.git
|
29
.travis.yml
29
.travis.yml
@@ -1,16 +1,22 @@
|
|||||||
# https://raw.githubusercontent.com/LiliC/travis-minikube/master/.travis.yml
|
# Modified from original:
|
||||||
|
# https://raw.githubusercontent.com/LiliC/travis-minikube/minikube-30-kube-1.12/.travis.yml
|
||||||
|
|
||||||
|
# byok8s and Snakemake both require Python,
|
||||||
|
# so we make this Travis CI test Python-based.
|
||||||
language: python
|
language: python
|
||||||
python:
|
python:
|
||||||
- "3.5"
|
|
||||||
- "3.6"
|
- "3.6"
|
||||||
|
|
||||||
|
# Running minikube via travis requires sudo
|
||||||
sudo: required
|
sudo: required
|
||||||
|
|
||||||
|
# We need the systemd for the kubeadm and it's default from 16.04+
|
||||||
|
dist: xenial
|
||||||
|
|
||||||
# This moves Kubernetes specific config files.
|
# This moves Kubernetes specific config files.
|
||||||
env:
|
env:
|
||||||
- CHANGE_MINIKUBE_NONE_USER=true
|
- CHANGE_MINIKUBE_NONE_USER=true
|
||||||
|
|
||||||
# command to install dependencies
|
|
||||||
install:
|
install:
|
||||||
# Install byok8s requirements (snakemake, python-kubernetes)
|
# Install byok8s requirements (snakemake, python-kubernetes)
|
||||||
- pip install -r requirements.txt
|
- pip install -r requirements.txt
|
||||||
@@ -20,16 +26,20 @@ install:
|
|||||||
before_script:
|
before_script:
|
||||||
# Do everything from test/
|
# Do everything from test/
|
||||||
- cd test
|
- cd test
|
||||||
|
# Make root mounted as rshared to fix kube-dns issues.
|
||||||
|
- sudo mount --make-rshared /
|
||||||
# Download kubectl, which is a requirement for using minikube.
|
# Download kubectl, which is a requirement for using minikube.
|
||||||
- curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/v1.9.0/bin/linux/amd64/kubectl && chmod +x kubectl && sudo mv kubectl /usr/local/bin/
|
- curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/v1.12.0/bin/linux/amd64/kubectl && chmod +x kubectl && sudo mv kubectl /usr/local/bin/
|
||||||
# Download minikube.
|
# Download minikube.
|
||||||
- curl -Lo minikube https://storage.googleapis.com/minikube/releases/v0.25.2/minikube-linux-amd64 && chmod +x minikube && sudo mv minikube /usr/local/bin/
|
- curl -Lo minikube https://storage.googleapis.com/minikube/releases/v0.30.0/minikube-linux-amd64 && chmod +x minikube && sudo mv minikube /usr/local/bin/
|
||||||
- sudo minikube start --vm-driver=none --kubernetes-version=v1.9.0
|
- sudo minikube start --vm-driver=none --bootstrapper=kubeadm --kubernetes-version=v1.12.0
|
||||||
# Fix the kubectl context, as it's often stale.
|
# Fix the kubectl context, as it's often stale.
|
||||||
- minikube update-context
|
- minikube update-context
|
||||||
# Wait for Kubernetes to be up and ready.
|
# Wait for Kubernetes to be up and ready.
|
||||||
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1; done
|
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1; done
|
||||||
|
|
||||||
|
################
|
||||||
|
## easy test
|
||||||
script:
|
script:
|
||||||
- kubectl cluster-info
|
- kubectl cluster-info
|
||||||
# Verify kube-addon-manager.
|
# Verify kube-addon-manager.
|
||||||
@@ -37,5 +47,8 @@ script:
|
|||||||
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lcomponent=kube-addon-manager -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-addon-manager to be available"; kubectl get pods --all-namespaces; done
|
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lcomponent=kube-addon-manager -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-addon-manager to be available"; kubectl get pods --all-namespaces; done
|
||||||
# Wait for kube-dns to be ready.
|
# Wait for kube-dns to be ready.
|
||||||
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lk8s-app=kube-dns -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-dns to be available"; kubectl get pods --all-namespaces; done
|
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lk8s-app=kube-dns -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-dns to be available"; kubectl get pods --all-namespaces; done
|
||||||
# Create example Redis deployment on Kubernetes.
|
|
||||||
- byok8s workflow-alpha params-blue
|
################
|
||||||
|
## hard test
|
||||||
|
# run byok8s workflow on the k8s cluster
|
||||||
|
- byok8s --s3-bucket=cmr-0123 -f workflow-alpha params-blue
|
||||||
|
261
README.md
261
README.md
@@ -2,187 +2,124 @@
|
|||||||
|
|
||||||
[](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s)
|
[](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s)
|
||||||
[](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE)
|
[](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE)
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
This is an example of a Snakemake workflow that:
|
This is an example of a Snakemake workflow that:
|
||||||
|
|
||||||
- is a command line utility
|
- is a **command line utility** called `byok8s`
|
||||||
- is bundled as a Python package
|
- is bundled as an installable **Python package**
|
||||||
- is designed to run on a Kubernetes cluster
|
- is designed to run on a **Kubernetes (k8s) cluster**
|
||||||
|
- can be **tested with Travis CI** (and/or locally) using [minikube](https://github.com/kubernetes/minikube)
|
||||||
|
|
||||||
Snakemake functionality is provided through
|
## What is byok8s?
|
||||||
a command line tool called `byok8s`, so that
|
|
||||||
it allows you to do this:
|
byok8s = Bring Your Own Kubernetes (cluster)
|
||||||
|
|
||||||
|
k8s = kubernetes
|
||||||
|
|
||||||
|
byok8s is a command line utility that launches
|
||||||
|
a Snakemake workflow on an existing Kubernetes
|
||||||
|
cluster. This allows you to do something
|
||||||
|
like this (also see the [Installation](docs/installing.md)
|
||||||
|
and [Quickstart](docs/quickstart.md) guides in the
|
||||||
|
documentation):
|
||||||
|
|
||||||
```
|
```
|
||||||
# install minikube so you can
|
# Install byok8s
|
||||||
# create a (virtual) k8s cluster
|
python setup.py build install
|
||||||
|
|
||||||
scripts/install_minikube.sh
|
# Create virtual k8s cluster
|
||||||
|
|
||||||
# move to working directory
|
|
||||||
cd test
|
|
||||||
|
|
||||||
# deploy (virtual) k8s cluster
|
|
||||||
minikube start
|
minikube start
|
||||||
|
|
||||||
# run the workflow
|
# Run the workflow on the k8s cluster
|
||||||
byok8s -w my-workflowfile -p my-paramsfile
|
cd /path/to/workflow/
|
||||||
|
byok8s my-workflowfile my-paramsfile --s3-bucket=my-bucket
|
||||||
|
|
||||||
# clean up (virtual) k8s cluster
|
# Clean up the virtual k8s cluster
|
||||||
minikube stop
|
minikube stop
|
||||||
```
|
```
|
||||||
|
|
||||||
Snakemake workflows are run on a Kubernetes (k8s)
|
## Getting Up and Running
|
||||||
cluster. The approach is for the user to provide
|
|
||||||
their own Kubernetes cluster (byok8s = Bring Your
|
|
||||||
Own Kubernetes).
|
|
||||||
|
|
||||||
The example above uses [`minikube`](https://github.com/kubernetes/minikube)
|
See the [Quickstart Guide](docs/quickstart.md) to get up and
|
||||||
to make a virtual k8s cluster, useful for testing.
|
running with byok8s.
|
||||||
|
|
||||||
For real workflow,s your options for
|
## How does byok8s work?
|
||||||
kubernetes clusters are cloud providers:
|
|
||||||
|
The command line utility requires the user to provide
|
||||||
|
three input files:
|
||||||
|
|
||||||
|
* A snakemake workflow, via a `Snakefile`
|
||||||
|
* A workflow configuration file (JSON)
|
||||||
|
* A workflow parameters file (JSON)
|
||||||
|
|
||||||
|
Additionally, the user must create the following resources:
|
||||||
|
|
||||||
|
* A kubernetes cluster up and running
|
||||||
|
* An S3 bucket (and AWS credentials to read/write)
|
||||||
|
|
||||||
|
A sample Snakefile, workflow config file, and workflow
|
||||||
|
params file are provided in the `test/` directory.
|
||||||
|
|
||||||
|
The workflow config file specifies which workflow targets
|
||||||
|
and input files to use.
|
||||||
|
|
||||||
|
The workflow parameters file specifies which parameters to
|
||||||
|
use for the workflow steps.
|
||||||
|
|
||||||
|
## Why S3 buckets?
|
||||||
|
|
||||||
|
AWS credentials and an S3 bucket are required to run workflows because
|
||||||
|
of restrictions on file I/O on nodes in a Kubernetes cluster. The Snakemake
|
||||||
|
workflows use AWS S3 buckets as remote providers for the Kubernetes nodes,
|
||||||
|
but this can be modified to any others that Snakemake supports.
|
||||||
|
|
||||||
|
AWS credentials are set with the two environment variables:
|
||||||
|
|
||||||
|
```
|
||||||
|
AWS_ACCESS_KEY_ID
|
||||||
|
AWS_SECRET_ACCESS_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
These are passed into the Kubernetes cluster by byok8s and Snakemake.
|
||||||
|
|
||||||
|
## Kubernetes and Minikube
|
||||||
|
|
||||||
|
[Kubernetes](https://kubernetes.io/) is a technology that utilizes Docker
|
||||||
|
containers to orchestrate a cluster of compute nodes. These compute nodes are
|
||||||
|
usually real compute nodes requested and managed via a cloud provider, like AWS
|
||||||
|
or Google Cloud.
|
||||||
|
|
||||||
|
But the compute nodes can also be virtual, which is where
|
||||||
|
[minikube](https://github.com/kubernetes/minikube) comes in. It creates a
|
||||||
|
kubernetes cluster that is entirely local and virtual, which makes testing
|
||||||
|
easy. See the [byok8s Minikube Guide](docs/kubernetes_minikube.md) for details
|
||||||
|
about how to use minikube with byok8s.
|
||||||
|
|
||||||
|
The Travis CI tests also utilize minikube to run test workflows. See [byok8s
|
||||||
|
Travis Tests](docs/travis_tests.md) for more information.
|
||||||
|
|
||||||
|
## Cloud Providers
|
||||||
|
|
||||||
|
For real workflows, your options for
|
||||||
|
kubernetes clusters are cloud providers.
|
||||||
|
We have guides for the following:
|
||||||
|
|
||||||
- AWS EKS (Elastic Container Service)
|
- AWS EKS (Elastic Container Service)
|
||||||
- GCP GKE (Google Kubernetes Engine)
|
- GCP GKE (Google Kubernetes Engine)
|
||||||
- Digital Ocean Kubernetes service
|
- Digital Ocean Kubernetes service
|
||||||
- etc...
|
|
||||||
|
|
||||||
Travis CI tests utilize minikube.
|
# Kubernetes + byok8s: In Practice
|
||||||
|
|
||||||
|
| Cloud Provider | Kubernetes Service | Guide | State |
|
||||||
# Quickstart
|
|-----------------------------|---------------------------------|-------------------------------------------------|------------|
|
||||||
|
| Minikube (on AWS EC2) | Minikube | [byok8s Minikube Guide](docs/kubernetes_minikube.md) | Finished |
|
||||||
This runs through the installation and usage
|
| Google Cloud Platform (GCP) | Google Container Engine (GKE) | [byok8s GCP GKE Guide](docs/kubernetes_gcp.md) | Finished |
|
||||||
of `2019-snakemake-byok8s`.
|
| Amazon Web Services (AWS) | Elastic Container Service (EKS) | [byok8s AWS EKS Guide](docs/kubernetes_aws.md) | Unfinished |
|
||||||
|
| Digital Ocean (DO) | DO Kubernetes (DOK) | [byok8s DO DOK Guide](docs/kubernetes_dok.md) | Unfinished |
|
||||||
Step 1: Set up Kubernetes cluster with `minikube`.
|
|
||||||
|
|
||||||
Step 2: Install `byok8s`.
|
|
||||||
|
|
||||||
Step 3: Run the `byok8s` workflow using the Kubernetes cluster.
|
|
||||||
|
|
||||||
Step 4: Tear down Kubernetes cluster with `minikube`.
|
|
||||||
|
|
||||||
|
|
||||||
## Step 1: Set Up VirtualKubernetes Cluster
|
|
||||||
|
|
||||||
### Installing Minikube
|
|
||||||
|
|
||||||
For the purposes of the quickstart, we will walk
|
|
||||||
through how to set up a local, virtual Kubernetes
|
|
||||||
cluster using `minikube`.
|
|
||||||
|
|
||||||
Start by installing minikube:
|
|
||||||
|
|
||||||
```
|
|
||||||
scripts/install_minicube.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Once it is installed, you can start up a kubernetes cluster
|
|
||||||
with minikube using the following command:
|
|
||||||
|
|
||||||
```
|
|
||||||
minikube start
|
|
||||||
```
|
|
||||||
|
|
||||||
NOTE: If you are running on AWS,
|
|
||||||
|
|
||||||
```
|
|
||||||
minikube config set vm-driver none
|
|
||||||
```
|
|
||||||
|
|
||||||
to set the the vm driver to none and use native Docker to run stuff.
|
|
||||||
|
|
||||||
## Step 2: Install byok8s
|
|
||||||
|
|
||||||
Start by setting up a python virtual environment,
|
|
||||||
and install the required packages into the
|
|
||||||
virtual environment:
|
|
||||||
|
|
||||||
```
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
This installs snakemake and kubernetes Python
|
|
||||||
modules. Now install the `byok8s` command line
|
|
||||||
tool:
|
|
||||||
|
|
||||||
```
|
|
||||||
python setup.py build install
|
|
||||||
```
|
|
||||||
|
|
||||||
Now you can run:
|
|
||||||
|
|
||||||
```
|
|
||||||
which byok8s
|
|
||||||
```
|
|
||||||
|
|
||||||
and you should see `byok8s` in your virtual
|
|
||||||
environment's `bin/` directory.
|
|
||||||
|
|
||||||
This command line utility will expect a kubernetes
|
|
||||||
cluster to be set up before it is run.
|
|
||||||
|
|
||||||
Setting up a kubernetes cluster will create...
|
|
||||||
(fill in more info here)...
|
|
||||||
|
|
||||||
Snakemake will automatically create the pods
|
|
||||||
in the cluster, so you just need to allocate
|
|
||||||
a kubernetes cluster.
|
|
||||||
|
|
||||||
|
|
||||||
## Step 3: Run byok8s
|
|
||||||
|
|
||||||
Now you can run the workflow with the `byok8s` command.
|
|
||||||
This submits the Snakemake workflow jobs to the Kubernetes
|
|
||||||
cluster that minikube created.
|
|
||||||
|
|
||||||
(NOTE: the command line utility must be run
|
|
||||||
from the same directory as the kubernetes
|
|
||||||
cluster was created from, otherwise Snakemake
|
|
||||||
won't be able to find the kubernetes cluster.)
|
|
||||||
|
|
||||||
(Would be a good idea to instead specify paths
|
|
||||||
for workflow config and param files,
|
|
||||||
or have a built-in set of params and configs.)
|
|
||||||
|
|
||||||
Run the alpha workflow with blue params:
|
|
||||||
|
|
||||||
```
|
|
||||||
byok8s -w workflow-alpha -p params-blue
|
|
||||||
```
|
|
||||||
|
|
||||||
Run the alpha workflow with red params:
|
|
||||||
|
|
||||||
```
|
|
||||||
byok8s -w workflow-alpha -p params-red
|
|
||||||
```
|
|
||||||
|
|
||||||
Run the gamma workflow with red params, &c:
|
|
||||||
|
|
||||||
```
|
|
||||||
byok8s -w workflow-gamma -p params-red
|
|
||||||
```
|
|
||||||
|
|
||||||
(NOTE: May want to let the user specify
|
|
||||||
input and output directories with flags.)
|
|
||||||
|
|
||||||
Make reasonable assumptions:
|
|
||||||
|
|
||||||
- if no input dir specified, use cwd
|
|
||||||
- if no output dir specified, make one w timestamp and workflow params
|
|
||||||
- don't rely on positional args, makes it harder to translate python code/command line calls
|
|
||||||
|
|
||||||
|
|
||||||
## Step 4: Tear Down Kubernetes Cluster
|
|
||||||
|
|
||||||
The last step once the workflow has been finished,
|
|
||||||
is to tear down the kubernetes cluster. The virtual
|
|
||||||
kubernetes cluster created by minikube can be torn
|
|
||||||
down with the following command:
|
|
||||||
|
|
||||||
```
|
|
||||||
minikube stop
|
|
||||||
```
|
|
||||||
|
|
||||||
|
@@ -1,17 +0,0 @@
|
|||||||
name = config['name']
|
|
||||||
|
|
||||||
rule rulename1:
|
|
||||||
input:
|
|
||||||
"alpha.txt"
|
|
||||||
|
|
||||||
rule target1:
|
|
||||||
output:
|
|
||||||
"alpha.txt"
|
|
||||||
shell:
|
|
||||||
"echo alpha {name} > {output}"
|
|
||||||
|
|
||||||
rule target2:
|
|
||||||
output:
|
|
||||||
"gamma.txt"
|
|
||||||
shell:
|
|
||||||
"echo gamma {name} > {output}"
|
|
117
cli/command.py
117
cli/command.py
@@ -7,82 +7,117 @@ import snakemake
|
|||||||
import sys
|
import sys
|
||||||
import pprint
|
import pprint
|
||||||
import json
|
import json
|
||||||
|
import subprocess
|
||||||
|
|
||||||
from . import _program
|
from . import _program
|
||||||
|
|
||||||
|
|
||||||
thisdir = os.path.abspath(os.path.dirname(__file__))
|
thisdir = os.path.abspath(os.path.dirname(__file__))
|
||||||
parentdir = os.path.join(thisdir,'..')
|
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
|
|
||||||
def main(sysargs = sys.argv[1:]):
|
def main(sysargs = sys.argv[1:]):
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(prog = _program, description='byok8s: run snakemake workflows on your own kubernetes cluster', usage='''byok8s -w <workflow> -p <parameters> [<target>]
|
descr = ''
|
||||||
|
usg = '''byok8s [--FLAGS] <workflowfile> <paramsfile> [<target>]
|
||||||
|
|
||||||
byok8s: run snakemake workflows on your own kubernetes cluster, using the given workflow name & parameters file.
|
byok8s: run snakemake workflows on your own kubernetes
|
||||||
|
cluster, using the given workflow name & parameters file.
|
||||||
|
|
||||||
''')
|
byok8s requires an S3 bucket be used for file I/O. Set
|
||||||
|
AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars.
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog = _program,
|
||||||
|
description=descr,
|
||||||
|
usage = usg
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument('workflowfile')
|
parser.add_argument('workflowfile')
|
||||||
parser.add_argument('paramsfile')
|
parser.add_argument('paramsfile')
|
||||||
|
|
||||||
parser.add_argument('-k', '--kubernetes-namespace')
|
parser.add_argument('-k', '--k8s-namespace',default='default', help='Namespace of Kubernetes cluster, if not "default"')
|
||||||
parser.add_argument('-n', '--dry-run', action='store_true')
|
parser.add_argument('-s', '--snakefile', default='Snakefile', help='Relative path to Snakemake Snakefile, if not "Snakefile"')
|
||||||
parser.add_argument('-f', '--force', action='store_true')
|
parser.add_argument('-b', '--s3-bucket', help='Name of S3 bucket to use for Snakemake file I/O (REQUIRED)')
|
||||||
|
parser.add_argument('-n', '--dry-run', action='store_true', help='Do a dry run of the workflow commands (no commands executed)')
|
||||||
|
parser.add_argument('-f', '--force', action='store_true', help='Force Snakemake rules to be re-run')
|
||||||
|
# NOTE: You MUST use S3 buckets, GCS buckets are not supported.
|
||||||
|
# That's because GCP requires credentials to be stored in a file,
|
||||||
|
# and we can only pass environment variables into k8s containers.
|
||||||
|
|
||||||
args = parser.parse_args(sysargs)
|
args = parser.parse_args(sysargs)
|
||||||
|
|
||||||
# first, find the Snakefile
|
# find the Snakefile
|
||||||
snakefile_this = os.path.join(thisdir,"Snakefile")
|
s1 = os.path.join(cwd,args.snakefile)
|
||||||
if os.path.exists(snakefile_this):
|
if os.path.isfile(s1):
|
||||||
snakefile = snakefile_this
|
# user has provided a relative path
|
||||||
|
# to a Snakefile. top priority.
|
||||||
|
snakefile = os.path.join(cwd,args.snakefile)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
msg = 'Error: cannot find Snakefile at any of the following locations:\n'
|
msg = 'Error: cannot find Snakefile at {}\n'.format(s1)
|
||||||
msg += '{}\n'.format(snakefile_this)
|
|
||||||
sys.stderr.write(msg)
|
sys.stderr.write(msg)
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
# next, find the workflow config file
|
# find the workflow config file
|
||||||
workflowfile = None
|
|
||||||
w1 = os.path.join(cwd,args.workflowfile)
|
w1 = os.path.join(cwd,args.workflowfile)
|
||||||
w2 = os.path.join(cwd,args.workflowfile+'.json')
|
w2 = os.path.join(cwd,args.workflowfile+'.json')
|
||||||
# NOTE:
|
# TODO: yaml
|
||||||
# handling yaml would be nice
|
if os.path.isfile(w1):
|
||||||
if os.path.exists(w1) and not os.path.isdir(w1):
|
# user has provided the full filename
|
||||||
workflowfile = w1
|
workflowfile = w1
|
||||||
elif os.path.exists(w2) and not os.path.isdir(w2):
|
elif os.path.isfile(w2):
|
||||||
|
# user has provided the prefix of the
|
||||||
|
# json filename
|
||||||
workflowfile = w2
|
workflowfile = w2
|
||||||
|
else:
|
||||||
if not workflowfile:
|
msg = ['Error: cannot find workflowfile (workflow configuration file) at any of the following locations:\n']
|
||||||
msg = 'Error: cannot find workflowfile {} or {} '.format(w1,w2)
|
msg += ['{}'.format(j) for j in [w1,w2]]
|
||||||
msg += 'in directory {}\n'.format(cwd)
|
|
||||||
sys.stderr.write(msg)
|
sys.stderr.write(msg)
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
# next, find the workflow params file
|
# find the workflow params file
|
||||||
paramsfile = None
|
|
||||||
p1 = os.path.join(cwd,args.paramsfile)
|
p1 = os.path.join(cwd,args.paramsfile)
|
||||||
p2 = os.path.join(cwd,args.paramsfile+'.json')
|
p2 = os.path.join(cwd,args.paramsfile+'.json')
|
||||||
if os.path.exists(p1) and not os.path.isdir(p1):
|
# TODO: yaml
|
||||||
|
if os.path.isfile(p1):
|
||||||
paramsfile = p1
|
paramsfile = p1
|
||||||
elif os.path.exists(p2) and not os.path.isdir(p2):
|
elif os.path.isfile(p2):
|
||||||
paramsfile = p2
|
paramsfile = p2
|
||||||
|
else:
|
||||||
if not paramsfile:
|
msg = ['Error: cannot find paramsfile (workflow parameters file) at any of the following locations:\n']
|
||||||
msg = 'Error: cannot find paramsfile {} or {} '.format(p1,p2)
|
msg += ['{}'.format(j) for j in [p1,p2]]
|
||||||
msg += 'in directory {}\n'.format(cwd)
|
|
||||||
sys.stderr.write(msg)
|
sys.stderr.write(msg)
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
with open(workflowfile, 'rt') as fp:
|
with open(paramsfile,'r') as f:
|
||||||
|
config = json.load(f)
|
||||||
|
|
||||||
|
with open(workflowfile, 'r') as fp:
|
||||||
workflow_info = json.load(fp)
|
workflow_info = json.load(fp)
|
||||||
|
|
||||||
# get the kubernetes namespace
|
# get the kubernetes namespace
|
||||||
kube_ns = 'default'
|
kube_ns = 'default'
|
||||||
if args.kubernetes_namespace is not None and len(args.kubernetes_namespace)>0:
|
if args.k8s_namespace is not None and len(args.k8s_namespace)>0:
|
||||||
kube_ns = args.kubernetes_namespace
|
kube_ns = args.k8s_namespace
|
||||||
|
|
||||||
|
# verify the user has set the AWS env variables
|
||||||
|
if not (os.environ['AWS_ACCESS_KEY_ID'] and os.environ['AWS_SECRET_ACCESS_KEY']):
|
||||||
|
msg = 'Error: the environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must be set to allow the k8s cluster to access an S3 bucket for i/o.'
|
||||||
|
sys.stderr.write(msg)
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
# verify the user has provided a bucket name
|
||||||
|
if not args.s3_bucket:
|
||||||
|
msg = 'Error: no S3 bucket specified with --s3-bucket. This must be set to allow the k8s cluster to access an S3 bucket for i/o.'
|
||||||
|
sys.stderr.write(msg)
|
||||||
|
sys.exit(-1)
|
||||||
|
else:
|
||||||
|
mah_bukkit = args.s3_bucket
|
||||||
|
|
||||||
|
|
||||||
target = workflow_info['workflow_target']
|
target = workflow_info['workflow_target']
|
||||||
config = dict()
|
|
||||||
|
|
||||||
print('--------')
|
print('--------')
|
||||||
print('details!')
|
print('details!')
|
||||||
@@ -93,14 +128,22 @@ byok8s: run snakemake workflows on your own kubernetes cluster, using the given
|
|||||||
print('\tk8s namespace: {}'.format(kube_ns))
|
print('\tk8s namespace: {}'.format(kube_ns))
|
||||||
print('--------')
|
print('--------')
|
||||||
|
|
||||||
|
# Note: we comment out configfile=paramsfile below,
|
||||||
|
# because we have problems passing files into k8s clusters.
|
||||||
|
|
||||||
# run byok8s!!
|
# run byok8s!!
|
||||||
status = snakemake.snakemake(snakefile, configfile=paramsfile,
|
status = snakemake.snakemake(snakefile,
|
||||||
|
#configfile=paramsfile,
|
||||||
|
assume_shared_fs=False,
|
||||||
|
default_remote_provider='S3',
|
||||||
|
default_remote_prefix=mah_bukkit,
|
||||||
|
kubernetes_envvars=['AWS_ACCESS_KEY_ID','AWS_SECRET_ACCESS_KEY'],
|
||||||
targets=[target],
|
targets=[target],
|
||||||
printshellcmds=True,
|
printshellcmds=True,
|
||||||
verbose = True,
|
verbose = True,
|
||||||
dryrun=args.dry_run,
|
dryrun=args.dry_run,
|
||||||
forceall=args.force,
|
forceall=args.force,
|
||||||
#kubernetes=kube_ns,
|
kubernetes=kube_ns,
|
||||||
config=config)
|
config=config)
|
||||||
|
|
||||||
if status: # translate "success" into shell exit code of 0
|
if status: # translate "success" into shell exit code of 0
|
||||||
|
11
docs/css/custom.css
Normal file
11
docs/css/custom.css
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
.md-typeset h1 { font-weight: 600; }
|
||||||
|
.md-typeset h2 { font-weight: 600; }
|
||||||
|
.md-typeset h3 { font-weight: 600; }
|
||||||
|
.md-typeset h4 { font-weight: 600; }
|
||||||
|
|
||||||
|
body {
|
||||||
|
background-color: #FAFAFA;
|
||||||
|
}
|
||||||
|
div.body {
|
||||||
|
background-color: #FAFAFA;
|
||||||
|
}
|
38
docs/images/ship.svg
Normal file
38
docs/images/ship.svg
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 16.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="Capa_1" x="0px" y="0px" width="512px" height="512px" viewBox="0 0 612 612" style="enable-background:new 0 0 612 612;" xml:space="preserve">
|
||||||
|
<g>
|
||||||
|
<path d="M612,342.869l-72.243,150.559c-9.036,17.516-27.098,28.521-46.808,28.521H66.974c-7.85,0-12.942-8.277-9.402-15.285 l0.179-0.355c5.778-11.439,2.35-25.383-8.074-32.836l-0.589-0.422c-24.197-17.305-38.554-45.225-38.554-74.973v-34.141h379.228 v-0.211c0-11.52,9.338-20.857,20.856-20.857H612L612,342.869z M368.693,216.46h-73.738c-5.818,0-10.534,4.716-10.534,10.534 v115.875c0,5.818,4.716,10.535,10.534,10.535h73.738c5.817,0,10.534-4.717,10.534-10.535V226.994 C379.228,221.176,374.511,216.46,368.693,216.46z M495.102,258.596h-84.272c-5.817,0-10.534,4.716-10.534,10.534v42.135 c0,5.818,4.717,10.535,10.534,10.535h84.272c5.818,0,10.534-4.717,10.534-10.535V269.13 C505.636,263.312,500.92,258.596,495.102,258.596z M168.545,353.402h84.272c5.818,0,10.534-4.717,10.534-10.533v-84.273 c0-5.818-4.716-10.534-10.534-10.534h-84.272c-5.818,0-10.534,4.716-10.534,10.534v84.273 C158.012,348.686,162.728,353.402,168.545,353.402z M163.155,195.391l-26.211,21.069v136.942H31.602V216.46H0v-21.069h73.738 v-30.546H46.506v-12.296h27.232V90.051h10.534v62.498h27.233v12.296H84.272v30.546H163.155z M117.913,282.062h-34.28v31.457h34.28 V282.062z M117.913,231.651h-34.28v31.458h34.28V231.651z" fill="#FFFFFF"/>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.8 KiB |
125
docs/index.md
Normal file
125
docs/index.md
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# 2019-snakemake-byok8s
|
||||||
|
|
||||||
|
[](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s)
|
||||||
|
[](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE)
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
This is an example of a Snakemake workflow that:
|
||||||
|
|
||||||
|
- is a **command line utility** called `byok8s`
|
||||||
|
- is bundled as an installable **Python package**
|
||||||
|
- is designed to run on a **Kubernetes (k8s) cluster**
|
||||||
|
- can be **tested with Travis CI** (and/or locally) using [minikube](https://github.com/kubernetes/minikube)
|
||||||
|
|
||||||
|
## What is byok8s?
|
||||||
|
|
||||||
|
byok8s = Bring Your Own Kubernetes (cluster)
|
||||||
|
|
||||||
|
k8s = kubernetes
|
||||||
|
|
||||||
|
byok8s is a command line utility that launches
|
||||||
|
a Snakemake workflow on an existing Kubernetes
|
||||||
|
cluster. This allows you to do something
|
||||||
|
like this (also see the [Installation](installing.md)
|
||||||
|
and [Quickstart](quickstart.md) guides in the
|
||||||
|
documentation):
|
||||||
|
|
||||||
|
```
|
||||||
|
# Install byok8s
|
||||||
|
python setup.py build install
|
||||||
|
|
||||||
|
# Create virtual k8s cluster
|
||||||
|
minikube start
|
||||||
|
|
||||||
|
# Run the workflow on the k8s cluster
|
||||||
|
cd /path/to/workflow/
|
||||||
|
byok8s my-workflowfile my-paramsfile --s3-bucket=my-bucket
|
||||||
|
|
||||||
|
# Clean up the virtual k8s cluster
|
||||||
|
minikube stop
|
||||||
|
```
|
||||||
|
|
||||||
|
## Getting Up and Running
|
||||||
|
|
||||||
|
See the [Quickstart Guide](quickstart.md) to get up and
|
||||||
|
running with byok8s.
|
||||||
|
|
||||||
|
## How does byok8s work?
|
||||||
|
|
||||||
|
The command line utility requires the user to provide
|
||||||
|
three input files:
|
||||||
|
|
||||||
|
* A snakemake workflow, via a `Snakefile`
|
||||||
|
* A workflow configuration file (JSON)
|
||||||
|
* A workflow parameters file (JSON)
|
||||||
|
|
||||||
|
Additionally, the user must create the following resources:
|
||||||
|
|
||||||
|
* A kubernetes cluster up and running
|
||||||
|
* An S3 bucket (and AWS credentials to read/write)
|
||||||
|
|
||||||
|
A sample Snakefile, workflow config file, and workflow
|
||||||
|
params file are provided in the `test/` directory.
|
||||||
|
|
||||||
|
The workflow config file specifies which workflow targets
|
||||||
|
and input files to use.
|
||||||
|
|
||||||
|
The workflow parameters file specifies which parameters to
|
||||||
|
use for the workflow steps.
|
||||||
|
|
||||||
|
## Why S3 buckets?
|
||||||
|
|
||||||
|
AWS credentials and an S3 bucket are required to run workflows because
|
||||||
|
of restrictions on file I/O on nodes in a kubernetes cluster. The Snakemake
|
||||||
|
workflows use AWS S3 buckets as remote providers for the Kubernetes nodes,
|
||||||
|
but this can be modified to any others that Snakemake supports.
|
||||||
|
|
||||||
|
AWS credentials are set with the two environment variables:
|
||||||
|
|
||||||
|
```
|
||||||
|
AWS_ACCESS_KEY_ID
|
||||||
|
AWS_SECRET_ACCESS_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
These are passed into the Kubernetes cluster by byok8s and Snakemake.
|
||||||
|
|
||||||
|
## Kubernetes and Minikube
|
||||||
|
|
||||||
|
[Kubernetes](https://kubernetes.io/) is a technology that utilizes Docker
|
||||||
|
containers to orchestrate a cluster of compute nodes. These compute nodes are
|
||||||
|
usually real compute nodes requested and managed via a cloud provider, like AWS
|
||||||
|
or Google Cloud.
|
||||||
|
|
||||||
|
But the compute nodes can also be virtual, which is where
|
||||||
|
[minikube](https://github.com/kubernetes/minikube) comes in. It creates a
|
||||||
|
kubernetes cluster that is entirely local and virtual, which makes testing
|
||||||
|
easy. See the [byok8s Minikube Guide](kubernetes_minikube.md) for details
|
||||||
|
about how to use minikube with byok8s.
|
||||||
|
|
||||||
|
The Travis CI tests also utilize minikube to run test workflows. See [byok8s
|
||||||
|
Travis Tests](travis_tests.md) for more information.
|
||||||
|
|
||||||
|
## Cloud Providers
|
||||||
|
|
||||||
|
For real workflows, your options for
|
||||||
|
kubernetes clusters are cloud providers.
|
||||||
|
We have guides for the following:
|
||||||
|
|
||||||
|
- AWS EKS (Elastic Kubernetes Service)
|
||||||
|
- GCP GKE (Google Kubernetes Engine)
|
||||||
|
- Digital Ocean Kubernetes service
|
||||||
|
|
||||||
|
# Kubernetes + byok8s: In Practice
|
||||||
|
|
||||||
|
| Cloud Provider | Kubernetes Service | Guide | State |
|
||||||
|
|-----------------------------|---------------------------------|-------------------------------------------------|------------|
|
||||||
|
| Minikube (on AWS EC2) | Minikube | [byok8s Minikube Guide](kubernetes_minikube.md) | Finished |
|
||||||
|
| Google Cloud Platform (GCP) | Google Kubernetes Engine (GKE) | [byok8s GCP GKE Guide](kubernetes_gcp.md) | Finished |
|
||||||
|
| Amazon Web Services (AWS) | Elastic Kubernetes Service (EKS) | [byok8s AWS EKS Guide](kubernetes_aws.md) | Unfinished |
|
||||||
|
| Digital Ocean (DO) | DO Kubernetes (DOK) | [byok8s DO DOK Guide](kubernetes_dok.md) | Unfinished |
|
||||||
|
|
109
docs/installing.md
Normal file
109
docs/installing.md
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# Installing byok8s
|
||||||
|
|
||||||
|
byok8s requires the following prerequisite software:
|
||||||
|
|
||||||
|
- python (conda)
|
||||||
|
- virtualenv (optional)
|
||||||
|
|
||||||
|
Additionally, if you are planning to run byok8s on
|
||||||
|
a local virtual kubernetes cluster, you must install:
|
||||||
|
|
||||||
|
- minikube
|
||||||
|
|
||||||
|
Otherwise, if you are planning on running byok8s on remote
|
||||||
|
kubernetes clusters provided by cloud providers
|
||||||
|
or similar services, you must install:
|
||||||
|
|
||||||
|
- kubernetes, ***OR***
|
||||||
|
- a cloud provider command line tool (`gcloud`, `aws`)
|
||||||
|
|
||||||
|
## Installing Python
|
||||||
|
|
||||||
|
We recommend installing pyenv and using pyenv
|
||||||
|
to install miniconda:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
curl https://pyenv.run | bash
|
||||||
|
```
|
||||||
|
|
||||||
|
Restart your shell and install miniconda:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
pyenv update
|
||||||
|
pyenv install miniconda3-4.3.30
|
||||||
|
pyenv global miniconda3-4.3.30
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installing virtualenv
|
||||||
|
|
||||||
|
You will need the virtualenv package to
|
||||||
|
set up a virtual environment:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
pip install virtualenv
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installing minikube
|
||||||
|
|
||||||
|
This step is only required if you plan to run byok8s
|
||||||
|
kubernetes workflows locally on a virtual kubernetes
|
||||||
|
cluster (i.e., testing mode).
|
||||||
|
|
||||||
|
Install the 64-bit Linux version of minikube, or visit the
|
||||||
|
[installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/)
|
||||||
|
page to find the right version:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 \
|
||||||
|
&& sudo install minikube-linux-amd64 /usr/local/bin/minikube
|
||||||
|
```
|
||||||
|
|
||||||
|
(On a Mac you can do `brew install minikube`.)
|
||||||
|
|
||||||
|
If you are planning on running on a bare metal
|
||||||
|
machine, you will also need to install a hypervisor
|
||||||
|
like VirtualBox or KVM; see [installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/).
|
||||||
|
|
||||||
|
If you are planning on running minikube on a compute
|
||||||
|
node in the cloud, you cannot run a hypervisor, so you
|
||||||
|
will need to run using the native driver; see
|
||||||
|
[installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/).
|
||||||
|
|
||||||
|
Once you have installed minikube, you do not need to
|
||||||
|
install kubernetes.
|
||||||
|
|
||||||
|
## Installing byok8s
|
||||||
|
|
||||||
|
Start by cloning the repo and installing byok8s:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
cd
|
||||||
|
git clone https://github.com/charlesreid1/2019-snakemake-byok8s.git
|
||||||
|
cd ~/2019-snakemake-byok8s
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, you'll create a virtual environment:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
virtualenv vp
|
||||||
|
source vp/bin/activate
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
python setup.py build install
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you should be ready to rock:
|
||||||
|
|
||||||
|
```
|
||||||
|
which byok8s
|
||||||
|
```
|
||||||
|
|
||||||
|
This will only be present when you have activated
|
||||||
|
your virtual environment. To activate/re-activate your
|
||||||
|
virtual environment:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd ~/2019-snakemake-byok8s
|
||||||
|
source vp/bin/activate
|
||||||
|
```
|
||||||
|
|
4
docs/kubernetes_aws.md
Normal file
4
docs/kubernetes_aws.md
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# Kubernetes on AWS
|
||||||
|
|
||||||
|
Check back soon for an EKS guide!
|
||||||
|
|
9
docs/kubernetes_dok.md
Normal file
9
docs/kubernetes_dok.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Kubernetes on Digital Ocean
|
||||||
|
|
||||||
|
Check back soon for a Digital Ocean kubernetes guide!
|
||||||
|
|
||||||
|
(Use web interface to set up a Kubernetes cluster,
|
||||||
|
then use `kubectl` to connect with Digital Ocean
|
||||||
|
via Digital Ocean credentials.)
|
||||||
|
|
||||||
|
[link](https://www.digitalocean.com/docs/kubernetes/how-to/connect-with-kubectl/)
|
266
docs/kubernetes_gcp.md
Normal file
266
docs/kubernetes_gcp.md
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
# Kubernetes on Google Cloud Platform
|
||||||
|
|
||||||
|
This document will walk you through how to start a kubernetes cluster using the
|
||||||
|
Google Kubernetes Engine (GKE) on Google Cloud Platform (GCP), run the byok8s
|
||||||
|
Snakemake workflow on the GKE kubernetes cluster, and tear down the cluster
|
||||||
|
when the workflow is complete.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
Before you can create a kubernetes cluster on Google Cloud,
|
||||||
|
you need a Google Cloud account and a Google Cloud project.
|
||||||
|
You can sign up for a Google Cloud account [here](https://cloud.google.com/).
|
||||||
|
You can create a new project from the [Google Cloud Console](https://console.cloud.google.com/).
|
||||||
|
New accounts start with $300 in free credit specifically to let you
|
||||||
|
test drive features like GKE! Cool!
|
||||||
|
|
||||||
|
Once you have your account and your project, you can install
|
||||||
|
the `gcloud` Google Cloud SDK command line utility
|
||||||
|
(see [Google Cloud SDK Quickstart Guide](https://cloud.google.com/sdk/docs/quickstarts)).
|
||||||
|
|
||||||
|
Once you have installed the `gcloud` utility, you will need
|
||||||
|
to log in with your Google account using the `init` command:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcloud init
|
||||||
|
```
|
||||||
|
|
||||||
|
This will give you a link to enter into your browser, where
|
||||||
|
you will log in with your Google account and receive a code to
|
||||||
|
copy and paste into the terminal.
|
||||||
|
|
||||||
|
The **Compute API** and **Kubernetes API** will both need to be
|
||||||
|
enabled as well. These can be enabled via the
|
||||||
|
[Google Cloud Console](https://console.cloud.google.com/)
|
||||||
|
(or read on).
|
||||||
|
|
||||||
|
If you aren't sure how to use the console to enable these APIs, just start
|
||||||
|
running the commands below to create a kubernetes cluster, and the gcloud
|
||||||
|
utility will let you know if it needs APIs enabled for actions. If it can't
|
||||||
|
enable the API for you, it will give you a direct link to the relevant Google
|
||||||
|
Cloud Console page.
|
||||||
|
|
||||||
|
## Google Kubernetes Engine (GKE)
|
||||||
|
|
||||||
|
GKE uses GCP compute nodes to run a kubernetes cluster
|
||||||
|
on Google Cloud infrastructure. It automatically sets up the
|
||||||
|
cluster for you, and allows you to use `kubectl` and `gcloud` to
|
||||||
|
manage and interact with the remote cluster.
|
||||||
|
|
||||||
|
Official Google link: <https://cloud.google.com/kubernetes-engine/>
|
||||||
|
|
||||||
|
## Quickstart
|
||||||
|
|
||||||
|
As mentioned, make sure your account credentials are initialized:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcloud init
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a new GKE cluster:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcloud container clusters create $CLUSTER_NAME --num-nodes=$NODES --region=us-west1
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the `--scopes storage-rw` flag to the command above if you plan to use Google
|
||||||
|
Cloud buckets instead of S3 buckets (not currently enabled in byok8s).
|
||||||
|
|
||||||
|
Next get configuration details about the cluster so your local
|
||||||
|
kubernetes controller can control the cluster:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcloud container clusters get-credentials $CLUSTER_NAME
|
||||||
|
```
|
||||||
|
|
||||||
|
**This will take several minutes.**
|
||||||
|
|
||||||
|
The cluster should now be up and running and ready to rock:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ kubectl get pods --namespace=kube-system
|
||||||
|
NAME READY STATUS RESTARTS AGE
|
||||||
|
event-exporter-v0.2.3-54f94754f4-5jczv 2/2 Running 0 4m
|
||||||
|
fluentd-gcp-scaler-6d7bbc67c5-hkllz 1/1 Running 0 4m
|
||||||
|
fluentd-gcp-v3.1.0-48pb2 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-58dpx 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-c4b49 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-h24m5 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-hbdj4 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-rfnmt 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-vwd8w 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-wxt79 2/2 Running 0 2m
|
||||||
|
fluentd-gcp-v3.1.0-xkt42 2/2 Running 0 2m
|
||||||
|
heapster-v1.5.3-bc9f6bfd5-7jhqs 3/3 Running 0 3m
|
||||||
|
kube-dns-788979dc8f-l7hch 4/4 Running 0 4m
|
||||||
|
kube-dns-788979dc8f-pts99 4/4 Running 0 3m
|
||||||
|
kube-dns-autoscaler-79b4b844b9-j48js 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-9ad2912e-130p 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-9ad2912e-lfpw 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-9ad2912e-rt9m 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-b44fa389-2ds8 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-b44fa389-hc66 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-b44fa389-vh3x 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-d58ee1e7-2kkw 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-d58ee1e7-3l6r 1/1 Running 0 4m
|
||||||
|
kube-proxy-gke-mycluster-default-pool-d58ee1e7-4w18 1/1 Running 0 4m
|
||||||
|
l7-default-backend-5d5b9874d5-ms75l 1/1 Running 0 4m
|
||||||
|
metrics-server-v0.2.1-7486f5bd67-2n6cn 2/2 Running 0 3m
|
||||||
|
```
|
||||||
|
|
||||||
|
Now assuming you have installed `byok8s` and it is located
|
||||||
|
at `~/2019-snakemake-byok8s/`, you can run the test workflow
|
||||||
|
on the kubernetes cluster:
|
||||||
|
|
||||||
|
```
|
||||||
|
# Return to our virtual environment
|
||||||
|
cd ~/2019-snakemake-byok8s/test/
|
||||||
|
source ../vp/bin/activate
|
||||||
|
|
||||||
|
# Export AWS keys for Snakemake
|
||||||
|
export AWS_ACCESS_KEY_ID="XXXXX"
|
||||||
|
export AWS_SECRET_ACCESS_KEY="XXXXX"
|
||||||
|
|
||||||
|
# Run byok8s
|
||||||
|
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
|
||||||
|
```
|
||||||
|
|
||||||
|
Once the workflow has run successfully, the results will be written
|
||||||
|
to S3 buckets and all the kubernetes containers created by snakemake
|
||||||
|
will be gone.
|
||||||
|
|
||||||
|
If all goes well, you should see output like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ byok8s --s3-bucket=mah-bukkit -f workflow-alpha params-blue
|
||||||
|
--------
|
||||||
|
details!
|
||||||
|
snakefile: /home/ubuntu/2019-snakemake-byok8s/test/Snakefile
|
||||||
|
config: /home/ubuntu/2019-snakemake-byok8s/test/workflow-alpha.json
|
||||||
|
params: /home/ubuntu/2019-snakemake-byok8s/test/params-blue.json
|
||||||
|
target: target1
|
||||||
|
k8s namespace: default
|
||||||
|
--------
|
||||||
|
Building DAG of jobs...
|
||||||
|
Using shell: /bin/bash
|
||||||
|
Provided cores: 1
|
||||||
|
Rules claiming more threads will be scaled down.
|
||||||
|
Job counts:
|
||||||
|
count jobs
|
||||||
|
1 target1
|
||||||
|
1
|
||||||
|
Resources before job selection: {'_cores': 1, '_nodes': 9223372036854775807}
|
||||||
|
Ready jobs (1):
|
||||||
|
target1
|
||||||
|
Selected jobs (1):
|
||||||
|
target1
|
||||||
|
Resources after job selection: {'_cores': 0, '_nodes': 9223372036854775806}
|
||||||
|
|
||||||
|
[Mon Jan 28 23:49:51 2019]
|
||||||
|
rule target1:
|
||||||
|
output: cmr-0123/alpha.txt
|
||||||
|
jobid: 0
|
||||||
|
|
||||||
|
echo alpha blue > cmr-0123/alpha.txt
|
||||||
|
Get status with:
|
||||||
|
kubectl describe pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
kubectl logs snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
|
||||||
|
[Mon Jan 28 23:50:41 2019]
|
||||||
|
Finished job 0.
|
||||||
|
1 of 1 steps (100%) done
|
||||||
|
Complete log: /home/ubuntu/2019-snakemake-byok8s/test/.snakemake/log/2019-01-28T234950.253823.snakemake.log
|
||||||
|
unlocking
|
||||||
|
removing lock
|
||||||
|
removing lock
|
||||||
|
removed all locks
|
||||||
|
```
|
||||||
|
|
||||||
|
Congratulations! You've just run an executable Snakemake workflow
|
||||||
|
on a Google Cloud kubernetes cluster!
|
||||||
|
|
||||||
|
You can get more information about the containers running each step of
|
||||||
|
the workflow using the `kubectl describe` commands printed in the output.
|
||||||
|
Here is an example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ kubectl describe pod snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001
|
||||||
|
Name: snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001
|
||||||
|
Namespace: default
|
||||||
|
Node: gke-mycluster-default-pool-b44fa389-vh3x/10.138.0.7
|
||||||
|
Start Time: Mon, 28 Jan 2019 23:55:18 -0800
|
||||||
|
Labels: app=snakemake
|
||||||
|
Annotations: <none>
|
||||||
|
Status: Running
|
||||||
|
IP: 10.0.6.4
|
||||||
|
Containers:
|
||||||
|
snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001:
|
||||||
|
Container ID: docker://2aaa04c34770c6088334b29c0332dc426aff2fbbd3a8af07b65bbbc2c5fe437d
|
||||||
|
Image: quay.io/snakemake/snakemake:v5.4.0
|
||||||
|
Image ID: docker-pullable://quay.io/snakemake/snakemake@sha256:f5bb7bef99c4e45cb7dfd5b55535b8dc185b43ca610341476378a9566a8b52c5
|
||||||
|
Port: <none>
|
||||||
|
Host Port: <none>
|
||||||
|
Command:
|
||||||
|
/bin/sh
|
||||||
|
Args:
|
||||||
|
-c
|
||||||
|
cp -rf /source/. . && snakemake cmr-0123/.zetaB1 --snakefile Snakefile --force -j --keep-target-files --keep-remote --latency-wait 0 --attempt 1 --force-use-threads --wrapper-prefix None --config 'name='"'"'blue'"'"'' -p --nocolor --notemp --no-hooks --nolock --default-remote-provider S3 --default-remote-prefix cmr-0123 --allowed-rules target3sleepyB1
|
||||||
|
State: Running
|
||||||
|
Started: Mon, 28 Jan 2019 23:56:15 -0800
|
||||||
|
Ready: True
|
||||||
|
Restart Count: 0
|
||||||
|
Requests:
|
||||||
|
cpu: 0
|
||||||
|
Environment:
|
||||||
|
AWS_ACCESS_KEY_ID: <set to the key 'aws_access_key_id' in secret 'e077a45f-1274-4a98-a76c-d1a9718707db'> Optional: false
|
||||||
|
AWS_SECRET_ACCESS_KEY: <set to the key 'aws_secret_access_key' in secret 'e077a45f-1274-4a98-a76c-d1a9718707db'> Optional: false
|
||||||
|
Mounts:
|
||||||
|
/source from source (rw)
|
||||||
|
/var/run/secrets/kubernetes.io/serviceaccount from default-token-jmnv4 (ro)
|
||||||
|
Conditions:
|
||||||
|
Type Status
|
||||||
|
Initialized True
|
||||||
|
Ready True
|
||||||
|
PodScheduled True
|
||||||
|
Volumes:
|
||||||
|
source:
|
||||||
|
Type: Secret (a volume populated by a Secret)
|
||||||
|
SecretName: e077a45f-1274-4a98-a76c-d1a9718707db
|
||||||
|
Optional: false
|
||||||
|
workdir:
|
||||||
|
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
|
||||||
|
Medium:
|
||||||
|
default-token-jmnv4:
|
||||||
|
Type: Secret (a volume populated by a Secret)
|
||||||
|
SecretName: default-token-jmnv4
|
||||||
|
Optional: false
|
||||||
|
QoS Class: BestEffort
|
||||||
|
Node-Selectors: <none>
|
||||||
|
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
|
||||||
|
node.kubernetes.io/unreachable:NoExecute for 300s
|
||||||
|
Events:
|
||||||
|
Type Reason Age From Message
|
||||||
|
---- ------ ---- ---- -------
|
||||||
|
Normal Scheduled 63s default-scheduler Successfully assigned snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001 to gke-mycluster-default-pool-b44fa389-vh3x
|
||||||
|
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "workdir"
|
||||||
|
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "default-token-jmnv4"
|
||||||
|
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "source"
|
||||||
|
Normal Pulling 61s kubelet, gke-mycluster-default-pool-b44fa389-vh3x pulling image "quay.io/snakemake/snakemake:v5.4.0"
|
||||||
|
Normal Pulled 10s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Successfully pulled image "quay.io/snakemake/snakemake:v5.4.0"
|
||||||
|
Normal Created 6s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Created container
|
||||||
|
Normal Started 6s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Started container
|
||||||
|
```
|
||||||
|
|
||||||
|
Congratulations! You've successfully run an executable Snakemake workflow
|
||||||
|
on a Google Cloud kubernetes cluster!
|
||||||
|
|
||||||
|
Delete the GKE cluster when you are done:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcloud container clusters delete $CLUSTER_NAME
|
||||||
|
```
|
||||||
|
|
116
docs/kubernetes_minikube.md
Normal file
116
docs/kubernetes_minikube.md
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
# Running byok8s with minikube
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
See the [Installing](installing.md) page for details
|
||||||
|
about installing byok8s and its prerequisites
|
||||||
|
(including minikube).
|
||||||
|
|
||||||
|
We cover two scenarios:
|
||||||
|
|
||||||
|
- bare metal machine, i.e., a laptop or desktop machine
|
||||||
|
that can run a hypervisor like VirtualBox
|
||||||
|
|
||||||
|
- cloud machine, i.e., AWS EC2 node, which is itself a
|
||||||
|
virtual machine and cannot run a hypervisor
|
||||||
|
|
||||||
|
These quickstarts assume you have Python and minikube
|
||||||
|
installed, and that you have cloned and installed byok8s
|
||||||
|
at `~/2019-snakemake-byok8s/`.
|
||||||
|
|
||||||
|
## Quickstart on Bare Metal Machine
|
||||||
|
|
||||||
|
On a bare metal machine, the procedure is
|
||||||
|
relatively uncomplicated: we create a cluster,
|
||||||
|
we export some variables, we run the workflow,
|
||||||
|
we tear down the cluster:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
# Start a minikube cluster
|
||||||
|
minikube start
|
||||||
|
|
||||||
|
# Verify k8s is running
|
||||||
|
minikube status
|
||||||
|
|
||||||
|
# Export AWS credentials
|
||||||
|
export AWS_ACCESS_KEY_ID="XXXXX"
|
||||||
|
export AWS_SECRET_ACCESS_KEY="XXXXX"
|
||||||
|
|
||||||
|
# Run the workflow
|
||||||
|
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
|
||||||
|
|
||||||
|
# Stop the minikube cluster
|
||||||
|
minikube stop
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quickstart on Cloud Machine
|
||||||
|
|
||||||
|
As mentioned above, cloud compute nodes are virtual machines
|
||||||
|
themselves and cannot run a hypervisor, so things are a bit
|
||||||
|
more complicated.
|
||||||
|
|
||||||
|
To tell minikube not to use a virtual machine driver,
|
||||||
|
run the following command in a terminal to create
|
||||||
|
a minikube config file:
|
||||||
|
|
||||||
|
```
|
||||||
|
cat <<'EOF' > ~/.minikube/config/config.json
|
||||||
|
{
|
||||||
|
"vm-driver": "none"
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can start up a minikube cluster.
|
||||||
|
|
||||||
|
There is an additional DNS problem that needs to be fixed
|
||||||
|
in the containers before you proceed. You will know there
|
||||||
|
is a problem if you run the `get pods` command with
|
||||||
|
`kubectl` and see your CoreDNS containers in a
|
||||||
|
`CrashLoopBackOff` state:
|
||||||
|
|
||||||
|
```text
|
||||||
|
$ kubectl get pods --namespace=kube-system
|
||||||
|
NAME READY STATUS RESTARTS AGE
|
||||||
|
coredns-86c58d9df4-lvq8b 0/1 CrashLoopBackOff 5 5m17s
|
||||||
|
coredns-86c58d9df4-pr52t 0/1 CrashLoopBackOff 5 5m17s
|
||||||
|
... ... ... ... ...
|
||||||
|
```
|
||||||
|
|
||||||
|
To fix the problem with the DNS settings, we have to patch
|
||||||
|
the CoreDNS image being used by `kube-system`.
|
||||||
|
To do that, use the file
|
||||||
|
[`test/fixcoredns.yml`](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/test/fixcoredns.yml)
|
||||||
|
[`test/fixcoredns.yml`](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/test/fixcoredns.yml)
|
||||||
|
in this repository with `kubectl apply`:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
# Fix the DNS container
|
||||||
|
kubectl apply -f fixcoredns.yml
|
||||||
|
|
||||||
|
# Delete all kube-system containers
|
||||||
|
kubectl delete --all pods --namespace kube-system
|
||||||
|
```
|
||||||
|
|
||||||
|
The kube-system containers will be re-spawned by the cluster control system.
|
||||||
|
It should happen in a few seconds, and then you'll be ready to run byok8s:
|
||||||
|
|
||||||
|
```
|
||||||
|
# Return to our virtual environment
|
||||||
|
cd ~/2019-snakemake-byok8s/test/
|
||||||
|
source ../vp/bin/activate
|
||||||
|
|
||||||
|
# Verify k8s is running
|
||||||
|
minikube status
|
||||||
|
|
||||||
|
# Export AWS keys for Snakemake
|
||||||
|
export AWS_ACCESS_KEY_ID="XXXXX"
|
||||||
|
export AWS_SECRET_ACCESS_KEY="XXXXX"
|
||||||
|
|
||||||
|
# Run byok8s
|
||||||
|
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
|
||||||
|
```
|
||||||
|
|
||||||
|
Congratulations! You've just run an executable Snakemake workflow
|
||||||
|
on a minikube kubernetes cluster.
|
||||||
|
|
155
docs/quickstart.md
Normal file
155
docs/quickstart.md
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# Quickstart
|
||||||
|
|
||||||
|
This runs through the installation and usage
|
||||||
|
of `2019-snakemake-byok8s`.
|
||||||
|
|
||||||
|
Step 1: Set up Kubernetes cluster with `minikube`.
|
||||||
|
|
||||||
|
Step 2: Install `byok8s`.
|
||||||
|
|
||||||
|
Step 3: Run the `byok8s` workflow using the Kubernetes cluster.
|
||||||
|
|
||||||
|
Step 4: Tear down Kubernetes cluster with `minikube`.
|
||||||
|
|
||||||
|
|
||||||
|
## Step 1: Set Up Virtual Kubernetes Cluster
|
||||||
|
|
||||||
|
For the purposes of the quickstart, we will walk
|
||||||
|
through how to set up a local, virtual Kubernetes
|
||||||
|
cluster using `minikube`.
|
||||||
|
|
||||||
|
Start by installing minikube:
|
||||||
|
|
||||||
|
```
|
||||||
|
scripts/install_minikube.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Once it is installed, you can start up a kubernetes cluster
|
||||||
|
with minikube using the following commands:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd test
|
||||||
|
minikube start
|
||||||
|
```
|
||||||
|
|
||||||
|
NOTE: If you are running on AWS, run this command first
|
||||||
|
|
||||||
|
```
|
||||||
|
minikube config set vm-driver none
|
||||||
|
```
|
||||||
|
|
||||||
|
to set the vm driver to none and use native Docker to run stuff.
|
||||||
|
|
||||||
|
If you are running on AWS, the DNS in the minikube
|
||||||
|
kubernetes cluster will not work, so run this command
|
||||||
|
to fix the DNS settings (should be run from the
|
||||||
|
`test/` directory):
|
||||||
|
|
||||||
|
```
|
||||||
|
kubectl apply -f fixcoredns.yml
|
||||||
|
kubectl delete --all pods --namespace kube-system
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Step 2: Install byok8s
|
||||||
|
|
||||||
|
Start by setting up a python virtual environment,
|
||||||
|
and install the required packages into the
|
||||||
|
virtual environment:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
This installs snakemake and kubernetes Python
|
||||||
|
modules. Now install the `byok8s` command line
|
||||||
|
tool:
|
||||||
|
|
||||||
|
```
|
||||||
|
python setup.py build install
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can run:
|
||||||
|
|
||||||
|
```
|
||||||
|
which byok8s
|
||||||
|
```
|
||||||
|
|
||||||
|
and you should see `byok8s` in your virtual
|
||||||
|
environment's `bin/` directory.
|
||||||
|
|
||||||
|
This command line utility will expect a kubernetes
|
||||||
|
cluster to be set up before it is run.
|
||||||
|
|
||||||
|
Setting up a kubernetes cluster will create...
|
||||||
|
(fill in more info here)...
|
||||||
|
|
||||||
|
Snakemake will automatically create the pods
|
||||||
|
in the cluster, so you just need to allocate
|
||||||
|
a kubernetes cluster.
|
||||||
|
|
||||||
|
|
||||||
|
## Step 3: Run byok8s
|
||||||
|
|
||||||
|
Now you can run the workflow with the `byok8s` command.
|
||||||
|
This submits the Snakemake workflow jobs to the Kubernetes
|
||||||
|
cluster that minikube created.
|
||||||
|
|
||||||
|
You should have your workflow in a `Snakefile` in the
|
||||||
|
current directory. Use the `--snakefile` flag if it is
|
||||||
|
named something other than `Snakefile`.
|
||||||
|
|
||||||
|
You will also need to specify your AWS credentials
|
||||||
|
via the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
|
||||||
|
environment variables. These are used to access
|
||||||
|
S3 buckets for file I/O.
|
||||||
|
|
||||||
|
Finally, you will need to create an S3 bucket for
|
||||||
|
Snakemake to use for file I/O. Pass the name of the
|
||||||
|
bucket using the `--s3-bucket` flag.
|
||||||
|
|
||||||
|
Start by exporting these two vars (careful to
|
||||||
|
scrub them from bash history):
|
||||||
|
|
||||||
|
```
|
||||||
|
export AWS_ACCESS_KEY_ID=XXXXX
|
||||||
|
export AWS_SECRET_ACCESS_KEY=XXXXX
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the alpha workflow with blue params:
|
||||||
|
|
||||||
|
```
|
||||||
|
byok8s --s3-bucket=mah-bukkit workflow-alpha params-blue
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the alpha workflow with red params:
|
||||||
|
|
||||||
|
```
|
||||||
|
byok8s --s3-bucket=mah-bukkit workflow-alpha params-red
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the gamma workflow with red params, &c:
|
||||||
|
|
||||||
|
```
|
||||||
|
byok8s --s3-bucket=mah-bukkit workflow-gamma params-red
|
||||||
|
```
|
||||||
|
|
||||||
|
(NOTE: May want to let the user specify
|
||||||
|
input and output directories with flags.)
|
||||||
|
|
||||||
|
All input files are searched for relative to the working
|
||||||
|
directory.
|
||||||
|
|
||||||
|
|
||||||
|
## Step 4: Tear Down Kubernetes Cluster
|
||||||
|
|
||||||
|
The last step, once the workflow has finished,
|
||||||
|
is to tear down the kubernetes cluster. The virtual
|
||||||
|
kubernetes cluster created by minikube can be torn
|
||||||
|
down with the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
minikube stop
|
||||||
|
```
|
||||||
|
|
||||||
|
|
5
docs/travis_tests.md
Normal file
5
docs/travis_tests.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Travis Tests with Minikube
|
||||||
|
|
||||||
|
This page is in progress; see this post
|
||||||
|
on the <https://charlesreid1.github.io> blog for info:
|
||||||
|
[Building Snakemake Command Line Wrappers for Kubernetes Workflows](https://charlesreid1.github.io/building-snakemake-command-line-wrappers-for-kubernetes-workflows.html).
|
1
mkdocs-material-dib
Submodule
1
mkdocs-material-dib
Submodule
Submodule mkdocs-material-dib added at 745d13f187
42
mkdocs.yml
Normal file
42
mkdocs.yml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
site_name: 2019-snakemake-byok8s
|
||||||
|
site_url: https://charlesreid1.github.io/2019-snakemake-byok8s
|
||||||
|
repo_name: 2019-snakemake-byok8s
|
||||||
|
repo_url: https://github.com/charlesreid1/2019-snakemake-byok8s
|
||||||
|
edit_uri: ""
|
||||||
|
|
||||||
|
copyright: 'Copyright © 2018 <a href="https://charlesreid1.com">Charles Reid</a>, released under the <a href="https://opensource.org/licenses/MIT">MIT license</a> <br /><br />
|
||||||
|
<div>Icon made by Freepik, obtained from <a href="https://www.flaticon.com/" title="Flaticon">www.flaticon.com</a>, used under a <a href="http://creativecommons.org/licenses/by/3.0/" title="Creative Commons BY 3.0" target="_blank">CC 3.0 BY</a></div> license.'
|
||||||
|
|
||||||
|
docs_dir: docs
|
||||||
|
site_dir: site
|
||||||
|
extra_css:
|
||||||
|
- css/custom.css
|
||||||
|
theme:
|
||||||
|
name: null
|
||||||
|
custom_dir: 'mkdocs-material-dib/material'
|
||||||
|
palette:
|
||||||
|
primary: 'blue'
|
||||||
|
accent: 'blue'
|
||||||
|
logo: 'images/ship.svg'
|
||||||
|
font:
|
||||||
|
text: 'Roboto'
|
||||||
|
code: 'Roboto Mono'
|
||||||
|
nav:
|
||||||
|
- 'Index': 'index.md'
|
||||||
|
- 'Installing': 'installing.md'
|
||||||
|
- 'Quickstart': 'quickstart.md'
|
||||||
|
- 'K8s with Minikube' : 'kubernetes_minikube.md'
|
||||||
|
- 'K8s with GCP' : 'kubernetes_gcp.md'
|
||||||
|
- 'K8s with AWS' : 'kubernetes_aws.md'
|
||||||
|
- 'K8s with DigitalOcean' : 'kubernetes_dok.md'
|
||||||
|
- 'Travis Tests': 'travis_tests.md'
|
||||||
|
|
||||||
|
# Extensions
|
||||||
|
markdown_extensions:
|
||||||
|
- admonition
|
||||||
|
- codehilite:
|
||||||
|
guess_lang: false
|
||||||
|
- toc:
|
||||||
|
permalink: true
|
||||||
|
|
||||||
|
strict: true
|
1
requirements-docs.txt
Normal file
1
requirements-docs.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
mkdocs>=1.0
|
@@ -1,2 +1,4 @@
|
|||||||
snakemake>=5.4.0
|
snakemake>=5.4.0
|
||||||
python-kubernetes
|
kubernetes
|
||||||
|
moto
|
||||||
|
boto3
|
||||||
|
60
test/Readme.md
Normal file
60
test/Readme.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# 2019-snakemake-byok8s tests
|
||||||
|
|
||||||
|
This guide assumes you have minikube installed. (See `../scripts/` directory...)
|
||||||
|
|
||||||
|
We will need to fix a problem with a DNS setting in Kubernetes if we are on
|
||||||
|
an AWS EC2 node, so we'll walk through how to do that first.
|
||||||
|
|
||||||
|
Then we'll cover how to start a Kubernetes cluster and run a simple test.
|
||||||
|
|
||||||
|
|
||||||
|
## Fix k8s DNS problem
|
||||||
|
|
||||||
|
If you are running on EC2, you will have
|
||||||
|
to fix the DNS settings inside the container
|
||||||
|
by patching the `kube-dns` container that
|
||||||
|
runs as part of Kubernetes.
|
||||||
|
|
||||||
|
Apply the DNS fix to the container:
|
||||||
|
|
||||||
|
```
|
||||||
|
kubectl apply -f fixcoredns.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
(If you are using an older version of minikube + kubernetes
|
||||||
|
that uses kube-dns, use `fixkubedns.yml` instead.)
|
||||||
|
|
||||||
|
|
||||||
|
## Start (restart) cluster
|
||||||
|
|
||||||
|
If you don't already have a Kubernetes cluster running,
|
||||||
|
start one with minikube:
|
||||||
|
|
||||||
|
```
|
||||||
|
minikube start
|
||||||
|
|
||||||
|
# or, if on ec2,
|
||||||
|
|
||||||
|
sudo minikube start
|
||||||
|
```
|
||||||
|
|
||||||
|
If you already have a Kubernetes cluster running,
|
||||||
|
you can delete all of the kube-system pods, and
|
||||||
|
they will automatically respawn, including the
|
||||||
|
(now-fixed) kube-dns container:
|
||||||
|
|
||||||
|
```
|
||||||
|
kubectl delete --all pods --namespace kube-system
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Running tests
|
||||||
|
|
||||||
|
Now that DNS is fixed, the host and container can
|
||||||
|
properly communicate, which is required for Kubernetes
|
||||||
|
to return files it has created.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
93
test/Snakefile
Normal file
93
test/Snakefile
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
name = config['name']
|
||||||
|
|
||||||
|
rule rulename1:
|
||||||
|
input:
|
||||||
|
"alpha.txt"
|
||||||
|
|
||||||
|
rule target1:
|
||||||
|
output:
|
||||||
|
"alpha.txt"
|
||||||
|
shell:
|
||||||
|
"echo alpha {name} > {output}"
|
||||||
|
|
||||||
|
rule target2:
|
||||||
|
output:
|
||||||
|
"gamma.txt"
|
||||||
|
shell:
|
||||||
|
"echo gamma {name} > {output}"
|
||||||
|
|
||||||
|
|
||||||
|
# A somewhat contrived workflow:
|
||||||
|
#
|
||||||
|
# zetaA workflow
|
||||||
|
#
|
||||||
|
# +---- (sleepy process) -- (sleepy process) -- (sleepy process) --+
|
||||||
|
# | |
|
||||||
|
# target3 <---+ +---<----
|
||||||
|
# | |
|
||||||
|
# +-----------( sleepy process ) ------ ( sleepy process ) --------+
|
||||||
|
#
|
||||||
|
# zetaB workflow
|
||||||
|
|
||||||
|
rule target3:
|
||||||
|
input:
|
||||||
|
"zetaA.txt", "zetaB.txt"
|
||||||
|
output:
|
||||||
|
"zeta.txt"
|
||||||
|
shell:
|
||||||
|
"cat {input[0]} {input[1]} > {output}"
|
||||||
|
|
||||||
|
rule target3sleepyA1:
|
||||||
|
output:
|
||||||
|
touch(".zetaA1")
|
||||||
|
shell:
|
||||||
|
"""
|
||||||
|
sleep 3s
|
||||||
|
echo zeta_A1 {name} > zetaA.txt
|
||||||
|
"""
|
||||||
|
|
||||||
|
rule target3sleepyA2:
|
||||||
|
input:
|
||||||
|
".zetaA1"
|
||||||
|
output:
|
||||||
|
touch(".zetaA2")
|
||||||
|
shell:
|
||||||
|
"""
|
||||||
|
sleep 3s
|
||||||
|
echo zeta_A2 {name} >> zetaA.txt
|
||||||
|
rm -f .zetaA1
|
||||||
|
"""
|
||||||
|
|
||||||
|
rule target3sleepyA3:
|
||||||
|
input:
|
||||||
|
".zetaA2"
|
||||||
|
output:
|
||||||
|
"zetaA.txt"
|
||||||
|
shell:
|
||||||
|
"""
|
||||||
|
sleep 3s
|
||||||
|
echo zeta_A3 {name} >> {output}
|
||||||
|
rm -f .zetaA2
|
||||||
|
"""
|
||||||
|
|
||||||
|
rule target3sleepyB1:
|
||||||
|
output:
|
||||||
|
touch(".zetaB1")
|
||||||
|
shell:
|
||||||
|
"""
|
||||||
|
sleep 4s
|
||||||
|
echo zeta_B1 {name} > zetaB.txt
|
||||||
|
"""
|
||||||
|
|
||||||
|
rule target3sleepyB2:
|
||||||
|
input:
|
||||||
|
".zetaB1"
|
||||||
|
output:
|
||||||
|
"zetaB.txt"
|
||||||
|
shell:
|
||||||
|
"""
|
||||||
|
sleep 4s
|
||||||
|
echo zeta_B2 {name} >> {output}
|
||||||
|
rm -f .zetaB1
|
||||||
|
"""
|
||||||
|
|
22
test/fixcoredns.yml
Normal file
22
test/fixcoredns.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
kind: ConfigMap
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
Corefile: |
|
||||||
|
.:53 {
|
||||||
|
errors
|
||||||
|
health
|
||||||
|
kubernetes cluster.local in-addr.arpa ip6.arpa {
|
||||||
|
upstream 8.8.8.8 8.8.4.4
|
||||||
|
pods insecure
|
||||||
|
fallthrough in-addr.arpa ip6.arpa
|
||||||
|
}
|
||||||
|
proxy . 8.8.8.8 8.8.4.4
|
||||||
|
cache 30
|
||||||
|
reload
|
||||||
|
}
|
||||||
|
metadata:
|
||||||
|
creationTimestamp: 2019-01-25T22:55:15Z
|
||||||
|
name: coredns
|
||||||
|
namespace: kube-system
|
||||||
|
#resourceVersion: "198"
|
||||||
|
#selfLink: /api/v1/namespaces/kube-system/configmaps/coredns
|
11
test/fixkubedns.yml
Normal file
11
test/fixkubedns.yml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: kube-dns
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
addonmanager.kubernetes.io/mode: EnsureExists
|
||||||
|
data:
|
||||||
|
upstreamNameservers: |-
|
||||||
|
["8.8.8.8", "8.8.4.4"]
|
||||||
|
|
@@ -1,76 +0,0 @@
|
|||||||
from unittest import TestCase
|
|
||||||
from subprocess import call, Popen, PIPE
|
|
||||||
import os
|
|
||||||
import shutil, tempfile
|
|
||||||
from os.path import isdir, join
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
test banana
|
|
||||||
|
|
||||||
this test will run bananas with the test
|
|
||||||
config and params provided in the test dir.
|
|
||||||
|
|
||||||
this test will also show how to run tests where
|
|
||||||
failure is expected (i.e., checking that we handle
|
|
||||||
invalid parameters).
|
|
||||||
|
|
||||||
each test has a unittest TestCase defined.
|
|
||||||
pytest will automatically find these tests.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class TestBananas(TestCase):
|
|
||||||
"""
|
|
||||||
simple bananas test class
|
|
||||||
|
|
||||||
This uses the subprocess PIPE var
|
|
||||||
to capture system input and output,
|
|
||||||
since we are running bananas from the
|
|
||||||
command line directly using subprocess.
|
|
||||||
"""
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(self):
|
|
||||||
"""
|
|
||||||
set up a bananas workflow test.
|
|
||||||
|
|
||||||
we are using the existing test/ dir
|
|
||||||
as our working dir, so no setup to do.
|
|
||||||
|
|
||||||
if we were expecting the user to provide
|
|
||||||
a Snakefile, this is where we would set
|
|
||||||
up a test Snakefile.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def test_hello(self):
|
|
||||||
"""
|
|
||||||
test hello workflow
|
|
||||||
"""
|
|
||||||
command_prefix = ['bananas','workflow-hello']
|
|
||||||
|
|
||||||
params = ['params-amy','params-beth']
|
|
||||||
|
|
||||||
pwd = os.path.abspath(os.path.dirname(__file__))
|
|
||||||
|
|
||||||
for param in params:
|
|
||||||
|
|
||||||
command = command_prefix + [param]
|
|
||||||
|
|
||||||
p = Popen(command, cwd=pwd, stdout=PIPE, stderr=PIPE).communicate()
|
|
||||||
p_out = p[0].decode('utf-8').strip()
|
|
||||||
p_err = p[1].decode('utf-8').strip()
|
|
||||||
|
|
||||||
self.assertIn('details',p_out)
|
|
||||||
|
|
||||||
# clean up
|
|
||||||
call(['rm','-f','hello.txt'])
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def tearDownClass(self):
|
|
||||||
"""
|
|
||||||
clean up after the tests
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
66
test/test_byok8s.py
Normal file
66
test/test_byok8s.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
from subprocess import call, Popen, PIPE
|
||||||
|
import os
|
||||||
|
import shutil, tempfile
|
||||||
|
from os.path import isdir, join
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
test byok8s
|
||||||
|
|
||||||
|
This tests the byok8s command line utility,
|
||||||
|
and assumes you have already set up your
|
||||||
|
k8s cluster using e.g. minikube.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestByok8s(TestCase):
    """
    Simple byok8s test class.

    Runs the byok8s command line utility through subprocess, using PIPE
    to capture its stdout and stderr, and checks the captured output.
    Assumes a Kubernetes cluster (e.g. minikube) is already running.
    """
    @classmethod
    def setUpClass(self):
        """
        Set up a byok8s workflow test.
        """
        # verify that a kubernetes cluster is running
        pass

    @staticmethod
    def _remove_txt_outputs(directory):
        """
        Delete every regular ``*.txt`` file directly inside ``directory``.

        This is done in Python because passing ``'*.txt'`` to ``rm`` in an
        argument list (no shell) hands rm the literal string ``*.txt``;
        nothing expands the glob, so nothing is removed.
        """
        for fname in os.listdir(directory):
            fpath = os.path.join(directory, fname)
            if fname.endswith('.txt') and os.path.isfile(fpath):
                os.remove(fpath)

    def test_alpha(self):
        """
        Test the alpha, gamma, and zeta workflows with each parameter set.

        Each (workflow, params) pair is run as ``byok8s <workflow> <params>``
        from the directory containing this test file, and the captured
        stdout must contain the string 'details'.
        """
        workflows = ['workflow-alpha', 'workflow-gamma', 'workflow-zeta']
        params = ['params-red', 'params-blue']

        pwd = os.path.abspath(os.path.dirname(__file__))

        for workflow in workflows:

            for param in params:

                command = ['byok8s', workflow, param]

                try:
                    p = Popen(command, cwd=pwd, stdout=PIPE, stderr=PIPE).communicate()
                    p_out = p[0].decode('utf-8').strip()
                    p_err = p[1].decode('utf-8').strip()

                    # Include the command and stderr in the failure message
                    # so a failing run can be diagnosed from the test report.
                    self.assertIn(
                        'details',
                        p_out,
                        msg="command %r failed; stderr was:\n%s" % (command, p_err),
                    )
                finally:
                    # clean up the generated outputs in the directory the
                    # command ran in, even when the assertion above fails
                    self._remove_txt_outputs(pwd)

    @classmethod
    def tearDownClass(self):
        """
        Clean up after the tests.
        """
        pass
|
||||||
|
|
3
test/workflow-zeta.json
Normal file
3
test/workflow-zeta.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"workflow_target": "target3"
|
||||||
|
}
|
Reference in New Issue
Block a user