Compare commits

..

2 Commits

  1. 3
      .gitignore
  2. 3
      .gitmodules
  3. 13
      .travis.yml
  4. 242
      README.md
  5. 11
      docs/css/custom.css
  6. 38
      docs/images/ship.svg
  7. 125
      docs/index.md
  8. 114
      docs/installing.md
  9. 4
      docs/kubernetes_aws.md
  10. 263
      docs/kubernetes_gcp.md
  11. 115
      docs/kubernetes_minikube.md
  12. 155
      docs/quickstart.md
  13. 5
      docs/travis_tests.md
  14. 6
      kubernetes_aws.md
  15. 4
      kubernetes_dok.md
  16. 7
      kubernetes_gcp.md
  17. 6
      kubernetes_minikube.md
  18. 1
      mkdocs-material-dib
  19. 42
      mkdocs.yml
  20. 1
      requirements-docs.txt

3
.gitignore vendored

@ -5,6 +5,3 @@ vp/
build/ build/
dist/ dist/
test/*.txt test/*.txt
# mkdocs
site/

3
.gitmodules vendored

@ -1,3 +0,0 @@
[submodule "mkdocs-material-dib"]
path = mkdocs-material-dib
url = https://github.com/dib-lab/mkdocs-material-dib.git

13
.travis.yml

@ -1,13 +1,9 @@
# Modified from original:
# https://raw.githubusercontent.com/LiliC/travis-minikube/minikube-30-kube-1.12/.travis.yml # https://raw.githubusercontent.com/LiliC/travis-minikube/minikube-30-kube-1.12/.travis.yml
# byok8s and Snakemake both require Python,
# so we make this Travis CI test Python-based.
language: python language: python
python: python:
- "3.6" - "3.6"
# Running minikube via travis requires sudo
sudo: required sudo: required
# We need the systemd for the kubeadm and it's default from 16.04+ # We need the systemd for the kubeadm and it's default from 16.04+
@ -47,8 +43,11 @@ script:
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lcomponent=kube-addon-manager -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-addon-manager to be available"; kubectl get pods --all-namespaces; done - JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lcomponent=kube-addon-manager -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-addon-manager to be available"; kubectl get pods --all-namespaces; done
# Wait for kube-dns to be ready. # Wait for kube-dns to be ready.
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lk8s-app=kube-dns -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-dns to be available"; kubectl get pods --all-namespaces; done - JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n kube-system get pods -lk8s-app=kube-dns -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for kube-dns to be available"; kubectl get pods --all-namespaces; done
# Create example Redis deployment on Kubernetes.
- kubectl run travis-example --image=redis --labels="app=travis-example"
# Make sure created pod is scheduled and running.
- JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl -n default get pods -lapp=travis-example -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1;echo "waiting for travis-example deployment to be available"; kubectl get pods -n default; done
#
################ ################
## hard test ## harder
# run byok8s workflow on the k8s cluster
- byok8s --s3-bucket=cmr-0123 -f workflow-alpha params-blue - byok8s --s3-bucket=cmr-0123 -f workflow-alpha params-blue

242
README.md

@ -2,129 +2,211 @@
[![travis](https://img.shields.io/travis/charlesreid1/2019-snakemake-byok8s.svg)](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s) [![travis](https://img.shields.io/travis/charlesreid1/2019-snakemake-byok8s.svg)](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s)
[![license](https://img.shields.io/github/license/charlesreid1/2019-snakemake-byok8s.svg)](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE) [![license](https://img.shields.io/github/license/charlesreid1/2019-snakemake-byok8s.svg)](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE)
![minikube 0.32](https://img.shields.io/badge/minikube-%3E%3D0.32-blue.svg)
![k8s 0.12](https://img.shields.io/badge/kubernetes-%3E%3D0.12-blue.svg)
![ubuntu bionic](https://img.shields.io/badge/ubuntu_bionic-18.04-orange.svg)
![ubuntu xenial](https://img.shields.io/badge/ubuntu_xenial-16.04-orange.svg)
# Overview # Overview
This is an example of a Snakemake workflow that: This is an example of a Snakemake workflow that:
- is a **command line utility** called `byok8s` - is a command line utility
- is bundled as an installable **Python package** - is bundled as a Python package
- is designed to run on a **Kubernetes (k8s) cluster** - is designed to run on a Kubernetes cluster
- can be **tested with Travis CI** (and/or locally) using [minikube](https://github.com/kubernetes/minikube) - can be tested locally or with Travis CI using minikube
## What is byok8s? Snakemake functionality is provided through
a command line tool called `byok8s`, so that
byok8s = Bring Your Own Kubernetes (cluster) it allows you to do this (abbreviated for clarity):
k8s = kubernetes
byok8s is a command line utility that launches
a Snakemake workflow on an existing Kubernetes
cluster. This allows you to do something
like this (also see the [Installation](installing.md)
and [Quickstart](quickstart.md) guides in the
documentation):
``` ```
# Install byok8s
python setup.py build install
# Create virtual k8s cluster # Create virtual k8s cluster
minikube start minikube start
# Run the workflow on the k8s cluster # Run the workflow
cd /path/to/workflow/ byok8s --s3-bucket=mah-s3-bukkit my-workflowfile my-paramsfile
byok8s my-workflowfile my-paramsfile --s3-bucket=my-bucket
# Clean up the virtual k8s cluster # Clean up the virtual k8s cluster
minikube stop minikube stop
``` ```
## Getting Up and Running
Snakemake workflows are provided via a Snakefile by Snakemake workflows are provided via a Snakefile by
the user. Snakemake runs tasks on the Kubernetes (k8s) the user. Snakemake runs tasks on the Kubernetes (k8s)
cluster. The approach is for the user to provide cluster. The approach is for the user to provide
their own Kubernetes cluster (byok8s = Bring Your their own Kubernetes cluster (byok8s = Bring Your
Own Kubernetes).
See the [Quickstart Guide](quickstart.md) to get up and The example above uses [`minikube`](https://github.com/kubernetes/minikube)
running with byok8s. to make a virtual k8s cluster, useful for testing.
## How does byok8s work? For real workflows, your options for
kubernetes clusters are cloud providers:
The command line utility requires the user to provide - AWS EKS (Elastic Container Service)
three input files: - GCP GKE (Google Kubernetes Engine)
- Digital Ocean Kubernetes service
- etc...
* A snakemake workflow, via a `Snakefile` The Travis CI tests utilize minikube to run
* A workflow configuration file (JSON) test workflows.
* A workflow parameters file (JSON)
Additionally, the user must create the following resources: # Quickstart
* A kubernetes cluster up and running This runs through the installation and usage
* An S3 bucket (and AWS credentials to read/write) of `2019-snakemake-byok8s`.
A sample Snakefile, workflow config file, and workflow Step 1: Set up Kubernetes cluster with `minikube`.
params file are provided in the `test/` directory.
The workflow config file specifies which workflow targets Step 2: Install `byok8s`.
and input files to use.
The workflow parameters file specifies which parameters to Step 3: Run the `byok8s` workflow using the Kubernetes cluster.
use for the workflow steps.
## Why S3 buckets? Step 4: Tear down Kubernetes cluster with `minikube`.
AWS credentials and an S3 bucket is required to run workflows because
of restrictions on file I/O on nodes in a kubernetes cluster. The Snakemake
workflows use AWS S3 buckets as remote providers for the Kubernetes nodes,
but this can be modified to any others that Snakemake supports.
AWS credentials are set with the two environment variables: ## Step 1: Set Up Virtual Kubernetes Cluster
For the purposes of the quickstart, we will walk
through how to set up a local, virtual Kubernetes
cluster using `minikube`.
Start by installing minikube:
``` ```
AWS_ACCESS_KEY_ID scripts/install_minikube.sh
AWS_SECRET_ACCESS_KEY
``` ```
These are passed into the Kubernetes cluster by byok8s and Snakemake. Once it is installed, you can start up a kubernetes cluster
with minikube using the following commands:
## Kubernetes and Minikube ```
cd test
minikube start
```
[Kubernetes](https://kubernetes.io/) is a technology that utilizes Docker NOTE: If you are running on AWS, run this command first
container to orchestrate a cluster of compute nodes. These compute nodes are
usually real compute nodes requested and managed via a cloud provider, like AWS
or Google Cloud.
But the compute nodes can also be virtual, which is where ```
[minikube](https://github.com/kubernetes/minikube) comes in. It creates a minikube config set vm-driver none
kubernetes cluster that is entirely local and virtual, which makes testing ```
easy. See the [byok8s Minikube Guide](kubernetes_minikube.md) for details
about how to use minikube with byok8s.
The Travis CI tests also utilize minikube to run test workflows. See [byok8s to set the vm driver to none and use native Docker to run stuff.
Travis Tests](travis_tests.md) for more information.
## Cloud Providers If you are running on AWS, the DNS in the minikube
kubernetes cluster will not work, so run this command
to fix the DNS settings (should be run from the
`test/` directory):
For real workflows, your options for ```
kubernetes clusters are cloud providers. kubectl apply -f fixcoredns.yml
We have guides for the following: kubectl delete --all pods --namespace kube-system
```
- AWS EKS (Elastic Container Service)
- GCP GKE (Google Kuberntes Engine)
- Digital Ocean Kubernetes service
# Kubernetes + byok8s: In Practice ## Step 2: Install byok8s
| Cloud Provider | Kubernetes Service | Guide | State | Start by setting up a python virtual environment,
|-----------------------------|---------------------------------|-------------------------------------------------|------------| and install the required packages into the
| Minikube (on AWS EC2) | Minikube | [byok8s Minikube Guide](kubernetes_minikube.md) | Finished | virtual environment:
| Google Cloud Platform (GCP) | Google Container Engine (GKE) | [byok8s GCP GKE Guide](kubernetes_gcp.md) | Finished |
| Amazon Web Services (AWS) | Elastic Container Service (EKS) | [byok8s AWS EKS Guide](kubernetes_aws.md) | Unfinished | ```
| Digital Ocean (DO) | DO Kubernetes (DOK) | [byok8s DO DOK Guide](kubernetes_dok.md) | Unfinished | pip install -r requirements.txt
```
This installs snakemake and kubernetes Python
modules. Now install the `byok8s` command line
tool:
```
python setup.py build install
```
Now you can run:
```
which byok8s
```
and you should see `byok8s` in your virtual
environment's `bin/` directory.
This command line utility will expect a kubernetes
cluster to be set up before it is run.
Setting up a kubernetes cluster will create...
(fill in more info here)...
Snakemake will automatically create the pods
in the cluster, so you just need to allocate
a kubernetes cluster.
## Step 3: Run byok8s
Now you can run the workflow with the `byok8s` command.
This submits the Snakemake workflow jobs to the Kubernetes
cluster that minikube created.
You should have your workflow in a `Snakefile` in the
current directory. Use the `--snakefile` flag if it is
named something other than `Snakefile`.
You will also need to specify your AWS credentials
via the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
environment variables. These are used to access
S3 buckets for file I/O.
Finally, you will need to create an S3 bucket for
Snakemake to use for file I/O. Pass the name of the
bucket using the `--s3-bucket` flag.
Start by exporting these two vars (careful to
scrub them from bash history):
```
export AWS_ACCESS_KEY_ID=XXXXX
export AWS_SECRET_ACCESS_KEY=XXXXX
```
Run the alpha workflow with blue params:
```
byok8s --s3-bucket=mah-bukkit workflow-alpha params-blue
```
Run the alpha workflow with red params:
```
byok8s --s3-bucket=mah-bukkit workflow-alpha params-red
```
Run the gamma workflow with red params, &c:
```
byok8s --s3-bucket=mah-bukkit workflow-gamma params-red
```
(NOTE: May want to let the user specify
input and output directories with flags.)
All input files are searched for relative to the working
directory.
## Step 4: Tear Down Kubernetes Cluster
The last step once the workflow has been finished,
is to tear down the kubernetes cluster. The virtual
kubernetes cluster created by minikube can be torn
down with the following command:
```
minikube stop
```
# Using Kubernetes with Cloud Providers
| Cloud Provider | Kubernetes Service | Guide |
|-----------------------------|---------------------------------|----------------------------------------------|
| Minikube (on AWS EC2) | Minikube | [Minikube AWS Guide](kubernetes_minikube.md) |
| Google Cloud Platform (GCP) | Google Container Engine (GKE) | [GCP GKE Guide](kubernetes_gcp.md) |
| Amazon Web Services (AWS) | Elastic Container Service (EKS) | [AWS EKS Guide](kubernetes_aws.md) |
| Digital Ocean (DO) | DO Kubernetes (DOK) | [DO DOK Guide](kubernetes_dok.md) |
Own Kubernetes).

11
docs/css/custom.css

@ -1,11 +0,0 @@
.md-typeset h1 { font-weight: 600; }
.md-typeset h2 { font-weight: 600; }
.md-typeset h3 { font-weight: 600; }
.md-typeset h4 { font-weight: 600; }
body {
background-color: #FAFAFA;
}
div.body {
background-color: #FAFAFA;
}

38
docs/images/ship.svg

@ -1,38 +0,0 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!-- Generator: Adobe Illustrator 16.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="Capa_1" x="0px" y="0px" width="512px" height="512px" viewBox="0 0 612 612" style="enable-background:new 0 0 612 612;" xml:space="preserve">
<g>
<path d="M612,342.869l-72.243,150.559c-9.036,17.516-27.098,28.521-46.808,28.521H66.974c-7.85,0-12.942-8.277-9.402-15.285 l0.179-0.355c5.778-11.439,2.35-25.383-8.074-32.836l-0.589-0.422c-24.197-17.305-38.554-45.225-38.554-74.973v-34.141h379.228 v-0.211c0-11.52,9.338-20.857,20.856-20.857H612L612,342.869z M368.693,216.46h-73.738c-5.818,0-10.534,4.716-10.534,10.534 v115.875c0,5.818,4.716,10.535,10.534,10.535h73.738c5.817,0,10.534-4.717,10.534-10.535V226.994 C379.228,221.176,374.511,216.46,368.693,216.46z M495.102,258.596h-84.272c-5.817,0-10.534,4.716-10.534,10.534v42.135 c0,5.818,4.717,10.535,10.534,10.535h84.272c5.818,0,10.534-4.717,10.534-10.535V269.13 C505.636,263.312,500.92,258.596,495.102,258.596z M168.545,353.402h84.272c5.818,0,10.534-4.717,10.534-10.533v-84.273 c0-5.818-4.716-10.534-10.534-10.534h-84.272c-5.818,0-10.534,4.716-10.534,10.534v84.273 C158.012,348.686,162.728,353.402,168.545,353.402z M163.155,195.391l-26.211,21.069v136.942H31.602V216.46H0v-21.069h73.738 v-30.546H46.506v-12.296h27.232V90.051h10.534v62.498h27.233v12.296H84.272v30.546H163.155z M117.913,282.062h-34.28v31.457h34.28 V282.062z M117.913,231.651h-34.28v31.458h34.28V231.651z" fill="#FFFFFF"/>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
<g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 1.8 KiB

125
docs/index.md

@ -1,125 +0,0 @@
# 2019-snakemake-byok8s
[![travis](https://img.shields.io/travis/charlesreid1/2019-snakemake-byok8s.svg)](https://travis-ci.org/charlesreid1/2019-snakemake-byok8s)
[![license](https://img.shields.io/github/license/charlesreid1/2019-snakemake-byok8s.svg)](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/LICENSE)
![minikube 0.32](https://img.shields.io/badge/minikube-%3E%3D0.32-blue.svg)
![k8s 0.12](https://img.shields.io/badge/kubernetes-%3E%3D0.12-blue.svg)
![ubuntu bionic](https://img.shields.io/badge/ubuntu_bionic-18.04-orange.svg)
![ubuntu xenial](https://img.shields.io/badge/ubuntu_xenial-16.04-orange.svg)
# Overview
This is an example of a Snakemake workflow that:
- is a **command line utility** called `byok8s`
- is bundled as an installable **Python package**
- is designed to run on a **Kubernetes (k8s) cluster**
- can be **tested with Travis CI** (and/or locally) using [minikube](https://github.com/kubernetes/minikube)
## What is byok8s?
byok8s = Bring Your Own Kubernetes (cluster)
k8s = kubernetes
byok8s is a command line utility that launches
a Snakemake workflow on an existing Kubernetes
cluster. This allows you to do something
like this (also see the [Installation](installing.md)
and [Quickstart](quickstart.md) guides in the
documentation):
```
# Install byok8s
python setup.py build install
# Create virtual k8s cluster
minikube start
# Run the workflow on the k8s cluster
cd /path/to/workflow/
byok8s my-workflowfile my-paramsfile --s3-bucket=my-bucket
# Clean up the virtual k8s cluster
minikube stop
```
## Getting Up and Running
See the [Quickstart Guide](quickstart.md) to get up and
running with byok8s.
## How does byok8s work?
The command line utility requires the user to provide
three input files:
* A snakemake workflow, via a `Snakefile`
* A workflow configuration file (JSON)
* A workflow parameters file (JSON)
Additionally, the user must create the following resources:
* A kubernetes cluster up and running
* An S3 bucket (and AWS credentials to read/write)
A sample Snakefile, workflow config file, and workflow
params file are provided in the `test/` directory.
The workflow config file specifies which workflow targets
and input files to use.
The workflow parameters file specifies which parameters to
use for the workflow steps.
## Why S3 buckets?
AWS credentials and an S3 bucket is required to run workflows because
of restrictions on file I/O on nodes in a kubernetes cluster. The Snakemake
workflows use AWS S3 buckets as remote providers for the Kubernetes nodes,
but this can be modified to any others that Snakemake supports.
AWS credentials are set with the two environment variables:
```
AWS_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY
```
These are passed into the Kubernetes cluster by byok8s and Snakemake.
## Kubernetes and Minikube
[Kubernetes](https://kubernetes.io/) is a technology that utilizes Docker
container to orchestrate a cluster of compute nodes. These compute nodes are
usually real compute nodes requested and managed via a cloud provider, like AWS
or Google Cloud.
But the compute nodes can also be virtual, which is where
[minikube](https://github.com/kubernetes/minikube) comes in. It creates a
kubernetes cluster that is entirely local and virtual, which makes testing
easy. See the [byok8s Minikube Guide](kubernetes_minikube.md) for details
about how to use minikube with byok8s.
The Travis CI tests also utilize minikube to run test workflows. See [byok8s
Travis Tests](travis_tests.md) for more information.
## Cloud Providers
For real workflows, your options for
kubernetes clusters are cloud providers.
We have guides for the following:
- AWS EKS (Elastic Container Service)
- GCP GKE (Google Kubernetes Engine)
- Digital Ocean Kubernetes service
# Kubernetes + byok8s: In Practice
| Cloud Provider | Kubernetes Service | Guide | State |
|-----------------------------|---------------------------------|-------------------------------------------------|------------|
| Minikube (on AWS EC2) | Minikube | [byok8s Minikube Guide](kubernetes_minikube.md) | Finished |
| Google Cloud Platform (GCP) | Google Container Engine (GKE) | [byok8s GCP GKE Guide](kubernetes_gcp.md) | Finished |
| Amazon Web Services (AWS) | Elastic Container Service (EKS) | [byok8s AWS EKS Guide](kubernetes_aws.md) | Unfinished |
| Digital Ocean (DO) | DO Kubernetes (DOK) | [byok8s DO DOK Guide](kubernetes_dok.md) | Unfinished |

114
docs/installing.md

@ -1,114 +0,0 @@
# Installing byok8s
byok8s requires two pieces of prerequisite software:
- python (conda)
- virtualenv (optional)
It also requires an AWS S3 bucket to be specified
(the bucket must exist and credentials to access it
must be provided via environment variables, see the
[Quickstart](quickstart.md)).
Additionally, if you are planning to run byok8s on
a local virtual kubernetes cluster, you must install:
- minikube
Otherwise, if you are planning on running byok8s on
remote kubernetes clusters provided by cloud providers,
you must install:
- kubernetes, ***OR***
- a cloud provider command line tool (`gcloud`, `aws`)
## Installing Python
We recommend installing pyenv and using pyenv
to install miniconda:
```plain
curl https://pyenv.run | bash
```
Restart your shell and install miniconda:
```plain
pyenv update
pyenv install miniconda3-4.3.30
pyenv global miniconda3-4.3.30
```
## Installing virtualenv
You will need the virtualenv package to
set up a virtual environment:
```plain
pip install virtualenv
```
## Installing minikube
This step is only required if you plan to run byok8s
kubernetes workflows locally on a virtual kubernetes
cluster (i.e., testing mode).
Install the 64-bit Linux version of minikube, or visit the
[installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/)
to find the right version:
```plain
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 \
&& sudo install minikube-linux-amd64 /usr/local/bin/minikube
```
(On a Mac you can do `brew install minikube`.)
If you are planning on running on a bare metal
machine, you will also need to install a hypervisor
like VirtualBox or KVM, see [installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/).
If you are planning on running minikube on a compute
node in the cloud, you cannot run a hypervisor, so you
will need to run using the native driver; see
[installing minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/).
Once you have installed minikube, you do not need to
install kubernetes.
## Installing byok8s
Start by cloning the repo and installing byok8s:
```plain
cd
git clone https://github.com/charlesreid1/2019-snakemake-byok8s.git
cd ~/2019-snakemake-byok8s
```
Next, you'll create a virtual environment:
```plain
virtualenv vp
source vp/bin/activate
pip install -r requirements.txt
python setup.py build install
```
Now you should be ready to rock:
```
which byok8s
```
This will only be present when you have activated
your virtual environment. To activate/re-activate your
virtual environment:
```
cd ~/2019-snakemake-byok8s
source vp/bin/activate
```

4
docs/kubernetes_aws.md

@ -1,4 +0,0 @@
# Kubernetes on AWS
Check back soon for an EKS guide!

263
docs/kubernetes_gcp.md

@ -1,263 +0,0 @@
# Kubernetes on Google Cloud Platform
This document will walk you through how to start a kubernetes cluster using the
Google Kubernetes Engine (GKE) on Google Cloud Platform (GCP), run the byok8s
Snakemake workflow on the GKE kubernetes cluster, and tear down the cluster
when the workflow is complete.
## Setup
Before you can create a kubernetes cluster on Google Cloud,
you need a Google Cloud account and a Google Cloud project.
You can sign up for a Google Cloud account [here](https://cloud.google.com/).
You can create a new project from the [Google Cloud Console](https://console.cloud.google.com/).
New accounts start with 300 free hours specifically to let you
test drive features like GKE! Cool!
Once you have your account and your project, you can install
the `gcloud` Google Cloud SDK command line utility
(see [Google Cloud SDK Quickstart Guide](https://cloud.google.com/sdk/docs/quickstarts)).
Once you have installed the `gcloud` utility, you will need
to log in with your Google account using the `init` command:
```
gcloud init
```
This will give you a link to enter into your browser, where
you will log in with your Google account and receive a code to
copy and paste into the terminal.
The **Compute API** and **Kubernetes API** will both need to be
enabled as well. These can be enabled via the
[Google Cloud Console](https://console.cloud.google.com/)
(or read on).
If you aren't sure how to use the console to enable these APIs, just start
running the commands below to create a kubernetes cluster, and the gcloud
utility will let you know if it needs APIs enabled for actions. If it can't
enable the API for you, it will give you a direct link to the relevant Google
Cloud Console page.
## Google Kubernetes Engine
GKE uses Google Cloud compute nodes to run a kubernetes cluster
on Google Cloud infrastructure. It automatically sets up the
cluster for you, and allows you to use `kubectl` and `gcloud` to
manage and interact with the remote cluster.
Official Google link: <https://cloud.google.com/kubernetes-engine/>
## Quickstart
As mentioned, make sure your account credentials are initialized:
```
gcloud init
```
Create a new GKE cluster:
```
gcloud container clusters create $CLUSTER_NAME --num-nodes=$NODES --region=us-west1
```
The `--scopes storage-rw` flag is required if you plan to use Google
Cloud buckets instead of S3 buckets (not currently enabled in byok8s).
Next get configuration details about the cluster so your local
kubernetes controller can control the cluster:
```
gcloud container clusters get-credentials $CLUSTER_NAME
```
**This will take several minutes.**
The cluster should now be up and running and ready to rock:
```
$ kubectl get pods --namespace=kube-system
NAME READY STATUS RESTARTS AGE
event-exporter-v0.2.3-54f94754f4-5jczv 2/2 Running 0 4m
fluentd-gcp-scaler-6d7bbc67c5-hkllz 1/1 Running 0 4m
fluentd-gcp-v3.1.0-48pb2 2/2 Running 0 2m
fluentd-gcp-v3.1.0-58dpx 2/2 Running 0 2m
fluentd-gcp-v3.1.0-c4b49 2/2 Running 0 2m
fluentd-gcp-v3.1.0-h24m5 2/2 Running 0 2m
fluentd-gcp-v3.1.0-hbdj4 2/2 Running 0 2m
fluentd-gcp-v3.1.0-rfnmt 2/2 Running 0 2m
fluentd-gcp-v3.1.0-vwd8w 2/2 Running 0 2m
fluentd-gcp-v3.1.0-wxt79 2/2 Running 0 2m
fluentd-gcp-v3.1.0-xkt42 2/2 Running 0 2m
heapster-v1.5.3-bc9f6bfd5-7jhqs 3/3 Running 0 3m
kube-dns-788979dc8f-l7hch 4/4 Running 0 4m
kube-dns-788979dc8f-pts99 4/4 Running 0 3m
kube-dns-autoscaler-79b4b844b9-j48js 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-9ad2912e-130p 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-9ad2912e-lfpw 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-9ad2912e-rt9m 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-b44fa389-2ds8 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-b44fa389-hc66 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-b44fa389-vh3x 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-d58ee1e7-2kkw 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-d58ee1e7-3l6r 1/1 Running 0 4m
kube-proxy-gke-mycluster-default-pool-d58ee1e7-4w18 1/1 Running 0 4m
l7-default-backend-5d5b9874d5-ms75l 1/1 Running 0 4m
metrics-server-v0.2.1-7486f5bd67-2n6cn 2/2 Running 0 3m
```
Now assuming you have installed `byok8s` and it is located
at `~/2019-snakemake-byok8s/`, you can run the test workflow
on the kubernetes cluster:
```
# Return to our virtual environment
cd ~/2019-snakemake-byok8s/test/
source vp/bin/activate
# Export AWS keys for Snakemake
export AWS_ACCESS_KEY_ID="XXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXX"
# Run byok8s
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
```
Once the workflow has run successfully, the results will be written
to S3 buckets and all the kubernetes containers created by snakemake
will be gone.
If all goes well, you should see output like this:
```
$ byok8s --s3-bucket=mah-bukkit -f workflow-alpha params-blue
--------
details!
snakefile: /home/ubuntu/2019-snakemake-byok8s/test/Snakefile
config: /home/ubuntu/2019-snakemake-byok8s/test/workflow-alpha.json
params: /home/ubuntu/2019-snakemake-byok8s/test/params-blue.json
target: target1
k8s namespace: default
--------
Building DAG of jobs...
Using shell: /bin/bash
Provided cores: 1
Rules claiming more threads will be scaled down.
Job counts:
count jobs
1 target1
1
Resources before job selection: {'_cores': 1, '_nodes': 9223372036854775807}
Ready jobs (1):
target1
Selected jobs (1):
target1
Resources after job selection: {'_cores': 0, '_nodes': 9223372036854775806}
[Mon Jan 28 23:49:51 2019]
rule target1:
output: cmr-0123/alpha.txt
jobid: 0
echo alpha blue > cmr-0123/alpha.txt
Get status with:
kubectl describe pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
kubectl logs snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
Checking status for pod snakejob-1ab52bdb-903b-5506-b712-ccc86772dc8d
[Mon Jan 28 23:50:41 2019]
Finished job 0.
1 of 1 steps (100%) done
Complete log: /home/ubuntu/2019-snakemake-byok8s/test/.snakemake/log/2019-01-28T234950.253823.snakemake.log
unlocking
removing lock
removing lock
removed all locks
```
Congratulations! You've just run an executable Snakemake workflow
on a Google Cloud kubernetes cluster!
You can get more information about the containers running each step of
the workflow using the `kubectl describe` commands printed in the output.
Here is an example:
```
$ kubectl describe pod snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001
Name: snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001
Namespace: default
Node: gke-mycluster-default-pool-b44fa389-vh3x/10.138.0.7
Start Time: Mon, 28 Jan 2019 23:55:18 -0800
Labels: app=snakemake
Annotations: <none>
Status: Running
IP: 10.0.6.4
Containers:
snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001:
Container ID: docker://2aaa04c34770c6088334b29c0332dc426aff2fbbd3a8af07b65bbbc2c5fe437d
Image: quay.io/snakemake/snakemake:v5.4.0
Image ID: docker-pullable://quay.io/snakemake/snakemake@sha256:f5bb7bef99c4e45cb7dfd5b55535b8dc185b43ca610341476378a9566a8b52c5
Port: <none>
Host Port: <none>
Command:
/bin/sh
Args:
-c
cp -rf /source/. . && snakemake cmr-0123/.zetaB1 --snakefile Snakefile --force -j --keep-target-files --keep-remote --latency-wait 0 --attempt 1 --force-use-threads --wrapper-prefix None --config 'name='"'"'blue'"'"'' -p --nocolor --notemp --no-hooks --nolock --default-remote-provider S3 --default-remote-prefix cmr-0123 --allowed-rules target3sleepyB1
State: Running
Started: Mon, 28 Jan 2019 23:56:15 -0800
Ready: True
Restart Count: 0
Requests:
cpu: 0
Environment:
AWS_ACCESS_KEY_ID: <set to the key 'aws_access_key_id' in secret 'e077a45f-1274-4a98-a76c-d1a9718707db'> Optional: false
AWS_SECRET_ACCESS_KEY: <set to the key 'aws_secret_access_key' in secret 'e077a45f-1274-4a98-a76c-d1a9718707db'> Optional: false
Mounts:
/source from source (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-jmnv4 (ro)
Conditions:
Type Status
Initialized True
Ready True
PodScheduled True
Volumes:
source:
Type: Secret (a volume populated by a Secret)
SecretName: e077a45f-1274-4a98-a76c-d1a9718707db
Optional: false
workdir:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
default-token-jmnv4:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-jmnv4
Optional: false
QoS Class: BestEffort
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
node.kubernetes.io/unreachable:NoExecute for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 63s default-scheduler Successfully assigned snakejob-c91f804c-805a-56a2-b0ea-b3b74bc38001 to gke-mycluster-default-pool-b44fa389-vh3x
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "workdir"
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "default-token-jmnv4"
Normal SuccessfulMountVolume 63s kubelet, gke-mycluster-default-pool-b44fa389-vh3x MountVolume.SetUp succeeded for volume "source"
Normal Pulling 61s kubelet, gke-mycluster-default-pool-b44fa389-vh3x pulling image "quay.io/snakemake/snakemake:v5.4.0"
Normal Pulled 10s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Successfully pulled image "quay.io/snakemake/snakemake:v5.4.0"
Normal Created 6s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Created container
Normal Started 6s kubelet, gke-mycluster-default-pool-b44fa389-vh3x Started container
```
Delete the GKE cluster when you are done:
```
gcloud container clusters delete $CLUSTER_NAME
```

115
docs/kubernetes_minikube.md

@ -1,115 +0,0 @@
# Running byok8s with minikube
## Installing
See the [Installing](installing.md) page for details
about installing byok8s and its prerequisites
(including minikube).
We cover two scenarios:
- bare metal machine, i.e., a laptop or desktop machine
that can run a hypervisor like VirtualBox
- cloud machine, i.e., AWS EC2 node, which is itself a
virtual machine and cannot run a hypervisor
These quickstarts assume you have Python and minikube
installed, and that you have cloned and installed byok8s
at `~/2019-snakemake-byok8s/`.
## Quickstart on Bare Metal Machine
On a bare metal machine, the procedure is
relatively uncomplicated: we create a cluster,
we export some variables, we run the workflow,
we tear down the cluster:
```plain
# Start a minikube cluster
minikube start
# Verify k8s is running
minikube status
# Export AWS credentials
export AWS_ACCESS_KEY_ID="XXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXX"
# Run the workflow
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
# Stop the minikube cluster
minikube stop
```
## Quickstart on Cloud Machine
As mentioned above, cloud compute nodes are virtual machines
themselves and cannot run a hypervisor, so things are a bit
more complicated.
To tell minikube not to use a virtual machine driver,
run the following command in a terminal to create
a minikube config file:
```
cat <<'EOF' > ~/.minikube/config/config.json
{
"vm-driver": "none"
}
EOF
```
Now you can start up a minikube cluster.
There is an additional DNS problem that needs to be fixed
in the containers before you proceed. You will know there
is a problem if you run the `get pods` command with
`kubectl` and see your CoreDNS containers in a
`CrashLoopBackOff` state:
```text
$ kubectl get pods --namespace=kube-system
NAME READY STATUS RESTARTS AGE
coredns-86c58d9df4-lvq8b 0/1 CrashLoopBackOff 5 5m17s
coredns-86c58d9df4-pr52t 0/1 CrashLoopBackOff 5 5m17s
... ... ... ... ...
```
To fix the problem with the DNS settings, we have to patch
the CoreDNS image being used by `kube-system`.
To do that, use the file
[`test/fixcoredns.yml`](https://github.com/charlesreid1/2019-snakemake-byok8s/blob/master/test/fixcoredns.yml)
in this repository with `kubectl apply`:
```plain
# Fix the DNS container
kubectl apply -f fixcoredns.yml
# Delete all kube-system containers
kubectl delete --all pods --namespace kube-system
```
The kube-system containers will be re-spawned by the cluster control system.
It should happen in a few seconds, and then you'll be ready to run byok8s:
```
# Return to our virtual environment
cd ~/2019-snakemake-byok8s/test/
source vp/bin/activate
# Verify k8s is running
minikube status
# Export AWS keys for Snakemake
export AWS_ACCESS_KEY_ID="XXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXX"
# Run byok8s
byok8s workflow-alpha params-blue --s3-bucket=mah-bukkit
```
Congratulations! You've just run an executable Snakemake workflow
on a minikube kubernetes cluster.

155
docs/quickstart.md

@ -1,155 +0,0 @@
# Quickstart
This runs through the installation and usage
of `2019-snakemake-byok8s`.
Step 1: Set up Kubernetes cluster with `minikube`.
Step 2: Install `byok8s`.
Step 3: Run the `byok8s` workflow using the Kubernetes cluster.
Step 4: Tear down Kubernetes cluster with `minikube`.
## Step 1: Set Up Virtual Kubernetes Cluster
For the purposes of the quickstart, we will walk
through how to set up a local, virtual Kubernetes
cluster using `minikube`.
Start by installing minikube:
```
scripts/install_minikube.sh
```
Once it is installed, you can start up a kubernetes cluster
with minikube using the following commands:
```
cd test
minikube start
```
NOTE: If you are running on AWS, run this command first
```
minikube config set vm-driver none
```
to set the vm driver to none and use native Docker to run containers.
If you are running on AWS, the DNS in the minikube
kubernetes cluster will not work, so run this command
to fix the DNS settings (should be run from the
`test/` directory):
```
kubectl apply -f fixcoredns.yml
kubectl delete --all pods --namespace kube-system
```
## Step 2: Install byok8s
Start by setting up a python virtual environment,
and install the required packages into the
virtual environment:
```
pip install -r requirements.txt
```
This installs snakemake and kubernetes Python
modules. Now install the `byok8s` command line
tool:
```
python setup.py build install
```
Now you can run:
```
which byok8s
```
and you should see `byok8s` in your virtual
environment's `bin/` directory.
This command line utility will expect a kubernetes
cluster to be set up before it is run.
Setting up a kubernetes cluster will create...
(fill in more info here)...
Snakemake will automatically create the pods
in the cluster, so you just need to allocate
a kubernetes cluster.
## Step 3: Run byok8s
Now you can run the workflow with the `byok8s` command.
This submits the Snakemake workflow jobs to the Kubernetes
cluster that minikube created.
You should have your workflow in a `Snakefile` in the
current directory. Use the `--snakefile` flag if it is
named something other than `Snakefile`.
You will also need to specify your AWS credentials
via the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
environment variables. These are used to access
S3 buckets for file I/O.
Finally, you will need to create an S3 bucket for
Snakemake to use for file I/O. Pass the name of the
bucket using the `--s3-bucket` flag.
Start by exporting these two vars (careful to
scrub them from bash history):
```
export AWS_ACCESS_KEY_ID=XXXXX
export AWS_SECRET_ACCESS_KEY=XXXXX
```
Run the alpha workflow with blue params:
```
byok8s --s3-bucket=mah-bukkit workflow-alpha params-blue
```
Run the alpha workflow with red params:
```
byok8s --s3-bucket=mah-bukkit workflow-alpha params-red
```
Run the gamma workflow with red params, etc.:
```
byok8s --s3-bucket=mah-bukkit workflow-gamma params-red
```
(NOTE: May want to let the user specify
input and output directories with flags.)
All input files are searched for relative to the working
directory.
## Step 4: Tear Down Kubernetes Cluster
The last step, once the workflow has finished,
is to tear down the kubernetes cluster. The virtual
kubernetes cluster created by minikube can be torn
down with the following command:
```
minikube stop
```

5
docs/travis_tests.md

@ -1,5 +0,0 @@
# Travis Tests with Minikube
This page is in progress; see this post
on the <https://charlesreid1.github.io> blog for info:
[Building Snakemake Command Line Wrappers for Kubernetes Workflows](https://charlesreid1.github.io/building-snakemake-command-line-wrappers-for-kubernetes-workflows.html).

6
kubernetes_aws.md

@ -0,0 +1,6 @@
# Kubernetes on AWS
## Elastic Container Service
## Quickstart

4
docs/kubernetes_dok.md → kubernetes_dok.md

@ -1,9 +1,11 @@
# Kubernetes on Digital Ocean # Kubernetes on Digital Ocean
Check back soon for a Digital Ocean kubernetes guide! ## Digital Ocean Kubernetes
(Use web interface to set up a Kubernetes cluster, (Use web interface to set up a Kubernetes cluster,
then use `kubectl` to connect with Digital Ocean then use `kubectl` to connect with Digital Ocean
via Digital Ocean credentials.) via Digital Ocean credentials.)
## Quickstart
[link](https://www.digitalocean.com/docs/kubernetes/how-to/connect-with-kubectl/) [link](https://www.digitalocean.com/docs/kubernetes/how-to/connect-with-kubectl/)

7
kubernetes_gcp.md

@ -0,0 +1,7 @@
# Kubernetes on Google Cloud Platform
## Google Container Engine
## Quickstart

6
kubernetes_minikube.md

@ -0,0 +1,6 @@
# Minikube on AWS EC2 Nodes
## Quickstart

1
mkdocs-material-dib

@ -1 +0,0 @@
Subproject commit 745d13f187711bc43865dcb44f21a010689d27ac

42
mkdocs.yml

@ -1,42 +0,0 @@
site_name: 2019-snakemake-byok8s
site_url: https://charlesreid1.github.io/2019-snakemake-byok8s
repo_name: 2019-snakemake-byok8s
repo_url: https://github.com/charlesreid1/2019-snakemake-byok8s
edit_uri: ""
copyright: 'Copyright &copy; 2018 <a href="https://charlesreid1.com">Charles Reid</a>, released under the <a href="https://opensource.org/licenses/MIT">MIT license</a> <br /><br />
<div>Icon made by Freepik, obtained from <a href="https://www.flaticon.com/" title="Flaticon">www.flaticon.com</a>, used under a <a href="http://creativecommons.org/licenses/by/3.0/" title="Creative Commons BY 3.0" target="_blank">CC 3.0 BY</a></div> license.'
docs_dir: docs
site_dir: site
extra_css:
- css/custom.css
theme:
name: null
custom_dir: 'mkdocs-material-dib/material'
palette:
primary: 'blue'
accent: 'blue'
logo: 'images/ship.svg'
font:
text: 'Roboto'
code: 'Roboto Mono'
nav:
- 'Index': 'index.md'
- 'Installing': 'installing.md'
- 'Quickstart': 'quickstart.md'
- 'K8s with Minikube' : 'kubernetes_minikube.md'
- 'K8s with GCP' : 'kubernetes_gcp.md'
- 'K8s with AWS' : 'kubernetes_aws.md'
- 'K8s with DigitalOcean' : 'kubernetes_dok.md'
- 'Travis Tests': 'travis_tests.md'
# Extensions
markdown_extensions:
- admonition
- codehilite:
guess_lang: false
- toc:
permalink: true
strict: true

1
requirements-docs.txt

@ -1 +0,0 @@
mkdocs>=1.0
Loading…
Cancel
Save