Skip to content

Commit

Permalink
Merge pull request #4 from QGreenland-Net/run-parsl-on-adc-cluster
Browse files Browse the repository at this point in the history
WIP set up process to run the Parsl script as a k8s Job
  • Loading branch information
mfisher87 authored Apr 15, 2024
2 parents ded1760 + 5919362 commit 53dd96a
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 42 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/build-and-publish-container-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: "Build and publish container image"

# Triggered by pushes to "main" that touch the image inputs listed under
# `paths`, and by pushes of version-like tags (e.g. "v1.2.3").
on:
  push:
    paths:
      - "Dockerfile"
      - "environment.yml"
    branches:
      - "main"
    tags:
      - "v[0-9]+.[0-9]+.[0-9]+*"


jobs:

  build-and-release-image:
    name: "Build and release container image"
    runs-on: "ubuntu-latest"
    # Pushing to GHCR with GITHUB_TOKEN requires `packages: write`. Declare the
    # token scopes explicitly so the job does not depend on the repository's or
    # organization's default token permissions (which may be read-only).
    permissions:
      contents: "read"
      packages: "write"
    env:
      # IMAGE_NAME: "${{ github.repo_name_or_something_like_that }}"
      # NOTE: It's important that the image name matches org name / repo name.
      # TODO: Calculate image name?
      IMAGE_NAME: "qgreenland-net/parsl-exploration"
      # GitHub Actions expressions don't have great conditional support, so
      # writing a ternary expression looks a lot like bash. In Python, this
      # would read as:
      #     github.ref_name if github.ref_type == 'tag' else 'latest'
      # https://docs.github.com/en/actions/learn-github-actions/expressions
      IMAGE_TAG: "${{ github.ref_type == 'tag' && github.ref_name || 'latest' }}"
    steps:
      - name: "Check out repository"
        uses: "actions/checkout@v4"

      # The image is tagged with its full GHCR name at build time so the push
      # step below can refer to it directly.
      - name: "Build container image"
        run: |
          docker build --tag "ghcr.io/${IMAGE_NAME}:${IMAGE_TAG}" .

      - name: "GHCR login"
        uses: "docker/login-action@v3"
        with:
          registry: "ghcr.io"
          username: "${{ github.repository_owner }}"
          password: "${{ secrets.GITHUB_TOKEN }}"

      - name: "Push to GHCR"
        run: |
          docker push "ghcr.io/${IMAGE_NAME}:${IMAGE_TAG}"
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image providing the conda environment described by "environment.yml",
# built on the micromamba base image.
FROM mambaorg/micromamba:1.5.8 AS micromamba

# Copy the build context into the image's working directory, owned by the user
# named by the base image's MAMBA_USER variable.
COPY --chown=$MAMBA_USER:$MAMBA_USER . .

# Install and clean up in a single layer: a `clean` in a separate RUN cannot
# shrink the image, because the package cache is already stored in the layer
# created by the install step.
RUN micromamba install --yes --name "base" --file "environment.yml" \
    && micromamba clean --all --yes

# ENV PATH "/opt/conda/bin:${PATH}"
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,8 @@ workers need to be able to connect back to the host running the Parsl program. I
behind a firewall you don't control, this may not be possible!

The workaround we're using is to submit a Kubernetes Job that runs the Parsl init
program from a ConfigMap. See `hello-world-job.yml` for a generic example of this.

Run it with `kubectl apply -f hello-world-job.yml`.

We haven't yet got this working with Parsl, but that's the next step!
program from a ConfigMap. See `run-on-remote-cluster.sh` and `job.yml` for an
example of this.


## Troubleshooting
Expand Down
34 changes: 10 additions & 24 deletions hello-world-job.yml → job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ spec:
# TODO: when completions is 1, parallelism must be 1, but do we need to specify it?
parallelism: 1
completions: 1
# TODO: Supported in k8s 1.23, but ADC has 1.22; this would be nice :)
# ttlSecondsAfterFinished: 60
template:
metadata:
name: "parsl-init"
Expand All @@ -14,38 +16,22 @@ spec:
# MountVolume.SetUp failed for volume "kube-api-access-xxxxx" : object "qgnet"/"kube-root-ca.crt" not registered
automountServiceAccountToken: false
volumes:
- name: "parsl-init-scripts-volume"
- name: "parsl-init-script-volume"
configMap:
name: "parsl-init-scripts"
name: "parsl-init-script"
containers:
- name: "parsl-init"
image: "python"
volumeMounts:
- mountPath: "/parsl-init-scripts"
name: "parsl-init-scripts-volume"
- mountPath: "/parsl-init-script"
name: "parsl-init-script-volume"
env:
# TODO: Do we need this?
- name: "HOME"
value: "/tmp"
command:
- "python"
- "/parsl-init-scripts/run.py" # Filename from ConfigMap
- "bash"
- "-c"
# TODO: Bake parsl into an image
- "pip install parsl && python /parsl-init-script/run.py" # << Filename from ConfigMap
restartPolicy: "Never"

---

apiVersion: "v1"
# TODO: WHY? Just express ConfigMap alone at the top level?
kind: "List"
items:
- apiVersion: "v1"
kind: "ConfigMap"
data:
run.py: |
print("I'm the Python script (run.py)!")
print("Hello world :|")
metadata:
creationTimestamp: null
name: "parsl-init-scripts"
# TODO: WHY? Omit?
metadata: {}
16 changes: 16 additions & 0 deletions run-on-remote-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Upload run.py to the cluster as a ConfigMap, then submit the Job defined in
# job.yml.
set -euo pipefail

# Resolve input files relative to this script rather than the caller's working
# directory, so the script can be invoked from anywhere.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Send our Parsl init script to the cluster. This will update the ConfigMap if
# there are any changes. Note that "age" represents the time since the
# ConfigMap was created, not since it was last updated.
# The ConfigMap key is pinned to "run.py" because job.yml refers to the script
# by that filename.
kubectl create configmap parsl-init-script \
    --from-file="run.py=${script_dir}/run.py" \
    -o yaml --dry-run=client \
    | kubectl apply -f -

# Submit a "Job" to the cluster which runs our script
# TODO: Should we delete any pre-existing job? We're manually doing `kubectl delete` now.
kubectl apply -f "${script_dir}/job.yml"


# TODO: Can we also attach to monitor `kubectl describe job` or something?
24 changes: 11 additions & 13 deletions run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
"""Example parsl workflow to be executed on kubernetes."""
"""Example parsl workflow to be executed on kubernetes.
TODO:
* Less printing more logging
"""

import subprocess

Expand Down Expand Up @@ -28,16 +33,8 @@ def get_k8s_context() -> str:

context = result.stdout.decode("utf8").strip()

assert context in ("rancher-desktop", "dev-qgnet")
if context == "dev-qgnet":
raise NotImplementedError(
"Running on the 'dev-qgnet' namespace fails due to container"
" communication issues. Symptom: This script hangs. Remove this check from"
" the code to re-test."
)

print(f"Detected context: {context}")

assert context in ("rancher-desktop", "dev-qgnet")
return context


Expand All @@ -62,13 +59,14 @@ def get_parsl_config():
cores_per_worker=1,
max_workers_per_node=1,
worker_logdir_root="/tmp/",
# Address for the pod worker to connect back
address=address_by_route(),
# Address for the pod worker to connect back to the "interchange"
address="8.44.147.13",
# address=address_by_route(),
# https://parsl.readthedocs.io/en/stable/stubs/parsl.providers.KubernetesProvider.html#parsl.providers.KubernetesProvider
provider=KubernetesProvider(
namespace=k8s_namespace,
# Docker image url to use for pods
image="python",
# NOTE: the GitHub Container Registry host is "ghcr.io" (not "gchr.io").
image="ghcr.io/mbjones/k8sparsl:0.3",
# Command to be run upon pod start, such as:
# "module load Anaconda; source activate parsl_env".
# or "pip install parsl"
Expand Down

0 comments on commit 53dd96a

Please sign in to comment.