From 0efc7360396637f163fb231ff5d647cc6bc53959 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Fri, 12 Apr 2024 17:18:34 -0600 Subject: [PATCH 1/5] WIP set up process to run the Parsl script as a k8s Job Currently we're running into an error mounting the config map, which didn't happen on the parent commit (which used a config map to run a hello world Python script). We have no idea why, but it's the end of the day! --- README.md | 7 ++----- hello-world-job.yml => job.yml | 34 ++++++++++------------------------ run-on-remote-cluster.sh | 16 ++++++++++++++++ run.py | 24 +++++++++++------------- 4 files changed, 39 insertions(+), 42 deletions(-) rename hello-world-job.yml => job.yml (56%) create mode 100755 run-on-remote-cluster.sh diff --git a/README.md b/README.md index bf45851..759ceb3 100644 --- a/README.md +++ b/README.md @@ -59,11 +59,8 @@ workers need to be able to connect back to the host running the Parsl program. I behind a firewall you don't control, this may not be possible! The workaround we're using is to submit a Kubernetes Job that runs the Parsl init -program from a ConfigMap. See `hello-world-job.yml` for a generic example of this. - -Run it with `kubectl apply -f hello-world-job.yml`. - -We haven't yet got this working with Parsl, but that's the next step! +program from a ConfigMap. See `run-on-remote-cluster.sh` and `job.yml` for an +example of this. ## Troubleshooting diff --git a/hello-world-job.yml b/job.yml similarity index 56% rename from hello-world-job.yml rename to job.yml index 086ed27..21c1bec 100644 --- a/hello-world-job.yml +++ b/job.yml @@ -6,6 +6,8 @@ spec: # TODO: when completions is 1, parallelism must be 1, but do we need to specify it? 
parallelism: 1 completions: 1 + # TODO: Supported in k8s 1.23, but ADC has 1.22; this would be nice :) + # ttlSecondsAfterFinished: 60 template: metadata: name: "parsl-init" @@ -14,38 +16,22 @@ spec: # MountVolume.SetUp failed for volume "kube-api-access-xxxxx" : object "qgnet"/"kube-root-ca.crt" not registered automountServiceAccountToken: false volumes: - - name: "parsl-init-scripts-volume" + - name: "parsl-init-script-volume" configMap: - name: "parsl-init-scripts" + name: "parsl-init-script" containers: - name: "parsl-init" image: "python" volumeMounts: - - mountPath: "/parsl-init-scripts" - name: "parsl-init-scripts-volume" + - mountPath: "/parsl-init-script" + name: "parsl-init-script-volume" env: # TODO: Do we need this? - name: "HOME" value: "/tmp" command: - - "python" - - "/parsl-init-scripts/run.py" # Filename from ConfigMap + - "bash" + - "-c" + # TODO: Bake parsl into an image + - "pip install parsl && python /parsl-init-script/run.py" # << Filename from ConfigMap restartPolicy: "Never" - ---- - -apiVersion: "v1" -# TODO: WHY? Just express ConfigMap alone at the top level? -kind: "List" -items: -- apiVersion: "v1" - kind: "ConfigMap" - data: - run.py: | - print("I'm the Python script (run.py)!") - print("Hello world :|") - metadata: - creationTimestamp: null - name: "parsl-init-scripts" -# TODO: WHY? Omit? -metadata: {} diff --git a/run-on-remote-cluster.sh b/run-on-remote-cluster.sh new file mode 100755 index 0000000..35006c0 --- /dev/null +++ b/run-on-remote-cluster.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Send our Parsl init script to the cluster. This will update the ConfigMap if +# there are any changes. Note that "age" represents the time since the +# ConfigMap was created, not since it was last updated. +kubectl create configmap parsl-init-script --from-file run.py \ + -o yaml --dry-run=client \ + | kubectl apply -f - + +# Submit a "Job" to the cluster which runs our script +# TODO: Should we delete any pre-existing job? 
We're manually doing `kubectl delete` now. +kubectl apply -f job.yml + + +# TODO: Can we also attach to monitor `kubectl describe job` or something? diff --git a/run.py b/run.py index ce9a902..d08a964 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,9 @@ -"""Example parsl workflow to be executed on kubernetes.""" +"""Example parsl workflow to be executed on kubernetes. + +TODO: + +* Less printing more logging +""" import subprocess @@ -28,16 +33,8 @@ def get_k8s_context() -> str: context = result.stdout.decode("utf8").strip() - assert context in ("rancher-desktop", "dev-qgnet") - if context == "dev-qgnet": - raise NotImplementedError( - "Running on the 'dev-qgnet' namespace fails due to container" - " communication issues. Symptom: This script hangs. Remove this check from" - " the code to re-test." - ) - print(f"Detected context: {context}") - + assert context in ("rancher-desktop", "dev-qgnet") return context @@ -62,13 +59,14 @@ def get_parsl_config(): cores_per_worker=1, max_workers_per_node=1, worker_logdir_root="/tmp/", - # Address for the pod worker to connect back - address=address_by_route(), + # Address for the pod worker to connect back to the "interchange" + address="8.44.147.13", + # address=address_by_route(), # https://parsl.readthedocs.io/en/stable/stubs/parsl.providers.KubernetesProvider.html#parsl.providers.KubernetesProvider provider=KubernetesProvider( namespace=k8s_namespace, # Docker image url to use for pods - image="python", + image="ghcr.io/mbjones/k8sparsl:0.3", # Command to be run upon pod start, such as: # "module load Anaconda; source activate parsl_env". 
# or "pip install parsl" From 466a520bb726522d8f4ea84edeb6ca2d6e541754 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 15 Apr 2024 10:41:24 -0600 Subject: [PATCH 2/5] Add Dockerfile and GitHub Actions build --- .../build-and-publish-container-image.yml | 45 +++++++++++++++++++ Dockerfile | 8 ++++ 2 files changed, 53 insertions(+) create mode 100644 .github/workflows/build-and-publish-container-image.yml create mode 100644 Dockerfile diff --git a/.github/workflows/build-and-publish-container-image.yml b/.github/workflows/build-and-publish-container-image.yml new file mode 100644 index 0000000..47a97ce --- /dev/null +++ b/.github/workflows/build-and-publish-container-image.yml @@ -0,0 +1,45 @@ +name: "Build and publish container image" + +on: + push: + paths: + - "Dockerfile" + - "environment.yml" + branches: + - "main" + tags: + - "v[0-9]+.[0-9]+.[0-9]+*" + + +jobs: + + build-and-release-image: + name: "Build and release container image" + runs-on: "ubuntu-latest" + env: + # IMAGE_NAME: "${{ github.repo_name_or_something_like_that }}" + IMAGE_NAME: "parsl-exploration" + # GitHub Actions expressions don't have great conditional support, so + # writing a ternary expression looks a lot like bash. In Python, this + # would read as: + # github.ref_name if github.ref_type == 'tag' else 'latest' + # https://docs.github.com/en/actions/learn-github-actions/expressions + IMAGE_TAG: "${{ github.ref_type == 'tag' && github.ref_name || 'latest' }}" + steps: + - name: "Check out repository" + uses: "actions/checkout@v3" + + - name: "Build container image" + run: | + docker build --tag "ghcr.io/${IMAGE_NAME}:${IMAGE_TAG}" . 
+ + - name: "GHCR login" + uses: "docker/login-action@v2" + with: + registry: "ghcr.io" + username: "${{ github.repository_owner }}" + password: "${{ secrets.GITHUB_TOKEN }}" + + - name: "Push to GHCR" + run: | + docker push "ghcr.io/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..029f96a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM mambaorg/micromamba:1.5.8 AS micromamba + +COPY --chown=$MAMBA_USER:$MAMBA_USER . . + +RUN micromamba install --yes --name "base" --file "environment.yml" +RUN micromamba clean --all --yes + +# ENV PATH "/opt/conda/bin:${PATH}" From fded01a54790f7219e23afa3669425b928586465 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 15 Apr 2024 10:42:29 -0600 Subject: [PATCH 3/5] REVERT ME: Allow build on this branch --- .github/workflows/build-and-publish-container-image.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-publish-container-image.yml b/.github/workflows/build-and-publish-container-image.yml index 47a97ce..2ccbd4c 100644 --- a/.github/workflows/build-and-publish-container-image.yml +++ b/.github/workflows/build-and-publish-container-image.yml @@ -2,11 +2,12 @@ name: "Build and publish container image" on: push: - paths: - - "Dockerfile" - - "environment.yml" + # paths: + # - "Dockerfile" + # - "environment.yml" branches: - "main" + - "run-parsl-on-adc-cluster" tags: - "v[0-9]+.[0-9]+.[0-9]+*" From 10dfe1eba606545bce3d6c765bbf6aca23ceff3d Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 15 Apr 2024 10:50:53 -0600 Subject: [PATCH 4/5] Fix image name --- .github/workflows/build-and-publish-container-image.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-publish-container-image.yml b/.github/workflows/build-and-publish-container-image.yml index 2ccbd4c..623838e 100644 --- a/.github/workflows/build-and-publish-container-image.yml +++ 
b/.github/workflows/build-and-publish-container-image.yml @@ -19,7 +19,9 @@ jobs: runs-on: "ubuntu-latest" env: # IMAGE_NAME: "${{ github.repo_name_or_something_like_that }}" - IMAGE_NAME: "parsl-exploration" + # NOTE: It's important that the image name matches org name / repo name. + # TODO: Calculate image name? + IMAGE_NAME: "qgreenland-net/parsl-exploration" # GitHub Actions expressions don't have great conditional support, so # writing a ternary expression looks a lot like bash. In Python, this # would read as: From 5919362d9c2c77560fa3a25af4bb240b627c3337 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 15 Apr 2024 10:53:17 -0600 Subject: [PATCH 5/5] Revert "REVERT ME: Allow build on this branch" This reverts commit fded01a54790f7219e23afa3669425b928586465. --- .github/workflows/build-and-publish-container-image.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-publish-container-image.yml b/.github/workflows/build-and-publish-container-image.yml index 623838e..9435ed5 100644 --- a/.github/workflows/build-and-publish-container-image.yml +++ b/.github/workflows/build-and-publish-container-image.yml @@ -2,12 +2,11 @@ name: "Build and publish container image" on: push: - # paths: - # - "Dockerfile" - # - "environment.yml" + paths: + - "Dockerfile" + - "environment.yml" branches: - "main" - - "run-parsl-on-adc-cluster" tags: - "v[0-9]+.[0-9]+.[0-9]+*"