Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .aws-architecture
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
linux/arm64
{
"gpu": "linux/amd64",
"cpu": "linux/arm64"
}
135 changes: 97 additions & 38 deletions .github/workflows/dev-build.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
### This is the Terraform-generated dev-build.yml workflow for the ###
### timdex-embeddings-dev app repository. ###
### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
### the document. If the container requires any additional pre-build ###
### commands, uncomment and edit the PREBUILD line at the end of the ###
### document. ###
### This is a custom dev-build.yml workflow for the ###
### timdex-embeddings-dev app repository. It is customized in order to ###
### support parallel builds for amd64 and/or arm64 for AWS Batch compute ###
### environments that have GPUs and compute environments that do NOT have ###
### GPUs. ###


name: Dev Container Build and Deploy
on:
Expand All @@ -13,48 +13,107 @@ on:
- main
paths-ignore:
- '.github/**'
- 'docs/**'
- 'tests/**'
- 'README.md'

permissions:
id-token: write
contents: read

env:
AWS_REGION: "us-east-1"
GHA_ROLE: "timdex-embeddings-gha-dev"
REPOSITORY: "timdex-embeddings-dev"

jobs:
prep:
name: Prep for Build
choose-runners:
name: Determine Runner architecture from .aws-architecture file
runs-on: ubuntu-latest
outputs:
cpuarch: ${{ steps.setarch.outputs.cpuarch }}
outputs:
gpu_arch: ${{ steps.out.outputs.gpu_arch }}
cpu_arch: ${{ steps.out.outputs.cpu_arch }}
steps:
- uses: actions/checkout@v5

# Read the target platform for each image variant from the JSON
# .aws-architecture file and publish them as the step outputs
# gpu_arch and cpu_arch. jq's `//` alternative operator substitutes
# "linux/amd64" when the "gpu" or "cpu" key is missing or null.
- id: out
run: |
GPU_ARCH=$(jq -r '.gpu // "linux/amd64"' .aws-architecture)
CPU_ARCH=$(jq -r '.cpu // "linux/amd64"' .aws-architecture)
echo "gpu_arch=$GPU_ARCH" >> $GITHUB_OUTPUT
echo "cpu_arch=$CPU_ARCH" >> $GITHUB_OUTPUT

build:
name: Build ${{ matrix.variant }} (${{ matrix.arch }})
needs: choose-runners
strategy:
fail-fast: false
matrix:
include:
- variant: gpu
arch: ${{ needs.choose-runners.outputs.gpu_arch }}
- variant: cpu
arch: ${{ needs.choose-runners.outputs.cpu_arch }}
runs-on: ${{ matrix.arch == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }}

steps:
- name: Checkout
- name: Checkout code
uses: actions/checkout@v5

- name: Set CPU Architecture
id: setarch
- name: Set Tags
id: tags
run: |
echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY
if [[ -f .aws-architecture ]]; then
ARCH=$(cat .aws-architecture)
echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY
KEY=${{ matrix.variant }}
ARCH=${{ matrix.arch }}
TAG_ARCH=$(echo "$ARCH" | cut -d'/' -f2)
if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
TAG_SHA=$(echo $GITHUB_SHA | cut -c 1-8)
TAG_PR=$GITHUB_EVENT_NAME
else
ARCH="linux/amd64"
echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY
TAG_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c 1-8)
TAG_PR="PR-${{ github.event.pull_request.number }}"
fi
if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then
echo "$ARCH is INVALID architecture!"
echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY
exit 1
fi
echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT

deploy:
needs: prep
name: Dev Deploy
uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-dev.yml@main
secrets: inherit
with:
AWS_REGION: "us-east-1"
GHA_ROLE: "timdex-embeddings-gha-dev"
ECR: "timdex-embeddings-dev"
CPU_ARCH: ${{ needs.prep.outputs.cpuarch }}
# FUNCTION: ""
# PREBUILD:
echo "arch=${ARCH}" >> $GITHUB_OUTPUT
echo "tag_latest=latest-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT
echo "tag_sha=${TAG_SHA}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT
echo "tag_pr=${TAG_PR}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT

# Delete the runner's .NET, Android and GHC directories plus the
# directory named by $AGENT_TOOLSDIRECTORY before building.
# NOTE(review): presumably needed so the image build has enough disk
# space on the hosted runner - confirm.
- name: Free Disk Space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

# Assume the IAM role named by env.GHA_ROLE in the account given by the
# AWS_ACCT_DEV secret, in the region given by env.AWS_REGION.
- name: Configure AWS Dev credentials
uses: aws-actions/configure-aws-credentials@v5
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_DEV }}:role/${{ env.GHA_ROLE }}

# Log in to ECR. The step id matters: the "Build and push" step reads
# steps.login-ecr.outputs.registry to build the image tags.
- name: Login to Dev ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2

# Set up a Buildx builder with the docker-container driver. The
# "Build and push" step selects it via steps.buildx.outputs.name.
- name: Setup BuildX Builder
id: buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container

# Build the image for the current matrix variant from
# ./Dockerfile-<variant> (repository root as context) and push it to
# the ECR repository named by env.REPOSITORY.
# - Layer cache is read from and written to the GitHub Actions cache
#   (type=gha, mode=max).
# - SBOM and provenance attestations are turned off.
# - The image is pushed under three tags produced by the "tags" step:
#   tag_latest, tag_sha and tag_pr.
- name: Build and push (${{ matrix.variant }})
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
cache-from: type=gha
cache-to: type=gha,mode=max
context: .
file: ./Dockerfile-${{ matrix.variant }}
pull: true
push: true
sbom: false
provenance: false
tags: |
${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_latest }}
${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_sha }}
${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_pr }}
56 changes: 0 additions & 56 deletions .github/workflows/prod-deploy.yml

This file was deleted.

129 changes: 90 additions & 39 deletions .github/workflows/stage-build.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
### This is the Terraform-generated stage-build.yml workflow for the ###
### timdex-embeddings-stage app repository. ###
### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
### the document. If the container requires any additional pre-build ###
### commands, uncomment and edit the PREBUILD line at the end of the ###
### document. ###
### This is a custom stage-build.yml workflow for the ###
### timdex-embeddings-stage app repository. It is customized in order to ###
### support parallel builds for amd64 and/or arm64 for AWS Batch compute ###
### environments that have GPUs and compute environments that do NOT have ###
### GPUs. ###

name: Stage Container Build and Deploy
on:
Expand All @@ -13,47 +12,99 @@ on:
- main
paths-ignore:
- '.github/**'
- 'docs/**'
- 'tests/**'
- 'README.md'

permissions:
id-token: write
contents: read

env:
AWS_REGION: "us-east-1"
GHA_ROLE: "timdex-embeddings-gha-stage"
REPOSITORY: "timdex-embeddings-stage"

jobs:
prep:
name: Prep for Build
choose-runners:
name: Determine Runner architecture from .aws-architecture file
runs-on: ubuntu-latest
outputs:
cpuarch: ${{ steps.setarch.outputs.cpuarch }}
outputs:
gpu_arch: ${{ steps.out.outputs.gpu_arch }}
cpu_arch: ${{ steps.out.outputs.cpu_arch }}
steps:
- uses: actions/checkout@v5

# Read the target platform for each image variant from the JSON
# .aws-architecture file and publish them as the step outputs
# gpu_arch and cpu_arch. jq's `//` alternative operator substitutes
# "linux/amd64" when the "gpu" or "cpu" key is missing or null.
- id: out
run: |
GPU_ARCH=$(jq -r '.gpu // "linux/amd64"' .aws-architecture)
CPU_ARCH=$(jq -r '.cpu // "linux/amd64"' .aws-architecture)
echo "gpu_arch=$GPU_ARCH" >> $GITHUB_OUTPUT
echo "cpu_arch=$CPU_ARCH" >> $GITHUB_OUTPUT

build:
name: Build ${{ matrix.variant }} (${{ matrix.arch }})
needs: choose-runners
strategy:
fail-fast: false
matrix:
include:
- variant: gpu
arch: ${{ needs.choose-runners.outputs.gpu_arch }}
- variant: cpu
arch: ${{ needs.choose-runners.outputs.cpu_arch }}
runs-on: ${{ matrix.arch == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }}

steps:
- name: Checkout
- name: Checkout code
uses: actions/checkout@v5

- name: Set CPU Architecture
id: setarch
- name: Set Tags
id: tags
run: |
echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY
if [[ -f .aws-architecture ]]; then
ARCH=$(cat .aws-architecture)
echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY
else
ARCH="linux/amd64"
echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY
fi
if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then
echo "$ARCH is INVALID architecture!"
echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY
exit 1
fi
echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT

deploy:
needs: prep
name: Stage Deploy
uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-stage.yml@main
secrets: inherit
with:
AWS_REGION: "us-east-1"
GHA_ROLE: "timdex-embeddings-gha-stage"
ECR: "timdex-embeddings-stage"
CPU_ARCH: ${{ needs.prep.outputs.cpuarch }}
# PREBUILD:
KEY=${{ matrix.variant }}
ARCH=${{ matrix.arch }}
TAG_ARCH=$(echo "$ARCH" | cut -d'/' -f2)
TAG_SHA=$(echo $GITHUB_SHA | cut -c 1-8)
echo "arch=${ARCH}" >> $GITHUB_OUTPUT
echo "tag_latest=latest-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT
echo "tag_sha=${TAG_SHA}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT

# Delete the runner's .NET, Android and GHC directories plus the
# directory named by $AGENT_TOOLSDIRECTORY before building.
# NOTE(review): presumably needed so the image build has enough disk
# space on the hosted runner - confirm.
- name: Free Disk Space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

# Assume the IAM role named by env.GHA_ROLE in the account given by the
# AWS_ACCT_STAGE secret, in the region given by env.AWS_REGION.
- name: Configure AWS Stage credentials
uses: aws-actions/configure-aws-credentials@v5
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_STAGE }}:role/${{ env.GHA_ROLE }}

# Log in to ECR. The step id matters: the "Build and push" step reads
# steps.login-ecr.outputs.registry to build the image tags.
- name: Login to Stage ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2

# Set up a Buildx builder with the docker-container driver. The
# "Build and push" step selects it via steps.buildx.outputs.name.
- name: Setup BuildX Builder
id: buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container

# Build the image for the current matrix variant from
# ./Dockerfile-<variant> (repository root as context) and push it to
# the ECR repository named by env.REPOSITORY.
# - Layer cache is read from and written to the GitHub Actions cache
#   (type=gha, mode=max).
# - SBOM and provenance attestations are turned off.
# - The image is pushed under two tags produced by the "tags" step:
#   tag_latest and tag_sha.
- name: Build and push (${{ matrix.variant }})
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
cache-from: type=gha
cache-to: type=gha,mode=max
context: .
file: ./Dockerfile-${{ matrix.variant }}
pull: true
push: true
sbom: false
provenance: false
tags: |
${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_latest }}
${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_sha }}
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ repos:
types: ["python"]
- id: pip-audit
name: pip-audit
entry: uv run pip-audit
entry: uv run pip-audit --ignore-vuln CVE-2025-2953 --ignore-vuln CVE-2025-3730
language: system
pass_filenames: false
6 changes: 3 additions & 3 deletions Dockerfile → Dockerfile-cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.12-slim
FROM public.ecr.aws/deep-learning-containers/pytorch-inference-arm64:2.6.0-cpu-py312-ubuntu22.04-ec2

RUN apt-get update && \
apt-get install -y --no-install-recommends git ca-certificates && \
Expand All @@ -10,10 +10,10 @@ ENV UV_SYSTEM_PYTHON=1
WORKDIR /app

# Copy project metadata
COPY pyproject.toml uv.lock* ./
COPY pyproject.toml ./

# Install package into system python
RUN uv pip install --system .
RUN uv pip install --group dlc_arm64_cpu --system .
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, what is the effect of this if the list of dependencies in pyproject.toml is empty? 🤔

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great question.

The short answer is that the empty list has no effect.

The longer answer is that originally during this work I was thinking the CPU and GPU Docker images may need different dependencies. Turns out that was not the case, and we could rely on the torch version in each base image.

But I figured leaving this structure in place couldn't hurt in the event we do eventually want to support different dependencies for CPU vs GPU. Dependencies are heavy and tricky for ML work, and I think we'd be well served to assume there will be fairly frequent and finicky dependency juggling to perform, and to set ourselves up to handle it easily.


# Copy CLI application
COPY embeddings ./embeddings
Expand Down
Loading