Skip to content

Commit

Permalink
[nightly] Create Nightly Pipeline, make docker-nightly-publish.yml & …
Browse files Browse the repository at this point in the history
…integration.yml more modular (#2628)

Co-authored-by: Siddharth Venkatesan <siddhave@amazon.com>
  • Loading branch information
HappyAmazonian and siddvenk authored Dec 19, 2024
1 parent eaa70af commit 3aebeb5
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 79 deletions.
84 changes: 42 additions & 42 deletions .github/workflows/docker-nightly-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@ on:
type: string
required: true
default: 'nightly'
schedule:
- cron: '0 13 * * *'
# Workflow-level output: re-exports the `djl_version` output of the
# `nightly-build` job.
outputs:
  djl_version:
    description: 'djl version'
    value: ${{ jobs.nightly-build.outputs.djl_version }}

permissions:
id-token: write
contents: read

# Image repositories referenced by the steps below via `env.*`:
#   AWS_ECR_REPO    - ECR repository that built images are tagged for and pushed to.
#   DOCKER_HUB_REPO - repository name of the locally built images that get re-tagged.
env:
  AWS_ECR_REPO: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'
  DOCKER_HUB_REPO: 'deepjavalibrary/djl-serving'

jobs:
create-runners:
runs-on: [ self-hosted, scheduler ]
Expand Down Expand Up @@ -96,6 +102,7 @@ jobs:
cpu_instance_id_5: ${{ steps.create_cpu_5.outputs.action_cpu_instance_id }}
cpu_instance_id_6: ${{ steps.create_cpu_6.outputs.action_cpu_instance_id }}
graviton_instance_id_1: ${{ steps.create_graviton_1.outputs.action_graviton_instance_id }}

nightly-build:
needs: create-runners
strategy:
Expand All @@ -122,6 +129,8 @@ jobs:
- RUN_ID-${{ github.run_id }}
- RUN_NUMBER-${{ github.run_number }}
- SHA-${{ github.sha }}
outputs:
djl_version: ${{ steps.get-versions.outputs.DJL_VERSION }}
steps:
- name: Clean disk space
run: |
Expand All @@ -130,11 +139,6 @@ jobs:
/usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
$AGENT_TOOLSDIRECTORY
- uses: actions/checkout@v4
- name: Login to Docker
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
- name: install awscli
run: |
sudo apt-get update
Expand All @@ -160,35 +164,8 @@ jobs:
SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV
echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV
- name: Build serving package for nightly
if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }}
run: |
./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
- name: Build and push nightly docker image
if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }}
working-directory: serving/docker
run: |
export NIGHTLY="-nightly"
docker compose build --no-cache \
--build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
--build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
${{ matrix.containers.name }}
docker compose push ${{ matrix.containers.name }}
- name: Build and push temp image
if: ${{ inputs.mode == 'temp' }}
working-directory: serving/docker
run: |
export NIGHTLY="-nightly"
docker compose build --no-cache \
--build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
--build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
${{ matrix.containers.name }}
repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo
tempTag="$repo:${{ matrix.containers.name }}-${GITHUB_SHA}"
docker tag deepjavalibrary/djl-serving:${{ matrix.containers.name }}-nightly $tempTag
docker push $tempTag
- name: Build and push release docker image
echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_OUTPUT
- name: Build release candidate docker image
if: ${{ inputs.mode == 'release' }}
working-directory: serving/docker
run: |
Expand All @@ -197,14 +174,37 @@ jobs:
docker compose build --no-cache \
--build-arg djl_version=${{ env.DJL_VERSION }} \
--build-arg djl_serving_version=${{ env.SERVING_VERSION }} \
${{ matrix.containers.name }}
docker compose push ${{ matrix.containers.name }}
- name: Retag image for release
if: ${{ matrix.containers.name == 'cpu' && inputs.mode == 'release' }}
${{ matrix.containers.name }}
# Builds the SNAPSHOT image for the current matrix container whenever the
# requested mode is 'nightly', 'temp', or left empty.
- name: Build temp docker image
  if: ${{ inputs.mode == 'nightly' || inputs.mode == 'temp' || inputs.mode == '' }}
  run: |
    # Run the Gradle :serving:dockerDeb task with the snapshot property set.
    ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
    cd serving/docker
    # Export the suffix for this shell and persist it to GITHUB_ENV so that
    # later steps can read it as env.NIGHTLY.
    export NIGHTLY="-nightly"
    echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV
    docker compose build --no-cache \
      --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
      --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
      ${{ matrix.containers.name }}
- name: Tag and push temp image to ECR repo
working-directory: serving/docker
run: |
docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest
docker push deepjavalibrary/djl-serving:latest
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
mode=${{ inputs.mode }}
if [ "${{ inputs.mode }}" == "release" ]; then
mode=${{ env.DJL_VERSION }}
fi
tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_RUN_ID}"
tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_SHA}"
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempRunIdTag
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempCommitTag
if ${{ inputs.mode == 'nightly' }}; then
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-nightly
fi
time docker push --all-tags ${{ env.AWS_ECR_REPO }}
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
Expand Down
34 changes: 23 additions & 11 deletions .github/workflows/docker_publish.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build and push docker nightly to temp ECR repo
name: Publish docker nightly to dockerhub & staging ECR repo

on:
workflow_dispatch:
Expand All @@ -11,20 +11,31 @@ on:
options:
- nightly
- release
# Optional manual input; forwarded as the last argument to
# push_image_from_ECR.sh by the sync steps of this workflow.
commit_sha:
  required: false
  default: ''
  description: 'specify which sha value the image was built with.'
workflow_call:
inputs:
mode:
description: 'release/nightly, default is nightly'
type: string
required: true
default: 'nightly'
# Optional input for callers of this reusable workflow; forwarded as the last
# argument to push_image_from_ECR.sh by the sync steps of this workflow.
# Description typo fixed ("aws" -> "was") so it matches the workflow_dispatch input.
commit_sha:
  type: string
  description: 'specify which sha value the image was built with.'
  required: false
  default: ''

permissions:
id-token: write
contents: read

env:
AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
AWS_TMP_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
AWS_STAGING_ECR_REPO: "125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving"
ECR_REPO_REGION: "us-east-1"

jobs:
create-aarch64-runner:
Expand Down Expand Up @@ -66,25 +77,26 @@ jobs:
with:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
# Authenticates docker against both ECR repositories (temp first, then
# staging) using a password obtained from `aws ecr get-login-password`.
- name: Login to ECR
  run: |
    for ecr_repo in "${{ env.AWS_TMP_ECR_REPO }}" "${{ env.AWS_STAGING_ECR_REPO }}"; do
      aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin "$ecr_repo"
    done
# Reads the DJL version from gradle/libs.versions.toml and publishes it to
# later steps as the DJL_VERSION environment variable.
- name: Get DJL Version
  run: |
    # Split on '='; on lines matching "djl ", strip quotes (and a space
    # directly before a quote) from the second field and print it.
    version=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ./gradle/libs.versions.toml)
    echo "DJL_VERSION=$version" >> $GITHUB_ENV
- name: Pull and sync to docker hub
working-directory: serving/docker
run: |
DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
./scripts/pull_and_retag.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }}
./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} ${{ inputs.commit_sha }}
- name: Pull and sync to ECR
working-directory: serving/docker
run: |
DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
repo="125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving"
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo
./scripts/pull_and_retag.sh $DJL_VERSION $repo ${{ inputs.mode }}
./scripts/push_image_from_ECR.sh $DJL_VERSION $AWS_STAGING_ECR_REPO ${{ inputs.mode }} ${{ inputs.commit_sha }}
- name: Retag image for release latest
if: ${{ inputs.mode == 'release' }}
working-directory: serving/docker
run: |
DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest
docker push deepjavalibrary/djl-serving:latest
- name: Clean docker env
Expand Down
65 changes: 57 additions & 8 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,33 @@ on:
workflow_dispatch:
inputs:
djl-version:
description: 'The released version of DJL'
description: 'The released version of DJL.'
required: false
default: ''
schedule:
- cron: '0 15 * * *'
# Manual-dispatch input; handed to the Test step as OVERRIDE_IMAGE_TAG_SUFFIX
# and used to build the pytorch-inf2 image tag.
tag-suffix:
  type: string
  description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
  required: false
  default: 'nightly'
# Inputs accepted when this workflow is invoked as a reusable workflow.
# NOTE(review): the defaults here ('nightly' for djl-version, '' for
# tag-suffix) are the mirror image of the workflow_dispatch defaults
# ('' for djl-version, 'nightly' for tag-suffix) - confirm this is intended.
workflow_call:
  inputs:
    djl-version:
      type: string
      description: 'The released version of DJL.'
      required: false
      default: 'nightly'
    tag-suffix:
      type: string
      description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
      required: false
      default: ''

permissions:
id-token: write
contents: read

# ECR repository that the test jobs log in to and pull images from; also
# passed to the Test step as IMAGE_REPO.
env:
  AWS_ECR_REPO: '185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp'

jobs:
create-runners:
Expand Down Expand Up @@ -151,6 +172,10 @@ jobs:
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
# Installs the AWS CLI from apt; the Test step calls `aws ecr get-login-password`.
- name: install awscli
  run: |
    sudo apt-get update
    sudo apt-get install -y awscli
- name: Set up Python3
if: ${{ matrix.test.instance != 'aarch64' }}
uses: actions/setup-python@v5
Expand All @@ -175,12 +200,22 @@ jobs:
wget https://publish.djl.ai/awscurl/awscurl
chmod +x awscurl
mkdir outputs
# Assumes the github-actions-djl-serving role, but only for matrix entries
# whose instance is 'ubuntu-latest'.
# NOTE(review): the Test step that follows runs `aws ecr get-login-password`
# with no matching condition; presumably the other runners already have AWS
# credentials available - confirm.
- name: Configure AWS Credentials
  uses: aws-actions/configure-aws-credentials@v4
  if: matrix.test.instance == 'ubuntu-latest'
  with:
    aws-region: us-east-1
    role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
- name: Test
working-directory: tests/integration
env:
TEST_DJL_VERSION: ${{ inputs.djl-version }}
OVERRIDE_IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
IMAGE_REPO: ${{ env.AWS_ECR_REPO }}
run: |
python -m pytest -k ${{ matrix.test.test }} tests.py
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
python -m pytest -s -k ${{ matrix.test.test }} tests.py
- name: Cleanup
working-directory: tests/integration
run: |
Expand Down Expand Up @@ -224,11 +259,25 @@ jobs:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy pillow wheel
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
# Assumes the github-actions-djl-serving role in us-east-1 ahead of the ECR
# login and image pull performed by the next step.
- name: Configure AWS Credentials
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: us-east-1
    role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
- name: Download models and dockers
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
if [ "${{ github.event.inputs.djl-version }}" == "temp" ]; then
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-temp-${GITHUB_SHA}"
elif [ -n "${{ inputs.tag-suffix }}" ]; then
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-${{ inputs.tag-suffix }}"
else
DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-nightly"
fi
echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV
ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
echo $DOCKER_IMAGE_URI
docker pull $DOCKER_IMAGE_URI
- name: Run djl_python unit/integration tests on container
working-directory: engines/python/setup
run: |
Expand All @@ -241,7 +290,7 @@ jobs:
-v $PWD/:/opt/ml/model/ \
-w /opt/ml/model \
--device=/dev/neuron0:/dev/neuron0 \
deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
$DOCKER_IMAGE_URI \
/bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \
pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
Expand Down
20 changes: 11 additions & 9 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ on:
permissions:
id-token: write
contents: read

jobs:
build:
uses: ./.github/workflows/docker-nightly-publish.yml
Expand All @@ -41,22 +41,24 @@ jobs:
- name: get_image_tag_suffix
id: get_image_tag_suffix
run: |
if ${{ inputs.mode == 'nightly'}}; then
test_image_tag_suffix='nightly'
fi
if ${{ inputs.mode == 'release'}}; then
test_image_tag_suffix='${{ needs.build.outputs.djl_version}}-${GITHUB_RUN_ID}'
if [[ "${{ inputs.mode }}" == "nightly" ]]; then
echo "test_image_tag_suffix=nightly" >> $GITHUB_OUTPUT
elif [[ "${{ inputs.mode }}" == "release" ]]; then
echo "test_image_tag_suffix=${{ needs.build.outputs.djl_version }}-${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT
else
echo "Invalid mode specified"
exit 1
fi
echo "test_image_tag_suffix=$test_image_tag_suffix" >> $GITHUB_OUTPUT
integration-test:
needs: [get_image_tag_suffix]
uses: ./.github/workflows/integration.yml
secrets: inherit
with:
tag-suffix:: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }}
tag-suffix: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }}
publish:
needs: [integration-test, get_image_tag_suffix]
uses: ./.github/workflows/docker_publish.yml
secrets: inherit
with:
mode: ${{ inputs.mode }}
mode: ${{ inputs.mode }}
commit_sha: ${{ github.sha }}
1 change: 1 addition & 0 deletions serving/docker/scripts/pull_and_retag.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env bash
# for djl-serving/.github/workflows/nightly-docker-ecr-sync.yml

version=$1
repo=$2
Expand Down
Loading

0 comments on commit 3aebeb5

Please sign in to comment.