Skip to content

Commit

Permalink
Add docker build CICD (#5)
Browse files Browse the repository at this point in the history
Adding fondant build for custom component to GitHub Actions.
  • Loading branch information
mrchtr authored Nov 14, 2023
1 parent 576b6de commit b09a974
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 5 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Builds the Fondant component images on every push to main, or on manual
# dispatch. The job logs in to ghcr.io before building.
# NOTE(review): presumably `fondant build` pushes the images — confirm.
name: Build dev images

on:
  push:
    branches:
      - main
  workflow_dispatch:

# Grant the GITHUB_TOKEN only what this workflow needs: read the repository
# and write packages (required to publish to the GitHub Container Registry).
permissions:
  contents: read
  packages: write

jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Set buildx alias
        run: docker buildx install

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Install fondant
        run: |
          pip install --upgrade pip
          # Quoted so the shell never treats "[docker]" as a glob pattern.
          pip install "fondant[docker]@git+https://github.com/ml6team/fondant"

      - name: Build components
        run: ./scripts/build_components.sh -r ghcr.io -n ml6team -t "$GITHUB_SHA" --label org.opencontainers.image.source=https://github.com/ml6team/fondant-usecase-RAG
File renamed without changes.
63 changes: 63 additions & 0 deletions scripts/build_components.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash
set -e

# Print the command-line help text to stdout.
#
# --repo is listed in long form only: the argument parser matches -r against
# --registry first, so -r can never select --repo.
function usage {
  cat <<EOF
Usage: $0 [options]
Options:
  -t,  --tag <value>             Tag to add to image
                                 The tag is set in the component specifications
  -d,  --components-dir <value>  Directory containing components to build as subdirectories.
                                 The path should be relative to the root directory (default: src/components)
  -r,  --registry <value>        The container registry prefix to use e.g. ghcr.io (default: null (DockerHub))
  -n,  --namespace <value>       The registry namespace for the built images (default: ml6team)
  -co, --component <value>       Specific component to build. Pass the component subdirectory name(s) to build
                                 certain component(s) or 'all' to build all components in the components
                                 directory (default: all)
       --repo <value>            Set the repo (default: ml6team/fondant-usecase-RAG)
  -l,  --label <value>           Set a container label, repeatable
                                 (e.g. org.opencontainers.image.source=https://github.com/ml6team/fondant-usecase-RAG)
  -h,  --help                    Display this help message
EOF
}

# Abort with a clear message when an option that takes a value is the last
# argument. Without this check the extra `shift` fails and, under `set -e`,
# the script exits silently.
function require_value {
  if [[ "$#" -lt 2 ]]; then
    echo "Missing value for option: $1" >&2
    exit 1
  fi
}

# Parse the arguments into global variables:
#   registry, namespace, components_dir, repo, tag  - single values
#   components, labels                              - arrays (repeatable flags)
# Exits 0 after printing the help text for -h/--help and exits 1 on an
# unknown parameter or a missing option value.
function parse_args {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      -r|--registry)       require_value "$@"; registry="$2"; shift ;;
      -n|--namespace)      require_value "$@"; namespace="$2"; shift ;;
      -d|--components-dir) require_value "$@"; components_dir="$2"; shift ;;
      # Long form only: -r is already taken by --registry above, so a second
      # -r pattern here could never match.
      --repo)              require_value "$@"; repo="$2"; shift ;;
      -t|--tag)            require_value "$@"; tag="$2"; shift ;;
      -co|--component)     require_value "$@"; components+=("$2"); shift ;;
      -l|--label)          require_value "$@"; labels+=("$2"); shift ;;
      -h|--help)           usage; exit ;;
      *) echo "Unknown parameter passed: $1" >&2; exit 1 ;;
    esac
    shift
  done
}

parse_args "$@"

# Set default values for optional arguments if not passed
components_dir="${components_dir:-src/components}"
namespace="${namespace:-ml6team}"

# Resolve the components directory relative to the repository root, taken to
# be the parent of the directory that holds this script.
# `cd ... && pwd` (rather than `;`) makes a failed cd fail the assignment, so
# the script stops instead of using the caller's working directory. cd output
# is discarded so that only pwd's result is captured.
scripts_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd -P)
root_dir=$(dirname "$scripts_dir")
components_dir="$root_dir/$components_dir"

# Return 0 when the component called $1 should be built: either no
# -co/--component filter was given, or the filter contains 'all' or the
# component's directory name.
function component_selected {
  local name=$1 wanted
  if [[ "${#components[@]}" -eq 0 ]]; then
    return 0
  fi
  for wanted in "${components[@]}"; do
    if [[ "$wanted" == "all" || "$wanted" == "$name" ]]; then
      return 0
    fi
  done
  return 1
}

# Determine the components to build.
# Only subdirectories of $1 that contain a Dockerfile are considered, and
# the -co/--component selection (the `components` array) is honoured.
# The result is stored in the global array `components_to_build`; each entry
# keeps its trailing slash.
function collect_components {
  local search_dir=$1 dir name
  components_to_build=()
  for dir in "$search_dir"/*/; do
    # An unmatched glob stays literal and is rejected by this check as well.
    [[ -f "${dir}Dockerfile" ]] || continue
    name=${dir%/}
    name=${name##*/}
    if component_selected "$name"; then
      components_to_build+=("$dir")
    fi
  done
}

collect_components "$components_dir"

# Compose the image reference for the component called $1 as
# [registry/]namespace/name[:tag]. The registry prefix and the tag suffix are
# left out when empty, so the reference never starts with "/" or ends with ":".
function image_ref_for {
  local name=$1
  local ref="${namespace}/${name}"
  if [[ -n "${registry}" ]]; then
    ref="${registry}/${ref}"
  fi
  if [[ -n "${tag}" ]]; then
    ref="${ref}:${tag}"
  fi
  printf '%s\n' "$ref"
}

# Build the component found in directory $1 with `fondant build`, tagging it
# with the reference derived from the directory name and passing every entry
# of the global `labels` array as a separate --label argument.
function build_component {
  local dir=$1
  local name=${dir%/}
  name=${name##*/}

  local full_image_name
  full_image_name=$(image_ref_for "$name")

  # Build the label flags as an array so values containing spaces or glob
  # characters reach fondant intact.
  local label
  local label_args=()
  for label in "${labels[@]}"; do
    label_args+=(--label "$label")
  done

  pushd "$dir"
  echo "Tagging image as $full_image_name"
  fondant build "$dir" -t "$full_image_name" --nocache "${label_args[@]}"
  popd
}

# Loop through all component directories selected for the build
for dir in "${components_to_build[@]}"; do
  build_component "$dir"
done
2 changes: 1 addition & 1 deletion src/components/text_cleaning/fondant_component.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Text cleaning component
description: Clean text passages
image: text-cleaning-component:latest
image: ghcr.io/ml6team/text_cleaning:dev

consumes:
text:
Expand Down
6 changes: 4 additions & 2 deletions src/components/text_cleaning/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
logger = logging.getLogger(__name__)


class TextCleaningComponent(PandasTransformComponent):
class TextCleaningComponent(PandasTransformComponent):
def __init__(self, *_):
"""Initialize your component"""

Expand All @@ -16,5 +16,7 @@ def remove_empty_lines(self, text):
return "\n".join(non_empty_lines)

def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Clean every passage in the ("text", "data") column.

    Args:
        dataframe: Frame holding a ("text", "data") column of text passages.

    Returns:
        The same dataframe, with each value of that column replaced by the
        result of ``self.remove_empty_lines``.
    """
    # Hand the bound method to apply() so it is called once per value.
    # A lambda that merely returns the method would store the method object
    # in every row instead of the cleaned text.
    dataframe[("text", "data")] = dataframe[("text", "data")].apply(
        self.remove_empty_lines
    )
    return dataframe
4 changes: 2 additions & 2 deletions src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


pipeline = Pipeline(
pipeline_name="ingestion-pipeline",
pipeline_name="ingestion-pipeline",
pipeline_description="Pipeline to prepare and process \
data for building a RAG solution",
base_path="./data-dir", # The demo pipelines uses a local \
Expand Down Expand Up @@ -43,7 +43,7 @@
name="index_weaviate",
arguments={
"weaviate_url": "http://host.docker.internal:8080",
"class_name": "index"
"class_name": "index",
},
)

Expand Down

0 comments on commit b09a974

Please sign in to comment.