Skip to content

Commit

Permalink
chore: install all extras in Dockerfile (Unstructured-IO#419)
Browse files Browse the repository at this point in the history
* Adds step to install all extras
* Adds smoke test of wikipedia ingest to validate in CI
  • Loading branch information
ryannikolaidis authored Mar 30, 2023
1 parent 32c79ca commit 59785e4
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ jobs:
- name: Test AMD image
run: |
DOCKER_PLATFORM="linux/amd64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA" make docker-test
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA make docker-smoke-test
- name: Push AMD image
run: |
# write to the build repository to cache for the publish-images job
Expand Down Expand Up @@ -80,6 +81,7 @@ jobs:
run: |
# only run a subset of tests on ARM, since they take a long time with emulation
DOCKER_PLATFORM="linux/arm64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA" make docker-test TEST_NAME=partition/test_text.py
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA make docker-smoke-test
- name: Push ARM image
run: |
# write to the build repository to cache for the publish-images job
Expand Down
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
pip install --no-cache -r requirements/test.txt && \
pip install --no-cache -r requirements/huggingface.txt && \
pip install --no-cache -r requirements/dev.txt && \
pip install --no-cache -r requirements/ingest-azure.txt && \
pip install --no-cache -r requirements/ingest-github.txt && \
pip install --no-cache -r requirements/ingest-gitlab.txt && \
pip install --no-cache -r requirements/ingest-google-drive.txt && \
pip install --no-cache -r requirements/ingest-reddit.txt && \
pip install --no-cache -r requirements/ingest-s3.txt && \
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
pip install --no-cache -r requirements/local-inference.txt && \
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"

Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,7 @@ docker-test:
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \
$(DOCKER_IMAGE) \
bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"

# Smoke-test a built Docker image by running scripts/docker-smoke-test.sh.
# The script starts a container from the image named by the IMAGE_NAME
# environment variable (falling back to unstructured:latest) and runs the
# Wikipedia ingest test script inside it.
.PHONY: docker-smoke-test
docker-smoke-test:
./scripts/docker-smoke-test.sh
45 changes: 45 additions & 0 deletions scripts/docker-smoke-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

# Start the containerized repository and run ingest tests

# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable...

# Strict mode: stop on the first failing command, treat unset variables as
# errors, trace every command, and fail a pipeline if any stage fails.
set -o errexit -o nounset -o xtrace -o pipefail

# Fixed name for the throwaway container; the image may be overridden by the caller.
CONTAINER_NAME="unstructured-smoke-test"
IMAGE_NAME="${IMAGE_NAME:-unstructured:latest}"

# Resolve the directory containing this script, then move to its parent
# (the root of the repository) so relative paths below resolve from there.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
cd "${SCRIPT_DIR}/.." || exit 1

start_container() {
  # Launch $IMAGE_NAME in the background with a TTY attached, under the fixed
  # name $CONTAINER_NAME; --rm makes Docker delete the container once it stops.
  echo "Starting container ${CONTAINER_NAME}"
  docker run --detach --tty --rm --name "${CONTAINER_NAME}" "${IMAGE_NAME}"
}

await_container() {
  # Poll once per second until Docker reports the container as "running".
  #
  # Gives up after AWAIT_TIMEOUT_SECONDS seconds (default 60) and returns 1, so
  # a container that exits immediately -- and is then deleted because it was
  # started with --rm -- cannot make this loop spin forever.
  local timeout="${AWAIT_TIMEOUT_SECONDS:-60}"
  local elapsed=0
  local status

  echo Waiting for container to start
  while true; do
    # `docker inspect` fails when the container does not exist; treat that as
    # "not running" instead of letting errexit or an empty comparison decide.
    status="$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME" 2>/dev/null || true)"
    if [ "$status" == "running" ]; then
      return 0
    fi
    if [ "$elapsed" -ge "$timeout" ]; then
      echo "Container $CONTAINER_NAME did not reach the running state within ${timeout}s" >&2
      return 1
    fi
    sleep 1
    elapsed=$((elapsed + 1))
  done
}

stop_container() {
  # Ask Docker to stop the smoke-test container identified by $CONTAINER_NAME.
  echo "Stopping container ${CONTAINER_NAME}"
  docker stop "${CONTAINER_NAME}"
}

start_container

# Regardless of test result, stop the container
trap stop_container EXIT

await_container

# Run the tests: copy the ingest test directory into the container's /home and
# execute the Wikipedia ingest script there.
docker cp test_unstructured_ingest "${CONTAINER_NAME}:/home"

# With `set -e` in effect a failing `docker exec` aborts the script with that
# command's exit status, and a successful one leaves the script exiting 0, so
# no explicit status capture is needed after this line.
docker exec "$CONTAINER_NAME" /bin/bash -c "/home/test_unstructured_ingest/test-ingest-wikipedia.sh"

0 comments on commit 59785e4

Please sign in to comment.