StacklokLabs · aponcedeleonch · Jan 23, 2026 · Jan 23, 2026 · Jan 23, 2026 · Jan 23, 2026
diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
@@ -10,19 +10,28 @@ jobs:
   code_quality:
     name: Code Quality
     uses: ./.github/workflows/code-quality.yml
+
+  # Download the ML models once and publish them as the ml-models artifact consumed by later jobs
+  download_models:
+    name: Download Models
+    uses: ./.github/workflows/download-models.yml
+
   image_build:
     name: Build Docker Image
     uses: ./.github/workflows/image-build.yml
-    needs: code_quality
+    needs: [code_quality, download_models]  # image-build.yml downloads the ml-models artifact
+
   # Will use the cached layers from image_build job
   integration_tests:
     name: Integration Tests
     uses: ./.github/workflows/integration-tests.yml
     needs: image_build
+
   offline_tests:
     name: Offline Mode Tests
     uses: ./.github/workflows/offline-tests.yml
     needs: image_build
+
   mcp_tef_integration_tests:
     name: MCP TEF Integration Tests
     uses: ./.github/workflows/mcp-tef-integration-tests.yml

diff --git a/.github/workflows/download-models.yml b/.github/workflows/download-models.yml
@@ -0,0 +1,49 @@
+name: Download Models
+
+on:
+  workflow_call:  # reusable workflow: this is its only trigger
+
+permissions:
+  contents: read
+
+jobs:
+  download:
+    name: Download ML Models
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Cache ML models
+        id: cache-models
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: models/
+          # Static key naming the bundled models - bump the v1 segment to invalidate the cache
+          key: ml-models-v1-fastembed-bge-small-tiktoken-cl100k-llmlingua2
+
+      - name: Install uv
+        if: steps.cache-models.outputs.cache-hit != 'true'  # tooling is only needed on a cache miss
+        uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0
+        with:
+          enable-cache: true
+          python-version: '3.13'
+
+      - name: Install Task
+        if: steps.cache-models.outputs.cache-hit != 'true'  # only needed on a cache miss
+        uses: arduino/setup-task@b91d5d2c96a56797b48ac1e0e89220bf64044611 # v2.0.0
+        with:
+          version: 3.44.1
+
+      - name: Download models
+        if: steps.cache-models.outputs.cache-hit != 'true'  # skip the download when models/ was restored
+        run: task download-models  # Taskfile.yml: runs scripts/download_models.py
+
+      - name: Upload models artifact  # no `if`: runs on both cache hit and cache miss
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: ml-models  # name the consuming workflows pass to download-artifact
+          path: models/
+          retention-days: 1
+          if-no-files-found: error  # fail the job instead of uploading nothing
diff --git a/.github/workflows/image-build.yml b/.github/workflows/image-build.yml
@@ -10,14 +10,20 @@ jobs:
   build:
     name: Build Docker Image
     runs-on: ubuntu-latest
-    
+
     steps:
     - name: Checkout code
       uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-
+
+    - name: Download models artifact
+      uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
+      with:
+        name: ml-models  # uploaded by download-models.yml
+        path: models/  # Dockerfile COPY source: models/fastembed, models/tiktoken, models/llmlingua
+
     - name: Set up Docker Buildx
       uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
-      
+
     - name: Build Docker image
       uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
       with:

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -15,6 +15,12 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Download models artifact
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
+        with:
+          name: ml-models  # uploaded by download-models.yml
+          path: models/  # Dockerfile COPY source: models/fastembed, models/tiktoken, models/llmlingua
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
 

diff --git a/.github/workflows/mcp-tef-integration-tests.yml b/.github/workflows/mcp-tef-integration-tests.yml
@@ -21,6 +21,12 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Download models artifact
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
+        with:
+          name: ml-models  # NOTE(review): exists only after download-models.yml ran - confirm the caller's needs:
+          path: models/  # Dockerfile COPY source: models/fastembed, models/tiktoken, models/llmlingua
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
 

diff --git a/.github/workflows/offline-tests.yml b/.github/workflows/offline-tests.yml
@@ -15,6 +15,12 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Download models artifact
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
+        with:
+          name: ml-models  # uploaded by download-models.yml
+          path: models/  # Dockerfile COPY source: models/fastembed, models/tiktoken, models/llmlingua
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
 

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -9,6 +9,7 @@ jobs:
   code_checks:
     name: Code Checks
     uses: ./.github/workflows/code-checks.yml
+
   release:
     needs: code_checks
     name: Release Container
@@ -21,6 +22,12 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Download models artifact
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
+        with:
+          name: ml-models  # uploaded by download-models.yml, which runs inside the code_checks job
+          path: models/  # Dockerfile COPY source: models/fastembed, models/tiktoken, models/llmlingua
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
 

diff --git a/.gitignore b/.gitignore
@@ -30,3 +30,6 @@ sbom.json
 # Example data and results (download from releases)
 examples/anthropic_comparison/*.json
 examples/anthropic_comparison/*.png
+
+# Pre-downloaded ML models (downloaded by scripts/download_models.py)
+models/
diff --git a/Dockerfile b/Dockerfile
@@ -54,41 +54,15 @@ USER root
 RUN chown app:app /app/.venv/lib/python3.13/site-packages/sqlite_vec/vec0.so
 USER app
 
-# Pre-download fastembed models and tiktoken encodings stage
-FROM builder AS model-downloader
-
-# Switch to root to create cache directory, then switch back to app user
-USER root
-RUN mkdir -p /app/.cache/fastembed /app/.cache/tiktoken && chown -R app:app /app/.cache
-USER app
-
-# Set cache directory for fastembed models and tiktoken
-ENV FASTEMBED_CACHE_PATH=/app/.cache/fastembed
-ENV TIKTOKEN_CACHE_DIR=/app/.cache/tiktoken
-
-# Pre-download the embedding model by instantiating TextEmbedding
-RUN --mount=type=cache,target=/app/.cache/uv,uid=1000,gid=1000 \
-    /app/.venv/bin/python -c "\
-import os; \
-print(f'FASTEMBED_CACHE_PATH: {os.environ.get(\"FASTEMBED_CACHE_PATH\")}'); \
-from fastembed import TextEmbedding; \
-print('Downloading embedding model...'); \
-model = TextEmbedding(model_name='BAAI/bge-small-en-v1.5'); \
-print('Model downloaded successfully')"
-
-# Pre-download tiktoken encodings for offline use
-RUN /app/.venv/bin/python -c "\
-import tiktoken; \
-print('Downloading tiktoken encodings...'); \
-tiktoken.get_encoding('cl100k_base'); \
-print('Tiktoken encodings downloaded successfully')"
-
 FROM python:3.13-slim AS runner
 
 # Create non-root user (same as builder stage)
 RUN groupadd --gid 1000 app && \
     useradd --uid 1000 --gid app --shell /bin/bash --create-home app
 
+# Install system dependencies (jq for JSON query support)
+RUN apt-get update && apt-get install -y --no-install-recommends jq && rm -rf /var/lib/apt/lists/*
+
 # Create app directory and set ownership
 WORKDIR /app
 RUN chown app:app /app
@@ -97,9 +71,11 @@ RUN chown app:app /app
 COPY --from=builder --chown=app:app /app/.venv /app/.venv
 COPY --from=builder --chown=app:app /app/migrations /app/migrations
 
-# Copy pre-downloaded fastembed models and tiktoken encodings
-COPY --from=model-downloader --chown=app:app /app/.cache/fastembed /app/.cache/fastembed
-COPY --from=model-downloader --chown=app:app /app/.cache/tiktoken /app/.cache/tiktoken
+# Copy pre-downloaded models from the build context (models/ must exist before the image is built)
+# Models are architecture-independent (ONNX format) and are downloaded by scripts/download_models.py
+COPY --chown=app:app models/fastembed /app/.cache/fastembed
+COPY --chown=app:app models/tiktoken /app/.cache/tiktoken
+COPY --chown=app:app models/llmlingua /app/.cache/llmlingua
 
 # Switch to non-root user
 USER app
@@ -109,6 +85,7 @@ ENV TOOLHIVE_HOST=host.docker.internal
 ENV RUNNING_IN_DOCKER=1
 ENV FASTEMBED_CACHE_PATH=/app/.cache/fastembed
 ENV TIKTOKEN_CACHE_DIR=/app/.cache/tiktoken
+ENV LLMLINGUA_MODEL_PATH=/app/.cache/llmlingua
 ENV COLORED_LOGS=false
 
 # Run the application

diff --git a/Taskfile.yml b/Taskfile.yml
@@ -74,23 +74,32 @@ tasks:
     deps:
       - install
 
+  download-models:
+    desc: Download ML models for offline/airgapped deployments
+    cmds:
+      - uv sync --group offline-models  # dependency group declared in pyproject.toml
+      - uv run python scripts/download_models.py
+
   build:
     desc: Build multi-architecture container image
     cmds:
       - docker buildx create --name mcp-optimizer-builder --use --bootstrap || docker buildx use mcp-optimizer-builder
       - docker buildx build --platform linux/amd64,linux/arm64 -t mcp-optimizer:latest --load .
-      - docker rm -f mcp-optimizer-container || true
+    deps:
+      - download-models  # runs before cmds; the Dockerfile COPYs models/* from the build context
 
   build-local:
     desc: Build container image for local architecture only (faster for development)
     cmds:
       - docker build -t mcp-optimizer:latest .
-      - docker rm -f mcp-optimizer-container || true
+    deps:
+      - download-models  # runs before cmds; the Dockerfile COPYs models/* from the build context
 
   run-container:
     desc: Run the application in a container (builds for local arch only)
     cmds:
       - task: build-local
+      - docker rm -f mcp-optimizer-container || true  # drop any previous container so --name below does not collide
       - docker run --network host --name mcp-optimizer-container mcp-optimizer:latest
 
   install-hooks:
@@ -113,8 +122,9 @@ tasks:
     cmds:
       - thv group create optim || true
       - thv rm mcp-optimizer || true
-      - docker build -t mcp-optimizer .
       - thv run mcp-optimizer:latest --transport streamable-http --group optim
+    deps:
+      - build-local
 
   offline-container-tests:
     desc: Run container offline mode tests

diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,11 @@ examples = [
     "matplotlib>=3.10.8",
     "rich>=14.2.0",
 ]
+offline-models = [  # installed by `task download-models` via `uv sync --group offline-models`
+    "optimum[onnxruntime]>=2.1.0",
+    "fastembed>=0.7.4",
+    "tiktoken>=0.12.0",
+]
 
 [build-system]
 requires = ["hatchling"]