StacklokLabs · aponcedeleonch · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
@@ -19,3 +19,7 @@ jobs:
     name: Integration Tests
     uses: ./.github/workflows/integration-tests.yml
     needs: image_build
+  offline_tests:
+    name: Offline Mode Tests
+    uses: ./.github/workflows/offline-tests.yml
+    needs: image_build
diff --git a/.github/workflows/offline-tests.yml b/.github/workflows/offline-tests.yml
@@ -0,0 +1,34 @@
+name: Offline Tests
+
+on:
+  workflow_call:
+
+permissions:
+  contents: read
+
+jobs:
+  offline-tests:
+    name: Offline Mode Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
+
+      - name: Build mcp-optimizer Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
+        with:
+          context: .
+          platforms: linux/amd64
+          push: false
+          load: true
+          cache-from: type=gha
+          tags: mcp-optimizer:latest
+
+      - name: Run offline tests
+        run: ./scripts/test-offline.sh
+        env:
+          SKIP_BUILD: "1"
diff --git a/Dockerfile b/Dockerfile
@@ -54,25 +54,35 @@ USER root
 RUN chown app:app /app/.venv/lib/python3.13/site-packages/sqlite_vec/vec0.so
 USER app
 
-# Pre-download fastembed models stage
+# Pre-download fastembed models and tiktoken encodings stage
 FROM builder AS model-downloader
 
-# Set cache directory for fastembed models
-ENV FASTEMBED_CACHE_PATH=/home/app/.cache/fastembed
-
 # Switch to root to create cache directory, then switch back to app user
 USER root
-RUN mkdir -p /home/app/.cache/fastembed && chown -R app:app /home/app/.cache
+RUN mkdir -p /app/.cache/fastembed /app/.cache/tiktoken && chown -R app:app /app/.cache
 USER app
 
+# Set cache directories for fastembed models and tiktoken encodings
+ENV FASTEMBED_CACHE_PATH=/app/.cache/fastembed
+ENV TIKTOKEN_CACHE_DIR=/app/.cache/tiktoken
+
 # Pre-download the embedding model by instantiating TextEmbedding
-RUN --mount=type=cache,target=/home/app/.cache/uv,uid=1000,gid=1000 \
+RUN --mount=type=cache,target=/app/.cache/uv,uid=1000,gid=1000 \
     /app/.venv/bin/python -c "\
+import os; \
+print(f'FASTEMBED_CACHE_PATH: {os.environ.get(\"FASTEMBED_CACHE_PATH\")}'); \
 from fastembed import TextEmbedding; \
 print('Downloading embedding model...'); \
 model = TextEmbedding(model_name='BAAI/bge-small-en-v1.5'); \
 print('Model downloaded successfully')"
 
+# Pre-download tiktoken encodings for offline use
+RUN /app/.venv/bin/python -c "\
+import tiktoken; \
+print('Downloading tiktoken encodings...'); \
+tiktoken.get_encoding('cl100k_base'); \
+print('Tiktoken encodings downloaded successfully')"
+
 FROM python:3.13-slim AS runner
 
 # Create non-root user (same as builder stage)
@@ -87,15 +97,17 @@ RUN chown app:app /app
 COPY --from=builder --chown=app:app /app/.venv /app/.venv
 COPY --from=builder --chown=app:app /app/migrations /app/migrations
 
-# Copy pre-downloaded fastembed models
-COPY --from=model-downloader --chown=app:app /home/app/.cache/fastembed /home/app/.cache/fastembed
+# Copy pre-downloaded fastembed models and tiktoken encodings
+COPY --from=model-downloader --chown=app:app /app/.cache/fastembed /app/.cache/fastembed
+COPY --from=model-downloader --chown=app:app /app/.cache/tiktoken /app/.cache/tiktoken
 
 # Switch to non-root user
 USER app
 
 # Set default environment variables for container deployment
 ENV TOOLHIVE_HOST=host.docker.internal
-ENV FASTEMBED_CACHE_PATH=/home/app/.cache/fastembed
+ENV FASTEMBED_CACHE_PATH=/app/.cache/fastembed
+ENV TIKTOKEN_CACHE_DIR=/app/.cache/tiktoken
 ENV COLORED_LOGS=false
 
 # Run the application

diff --git a/Taskfile.yml b/Taskfile.yml
@@ -115,3 +115,8 @@ tasks:
       - thv rm mcp-optimizer || true
       - docker build -t mcp-optimizer .
       - thv run mcp-optimizer:latest --transport streamable-http --group optim
+
+  offline-container-tests:
+    desc: Run container offline mode tests
+    cmds:
+      - ./scripts/test-offline.sh
diff --git a/scripts/test-offline.sh b/scripts/test-offline.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Test script for offline Docker container functionality
+# This simulates a completely airgapped/offline environment
+
+set -euo pipefail
+
+# Build image only if SKIP_BUILD is unset or empty (useful for CI where image is already built)
+if [ -z "${SKIP_BUILD:-}" ]; then
+  echo "🔧 Building Docker image..."
+  docker build -t mcp-optimizer:latest .
+else
+  echo "⏭️  Skipping Docker build (SKIP_BUILD is set)"
+fi
+
+echo ""
+echo "🔌 Testing offline mode (no network access)..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+docker run --rm --network none mcp-optimizer:latest /app/.venv/bin/python -c "
+import os
+print('Environment variables:')
+fastembed_cache_path = os.environ.get('FASTEMBED_CACHE_PATH')
+print(f'  FASTEMBED_CACHE_PATH: {fastembed_cache_path}')
+print(f'  TIKTOKEN_CACHE_DIR: {os.environ.get(\"TIKTOKEN_CACHE_DIR\")}')
+print()
+print('Testing embeddings...')
+from mcp_optimizer.embeddings import EmbeddingManager
+manager = EmbeddingManager(model_name='BAAI/bge-small-en-v1.5', enable_cache=True, threads=2, fastembed_cache_path=fastembed_cache_path)
+embedding = manager.generate_embedding(['test offline mode'])
+print(f'  ✓ Fastembed works! Embedding shape: {embedding.shape}')
+print()
+print('Testing tiktoken...')
+import tiktoken
+enc = tiktoken.get_encoding('cl100k_base')
+tokens = enc.encode('test tiktoken offline')
+print(f'  ✓ Tiktoken works! Encoded {len(tokens)} tokens')
+"
+
+echo ""
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "✅ All offline tests passed!"
diff --git a/src/mcp_optimizer/cli.py b/src/mcp_optimizer/cli.py
@@ -224,6 +224,7 @@ def main(**kwargs: Any) -> None:
             model_name=config.embedding_model_name,
             enable_cache=config.enable_embedding_cache,
             threads=config.embedding_threads,
+            fastembed_cache_path=config.fastembed_cache_path,
         )
         ingestion_service = IngestionService(
             db_config,

diff --git a/src/mcp_optimizer/config.py b/src/mcp_optimizer/config.py
@@ -157,7 +157,7 @@ def normalize_runtime_mode(cls, v) -> str:
         ge=1,
         le=16,
         description="Number of threads for embedding generation (1-16). "
-        "Lower values reduce CPU usage. Set to None to use all CPU cores. "
+        "Lower values reduce CPU usage. Set to None to use all CPU cores. ",
     )
 
     # Token counting configuration
@@ -246,6 +246,12 @@ def normalize_runtime_mode(cls, v) -> str:
             "When disabled, only find_tool, call_tool, and list_tools are available."
         ),
     )
+    fastembed_cache_path: str | None = Field(
+        default=None, description="Path to FastEmbed cache directory"
+    )
+    tiktoken_cache_dir: str | None = Field(
+        default=None, description="Path to Tiktoken cache directory"
+    )
 
     @field_validator("skipped_workloads", mode="before")
     @classmethod
@@ -496,6 +502,8 @@ def _populate_config_from_env() -> dict[str, Any]:
         "K8S_NAMESPACE": "k8s_namespace",
         "K8S_ALL_NAMESPACES": "k8s_all_namespaces",
         "ENABLE_DYNAMIC_INSTALL": "enable_dynamic_install",
+        "FASTEMBED_CACHE_PATH": "fastembed_cache_path",
+        "TIKTOKEN_CACHE_DIR": "tiktoken_cache_dir",
     }
 
     for env_var, field_name in env_mappings.items():

diff --git a/src/mcp_optimizer/embeddings.py b/src/mcp_optimizer/embeddings.py
@@ -34,7 +34,13 @@ class EmbeddingManager:
     See database migration file for the configured dimension in vector tables.
     """
 
-    def __init__(self, model_name: str, enable_cache: bool, threads: int | None = None):
+    def __init__(
+        self,
+        model_name: str,
+        enable_cache: bool,
+        threads: int | None = None,  # None: FastEmbed uses all available CPU cores
+        fastembed_cache_path: str | None = None,  # None: no cache_dir override
+    ) -> None:
         """Initialize with specified embedding model.
 
         Args:
@@ -46,17 +52,26 @@ def __init__(self, model_name: str, enable_cache: bool, threads: int | None = No
             threads: Number of threads to use for embedding generation.
                     None = use all available CPU cores (default FastEmbed behavior).
                     Set to 1-4 to limit CPU usage in production.
+            fastembed_cache_path: Optional path to FastEmbed model cache directory.
         """
         self.model_name = model_name
         self._model: TextEmbedding | None = None
         self.enable_cache = enable_cache
         self.threads = threads
+        self.fastembed_cache_path = fastembed_cache_path
 
     @property
     def model(self) -> TextEmbedding:
         """Lazy load the embedding model."""
         if self._model is None:
-            self._model = TextEmbedding(model_name=self.model_name, threads=self.threads)
+            # A configured fastembed_cache_path turns on local_files_only (offline/airgapped use)
+            local_files_only = self.fastembed_cache_path is not None
+            self._model = TextEmbedding(
+                model_name=self.model_name,
+                threads=self.threads,
+                cache_dir=self.fastembed_cache_path,
+                local_files_only=local_files_only,
+            )
         return self._model
 
     def _generate_single_cached_embedding(self, text: str) -> np.ndarray:

diff --git a/src/mcp_optimizer/polling_manager.py b/src/mcp_optimizer/polling_manager.py
@@ -160,6 +160,7 @@ def configure_polling(toolhive_client: ToolhiveClient, config: MCPOptimizerConfi
         model_name=config.embedding_model_name,
         enable_cache=config.enable_embedding_cache,
         threads=config.embedding_threads,
+        fastembed_cache_path=config.fastembed_cache_path,
     )
 
     _polling_state.polling_manager = PollingManager(

diff --git a/src/mcp_optimizer/server.py b/src/mcp_optimizer/server.py
@@ -149,6 +149,7 @@ def initialize_server_components(config: MCPOptimizerConfig) -> None:
         model_name=config.embedding_model_name,
         enable_cache=config.enable_embedding_cache,
         threads=config.embedding_threads,
+        fastembed_cache_path=config.fastembed_cache_path,
     )
     mcp.settings.port = config.mcp_port
     toolhive_client = ToolhiveClient(