29 changes: 29 additions & 0 deletions .env.ci
@@ -0,0 +1,29 @@
# Copy this file to nilai/.env and set the values accordingly
# This file is used to set the environment variables for the project
# You shouldn't commit this file to the repository
# (!) You should replace the values with your own and not share them with anyone

# Hugging Face API Token
HF_TOKEN="Hugging Face API Token"

ENVIRONMENT = "mainnet"
NILAI_GUNICORN_WORKERS = 10


# Postgres Docker Compose Config
POSTGRES_HOST = "postgres"
POSTGRES_USER = "user"
POSTGRES_PASSWORD = "password"
POSTGRES_DB = "mydb"
POSTGRES_PORT = 5432

# Redis Docker Compose Config
REDIS_URL = "redis://redis:6379"

# Etcd Docker Compose Config
ETCD_HOST = "etcd"
ETCD_PORT = 2379

# Grafana Docker Compose Config
GF_SECURITY_ADMIN_USER = "admin"
GF_SECURITY_ADMIN_PASSWORD = "password"
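The CI env file is consumed by the e2e job below, which copies it to `.env` and swaps in the real `HF_TOKEN`. A quick local sanity check of the file (a sketch; assumes Docker Compose v2) could be:

```shell
# Copy the CI defaults into place, as the e2e job does
cp .env.ci .env

# Check that compose can parse and interpolate the configuration
docker compose -f docker-compose.yml config --quiet && echo "compose config OK"

# Warn if the Hugging Face token is still the placeholder
grep -q '^HF_TOKEN="Hugging Face API Token"' .env \
  && echo "warning: HF_TOKEN is still the placeholder"
```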
3 changes: 2 additions & 1 deletion .env.sample
@@ -7,6 +7,7 @@
HF_TOKEN="Hugging Face API Token"

ENVIRONMENT = "testnet"
NILAI_GUNICORN_WORKERS = 50

# Postgres Docker Compose Config
POSTGRES_HOST = "postgres"
@@ -16,7 +17,7 @@ POSTGRES_DB = "mydb"
POSTGRES_PORT = 5432

# Redis Docker Compose Config
REDIS_URL = "redis://localhost:6379"
REDIS_URL = "redis://redis:6379"

# Etcd Docker Compose Config
ETCD_HOST = "etcd"
124 changes: 118 additions & 6 deletions .github/workflows/ci.yml
@@ -9,7 +9,7 @@ on:
jobs:
test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

@@ -29,15 +29,127 @@
- name: Install dependencies
run: |
uv sync

- name: Run Ruff format check
run: uv run ruff format --check

- name: Run Ruff linting
run: uv run ruff check --exclude packages/verifier/

- name: Run tests
run: uv run pytest -v
run: uv run pytest -v tests/unit

- name: pyright
run: uv run pyright
run: uv run pyright

start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-24.04
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instances-ids: ${{ steps.start-ec2-runner.outputs.ec2-instances-ids }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4.0.2
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "eu-west-1"
- name: Start EC2 runner
id: start-ec2-runner
uses: NillionNetwork/ec2-github-runner@v2.2
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
runners-per-machine: 1
number-of-machines: 1
ec2-image-id: ami-0ac221d824dd88706
ec2-instance-type: g4dn.xlarge
subnet-id: subnet-0ec4c353621eabae2
security-group-id: sg-03ee5c56e1f467aa0
key-name: production-github-runner-key
iam-role-name: github-runners-production-github-runner-ec2
aws-resource-tags: >
[
{"Key": "Name", "Value": "github-runner-${{ github.run_id }}-${{ github.run_number }}"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
{"Key": "KeyName", "Value": "github-runners-key"},
{"Key": "Deployment", "Value": "github-runners"},
{"Key": "Type", "Value": "GithubRunner"},
{"Key": "User", "Value": "ec2-user"},
{"Key": "Environment", "Value": "production"}
]

e2e-tests:
name: E2E Tests
needs: start-runner
runs-on: ${{ needs.start-runner.outputs.label }}
steps:
- name: Checkout
uses: actions/checkout@v2

- uses: astral-sh/setup-uv@v4
with:
enable-cache: true
cache-dependency-glob: "**/pyproject.toml"

- name: Install dependencies
run: uv sync

- name: Build vllm
run: docker build -t nillion/nilai-vllm:latest -f docker/vllm.Dockerfile .

- name: Build nilai API
run: docker build -t nillion/nilai-api:latest -f docker/api.Dockerfile --target nilai .

- name: Create .env
run: |
cp .env.ci .env
# Copy secret into .env replacing the existing HF_TOKEN
sed -i 's/HF_TOKEN=.*/HF_TOKEN=${{ secrets.HF_TOKEN }}/' .env

- name: Start Services
run: |
docker-compose -f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker/compose/docker-compose.llama-1b-gpu.ci.yml \
up -d
docker ps -a

- name: Wait for services to be healthy
run: bash scripts/wait_for_ci_services.sh

- name: Run E2E tests
run: |
set -e
export AUTH_TOKEN=$(docker exec nilai-api uv run src/nilai_api/commands/add_user.py --name test1 --email test1@test.com --ratelimit-minute 1000 --ratelimit-hour 1000 --ratelimit-day 1000 | jq ".apikey" -r)
export ENVIRONMENT=ci
uv run pytest -v tests/e2e

- name: Stop Services
run: |
docker-compose -f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker/compose/docker-compose.llama-1b-gpu.ci.yml \
down -v

stop-runner:
name: Stop self-hosted EC2 runner
needs: [ start-runner, e2e-tests ]
runs-on: ubuntu-24.04
if: ${{ always() }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "eu-west-1"

- name: Stop EC2 runner
uses: NillionNetwork/ec2-github-runner@v2.2
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instances-ids: ${{ needs.start-runner.outputs.ec2-instances-ids }}
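The e2e job gates on `scripts/wait_for_ci_services.sh`, which is not part of this diff. Since the model service defines a Docker healthcheck (see `docker-compose.llama-1b-gpu.ci.yml` below), a hypothetical minimal version of such a wait loop could poll the container's health status:

```shell
#!/usr/bin/env bash
# Hypothetical sketch only; the real scripts/wait_for_ci_services.sh is not shown in this PR.
set -euo pipefail

for attempt in $(seq 1 60); do
  # nilai-llama_1b_gpu is the container_name set in the CI compose file
  status=$(docker inspect -f '{{.State.Health.Status}}' nilai-llama_1b_gpu 2>/dev/null || echo "starting")
  if [ "$status" = "healthy" ]; then
    echo "services healthy"
    exit 0
  fi
  echo "waiting for services ($attempt/60, status: $status)..."
  sleep 10
done

echo "services did not become healthy in time" >&2
exit 1
```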
13 changes: 11 additions & 2 deletions .pre-commit-config.yaml
@@ -15,12 +15,21 @@ repos:
- id: ruff # Runs Ruff's linter
exclude: "packages/verifier/" # Excludes the "packages/verifier/" directory from linting

# Second repository: Local hooks (custom pre-commit checks)
# Second repository: Pyright for static type checking
- repo: local
hooks:
- id: pyright
name: pyright
entry: pyright
language: system
types: [python]
# Third repository: Local hooks (custom pre-commit checks)
- repo: local
hooks:
- id: pytest # Defines a local hook for running pytest
name: pytest # A human-readable name for the hook
entry: pytest # The command to execute
language: system # Uses the system-installed pytest instead of a virtual environment
types: [python] # Applies only to Python files
pass_filenames: false # Prevents passing filenames, so pytest runs on all tests
pass_filenames: false # Run pytest on the entire test suite rather than just changed files
args: ["-vvv", "tests/unit"] # Runs pytest with verbose output and only unit tests
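With pyright split out as its own local hook, contributors can run the whole chain (Ruff, pyright, pytest) before pushing. Assuming `pre-commit` is installed:

```shell
# Install the git hook once, then exercise every hook against the full tree
pre-commit install
pre-commit run --all-files
```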
44 changes: 36 additions & 8 deletions README.md
@@ -21,6 +21,14 @@ nilAI is a platform designed to run on Confidential VMs with Trusted Execution E
### 1. Docker Compose Deployment (Recommended)

#### Development Environment
```shell
# Build vLLM docker container
docker build -t nillion/nilai-vllm:latest -f docker/vllm.Dockerfile .
# Build nilai_api container
docker build -t nillion/nilai-api:latest -f docker/api.Dockerfile --target nilai .
```
Then, to deploy:

```shell
docker compose -f docker-compose.yml \
-f docker-compose.dev.yml \
@@ -33,17 +41,37 @@

#### Production Environment
```shell
# Build vLLM docker container
docker build -t nillion/nilai-vllm:latest -f docker/vllm.Dockerfile .
# Build nilai_api container
docker build -t nillion/nilai-api:latest -f docker/api.Dockerfile --target nilai .
```
To deploy:
```shell
docker compose -f docker-compose.yml \
-f docker-compose.prod.yml \
-f docker/compose/docker-compose.llama-3b-gpu.yml \
-f docker/compose/docker-compose.llama-8b-gpu.yml \
-f docker/compose/docker-compose.dolphin-8b-gpu.yml \
-f docker/compose/docker-compose.deepseek-14b-gpu.yml \
up -d --build
-f docker-compose.prod.yml \
-f docker/compose/docker-compose.llama-3b-gpu.yml \
-f docker/compose/docker-compose.llama-8b-gpu.yml \
up -d
```

**Note**: Remove the `-f docker/compose/...` lines for any models you do not wish to deploy.

For a testing environment:

```shell
# Build vLLM docker container
docker build -t nillion/nilai-vllm:latest -f docker/vllm.Dockerfile .
# Build nilai_api container
docker build -t nillion/nilai-api:latest -f docker/api.Dockerfile --target nilai .
```
To deploy:
```shell
docker compose -f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker/compose/docker-compose.llama-1b-gpu.yml \
up -d
```

### 2. Manual Deployment

#### Components
@@ -77,7 +105,7 @@ docker compose -f docker-compose.yml \
2. **Run API Server**
```shell
# Development Environment
uv run fastapi dev nilai-api/src/nilai_api/__main__.py --port 8080
fastapi dev nilai-api/src/nilai_api/__main__.py --port 8080

# Production Environment
uv run fastapi run nilai-api/src/nilai_api/__main__.py --port 8080
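After any of the `up -d` variants above, a quick way to confirm the stack came up (the API container is named `nilai-api` in docker-compose.yml):

```shell
docker ps -a               # all services should report Up / healthy
docker logs -f nilai-api   # follow the API logs during startup
```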
2 changes: 1 addition & 1 deletion caddy/Caddyfile
@@ -4,7 +4,7 @@
}
}

https://nilai.sandbox.nilogy.xyz {
https://test.nilai.sandbox.nilogy.xyz {
import ssl_config
reverse_proxy api:8443
}
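Since the vhost was renamed, it is worth checking that DNS for the new name resolves and that Caddy can serve it; a minimal smoke test (assuming the stack is up and DNS/certificates are in place) might be:

```shell
# Expect an HTTP status line from the renamed vhost
curl -sSI https://test.nilai.sandbox.nilogy.xyz | head -n 1
```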
4 changes: 4 additions & 0 deletions docker-compose.dev.yml
@@ -1,8 +1,12 @@
services:
api:
privileged: true
ports:
- "8080:8080"
- "8443:8443"
volumes:
- /dev/sev-guest:/dev/sev-guest # for AMD SEV
- ./nilai-api/:/app/nilai-api/
networks:
- proxy_net
redis:
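The new `/dev/sev-guest` bind mount only carries a real device on an AMD SEV-SNP guest; on other hosts Docker may silently create an empty directory at that path instead. A pre-flight check (sketch) before using the dev override:

```shell
# Confirm the SEV guest device exists before starting docker-compose.dev.yml
if [ -e /dev/sev-guest ]; then
  echo "SEV guest device present"
else
  echo "no /dev/sev-guest on this host; attestation will not work" >&2
fi
```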
5 changes: 1 addition & 4 deletions docker-compose.yml
@@ -104,10 +104,7 @@ services:

api:
container_name: nilai-api
build:
context: .
dockerfile: docker/api.Dockerfile
target: nilai
image: nillion/nilai-api:latest
depends_on:
etcd:
condition: service_healthy
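With the `build:` section replaced by a fixed `image:`, `docker compose up` no longer builds the API image; it must be built (or pulled) beforehand, matching the commands now documented in the README:

```shell
# Build the images referenced by docker-compose.yml before bringing the stack up
docker build -t nillion/nilai-vllm:latest -f docker/vllm.Dockerfile .
docker build -t nillion/nilai-api:latest -f docker/api.Dockerfile --target nilai .
```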
4 changes: 1 addition & 3 deletions docker/compose/docker-compose.deepseek-14b-gpu.yml
@@ -1,8 +1,6 @@
services:
deepseek_14b_gpu:
build:
context: .
dockerfile: docker/vllm.Dockerfile
image: nillion/nilai-vllm:latest
deploy:
resources:
reservations:
4 changes: 1 addition & 3 deletions docker/compose/docker-compose.dolphin-8b-gpu.yml
@@ -1,8 +1,6 @@
services:
dolphin_8b_gpu:
build:
context: .
dockerfile: docker/vllm.Dockerfile
image: nillion/nilai-vllm:latest
deploy:
resources:
reservations:
50 changes: 50 additions & 0 deletions docker/compose/docker-compose.llama-1b-gpu.ci.yml
@@ -0,0 +1,50 @@
services:
llama_1b_gpu:
image: nillion/nilai-vllm:latest
container_name: nilai-llama_1b_gpu
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
ipc: host
ulimits:
memlock: -1
stack: 67108864
env_file:
- .env
restart: unless-stopped
depends_on:
etcd:
condition: service_healthy
command: >
--model meta-llama/Llama-3.2-1B-Instruct
--tensor-parallel-size 1
--enable-auto-tool-choice
--tool-call-parser llama3_json
--uvicorn-log-level warning
--dtype half
environment:
- SVC_HOST=llama_1b_gpu
- SVC_PORT=8000
- ETCD_HOST=etcd
- ETCD_PORT=2379
- TOOL_SUPPORT=true
- CUDA_LAUNCH_BLOCKING=1
volumes:
- hugging_face_models:/root/.cache/huggingface # cache models
networks:
- backend_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 30s
retries: 3
start_period: 60s
timeout: 10s
volumes:
hugging_face_models:

networks:
backend_net:
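To exercise this CI model definition outside of GitHub Actions (a sketch; assumes an NVIDIA GPU host with the container toolkit installed and the images built as described in the README):

```shell
docker compose -f docker-compose.yml \
    -f docker-compose.dev.yml \
    -f docker/compose/docker-compose.llama-1b-gpu.ci.yml \
    up -d
```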