Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/backend/pii/detectors/onnx_model_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"math"
"os"
"runtime"
"sort"
"strings"

Expand Down Expand Up @@ -59,10 +60,15 @@ func NewONNXModelDetectorSimple(modelPath string, tokenizerPath string) (*ONNXMo

if onnxLibPath == "" {
onnxPaths := []string{
// macOS paths (.dylib)
"./libonnxruntime.1.24.2.dylib", // CWD (legacy)
"./resources/libonnxruntime.1.24.2.dylib", // Production DMG: CWD is Contents/Resources
"./build/libonnxruntime.1.24.2.dylib", // Development: in build directory
"../libonnxruntime.1.24.2.dylib", // Alternative location
// Linux paths (.so)
"./lib/libonnxruntime.so.1.24.2", // Linux release tarball layout
"./build/libonnxruntime.so.1.24.2", // Development: in build directory
"./libonnxruntime.so.1.24.2", // CWD
}

for _, p := range onnxPaths {
Expand All @@ -77,7 +83,11 @@ func NewONNXModelDetectorSimple(modelPath string, tokenizerPath string) (*ONNXMo
onnxruntime.SetSharedLibraryPath(onnxLibPath)
} else {
// Fall back to default path, might work if library is in system path
onnxruntime.SetSharedLibraryPath("./build/libonnxruntime.1.24.2.dylib")
if runtime.GOOS == "linux" {
onnxruntime.SetSharedLibraryPath("./lib/libonnxruntime.so.1.24.2")
} else {
onnxruntime.SetSharedLibraryPath("./build/libonnxruntime.1.24.2.dylib")
}
}

// Initialize ONNX Runtime environment only if not already initialized
Expand Down
49 changes: 49 additions & 0 deletions src/scripts/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Base image is pinned to linux/amd64 regardless of the build host's architecture.
FROM --platform=linux/amd64 ubuntu:24.04

# Release version to install, e.g. 0.4.9 (the download step prepends "v" to
# form the release tag). When empty, the latest GitHub release is looked up at
# build time.
ARG KIJI_VERSION=""

# Tools needed later in this image: curl (release download and HEALTHCHECK),
# jq (parsing the GitHub API response; also used by the test script) and
# ca-certificates (HTTPS). The apt package lists are removed afterwards.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
curl \
jq \
&& rm -rf /var/lib/apt/lists/*

# Download and install the latest (or specified) Linux release from GitHub.
#
# Steps:
#   1. Resolve the release tag. KIJI_VERSION may be given with or without a
#      leading "v" (0.4.9 or v0.4.9); when it is empty the latest release tag
#      is read from the GitHub API, and the build fails with a clear message
#      if no tag could be determined.
#   2. Download the linux-amd64 tarball and its .sha256 file, then verify the
#      checksum with sha256sum.
#   3. Extract into /opt/kiji-proxy (dropping the archive's top-level
#      directory), remove the downloaded files, and mark the proxy binary and
#      run.sh as executable.
RUN set -eux; \
    REPO="dataiku/kiji-proxy"; \
    if [ -n "$KIJI_VERSION" ]; then \
        KIJI_VERSION="${KIJI_VERSION#v}"; \
        TAG="v${KIJI_VERSION}"; \
    else \
        TAG=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" | jq -r '.tag_name // empty'); \
        if [ -z "$TAG" ]; then \
            echo "Could not determine the latest release tag for ${REPO}" >&2; \
            exit 1; \
        fi; \
        KIJI_VERSION="${TAG#v}"; \
    fi; \
    ARCHIVE="kiji-privacy-proxy-${KIJI_VERSION}-linux-amd64.tar.gz"; \
    URL="https://github.com/${REPO}/releases/download/${TAG}/${ARCHIVE}"; \
    echo "Downloading ${URL}"; \
    curl -fsSL -o "/tmp/${ARCHIVE}" "${URL}"; \
    curl -fsSL -o "/tmp/${ARCHIVE}.sha256" "${URL}.sha256"; \
    cd /tmp && sha256sum -c "${ARCHIVE}.sha256"; \
    mkdir -p /opt/kiji-proxy; \
    tar -xzf "/tmp/${ARCHIVE}" -C /opt/kiji-proxy --strip-components=1; \
    rm -f "/tmp/${ARCHIVE}" "/tmp/${ARCHIVE}.sha256"; \
    chmod +x /opt/kiji-proxy/bin/kiji-proxy /opt/kiji-proxy/run.sh

# Let the dynamic loader find the shared libraries in the release's lib/ directory.
ENV LD_LIBRARY_PATH="/opt/kiji-proxy/lib"
# NOTE(review): presumably read by the ONNX detector to locate the runtime
# library explicitly — confirm against the detector's environment lookup.
ENV ONNXRUNTIME_SHARED_LIBRARY_PATH="/opt/kiji-proxy/lib/libonnxruntime.so.1.24.2"

# Port used by the health check below and by the API smoke tests.
EXPOSE 8080

# Probe /health every 5s (3s timeout, 3 retries) after a 10s start-up grace
# period; a failing curl marks the container as unhealthy.
HEALTHCHECK --interval=5s --timeout=3s --start-period=10s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1

# Copy the API smoke-test script into the image and make it executable.
# It is not run at build time; see the default command below.
COPY src/scripts/docker/docker-test-api.sh /opt/kiji-proxy/docker-test-api.sh
RUN chmod +x /opt/kiji-proxy/docker-test-api.sh

# Run from the install directory: the detector's Linux fallback looks for
# ./lib/libonnxruntime.so.1.24.2 relative to the working directory.
WORKDIR /opt/kiji-proxy

# Default command runs the proxy; override with the test script path to run tests
CMD ["/opt/kiji-proxy/bin/kiji-proxy"]
55 changes: 55 additions & 0 deletions src/scripts/docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Kiji Privacy Proxy - Docker

Run the Kiji Privacy Proxy Linux release in a Docker container (Ubuntu 24.04, amd64).

## Build

From the repository root:

```bash
# Latest release
docker build -f src/scripts/docker/Dockerfile -t kiji-proxy .

# Specific version
docker build -f src/scripts/docker/Dockerfile --build-arg KIJI_VERSION=0.4.9 -t kiji-proxy .
```

## Run

```bash
# Start the proxy server (API on port 8080)
docker run -p 8080:8080 kiji-proxy

# Verify it's running
curl http://localhost:8080/health
curl http://localhost:8080/version
```

## API Smoke Tests

Run the included test script to smoke-test the core API endpoints:

```bash
docker run --rm kiji-proxy /opt/kiji-proxy/docker-test-api.sh
```

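The script starts the proxy inside the container, waits for `/health` to respond,
and then tests `/health`, `/version`, `/stats`, `/mappings` (DELETE), `/api/model/info`,
`/api/pii/check` (with PII detection verification), and `/api/pii/confidence` (GET and POST).
It exits with a non-zero status if any check fails.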

## Configuration

Pass environment variables to configure the proxy:

```bash
docker run -p 8080:8080 \
-e OPENAI_API_KEY=sk-... \
-e ANTHROPIC_API_KEY=sk-ant-... \
kiji-proxy
```

See the [Getting Started Guide](../../../docs/01-getting-started.md) for all configuration options.

## Apple Silicon

The container runs as `linux/amd64`. On Apple Silicon Macs, enable Rosetta in
Docker Desktop (Settings > General > "Use Rosetta for x86_64/amd64 emulation").
140 changes: 140 additions & 0 deletions src/scripts/docker/docker-test-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/bin/bash
#
# Test script for Kiji Privacy Proxy API endpoints.
# Starts the server in the background, waits for it to be ready,
# runs smoke tests against core endpoints, then exits.
#
set -euo pipefail

# Server under test and running tallies of passed / failed checks.
BASE_URL="http://localhost:8080"
PASS=0
FAIL=0

# pass MESSAGE — print a success line and increment the pass counter.
pass() {
    printf ' ✓ %s\n' "$1"
    PASS=$((PASS + 1))
}

# fail MESSAGE — print a failure line and increment the fail counter.
fail() {
    printf ' ✗ %s\n' "$1"
    FAIL=$((FAIL + 1))
}

# check_status NAME URL [EXPECTED_STATUS]
# Issue a GET to URL and record a pass when the HTTP status code equals
# EXPECTED_STATUS (default 200); otherwise record a failure showing both codes.
check_status() {
    local label="$1"
    local endpoint="$2"
    local want="${3:-200}"
    local got
    # A failing curl must not abort the script; its printed status code
    # is still captured and compared below.
    got=$(curl -s -o /dev/null -w "%{http_code}" "$endpoint" || true)
    if [ "$got" != "$want" ]; then
        fail "$label — expected $want, got $got"
        return
    fi
    pass "$label (HTTP $got)"
}

# check_json_field NAME URL FIELD
# Fetch URL and record a pass when the jq filter FIELD yields a value other
# than null/false on the response body. A failed request (curl -f) is
# recorded as a failure without inspecting the body.
check_json_field() {
    local label="$1"
    local endpoint="$2"
    local filter="$3"
    local payload
    if ! payload=$(curl -sf "$endpoint"); then
        fail "$label — request failed"
        return
    fi
    if jq -e "$filter" <<< "$payload" > /dev/null 2>&1; then
        pass "$label (field $filter present)"
        return
    fi
    fail "$label — field $filter missing in response"
}

echo "Starting kiji-proxy in background..."
/opt/kiji-proxy/bin/kiji-proxy &
SERVER_PID=$!

# Stop the background server however the script ends. Under `set -e` any
# unexpected command failure aborts the script immediately, which would
# otherwise leave the server process running. The script's exit status is
# unaffected because the trap does not call `exit` itself.
trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT

# Wait for server to become ready: poll /health once per second, up to 30 times
echo "Waiting for server to be ready..."
for i in $(seq 1 30); do
    if curl -sf "$BASE_URL/health" > /dev/null 2>&1; then
        echo "Server ready after ${i}s"
        break
    fi
    # Give up early if the server process has already died
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "Server process exited unexpectedly"
        exit 1
    fi
    sleep 1
done

# Final probe: distinguishes "became ready" from "loop ran out of attempts"
if ! curl -sf "$BASE_URL/health" > /dev/null 2>&1; then
    echo "Server failed to start within 30s"
    kill "$SERVER_PID" 2>/dev/null || true
    exit 1
fi

printf '\n=== API Smoke Tests ===\n\n'

# Liveness and version information
echo "Health & Info:"
check_status "GET /health" "$BASE_URL/health"
check_status "GET /version" "$BASE_URL/version"
check_json_field "GET /version has version" "$BASE_URL/version" ".version"

# Statistics and mapping management
printf '\nManagement:\n'
check_status "GET /stats" "$BASE_URL/stats"
# /mappings only supports DELETE (clears PII mappings), so it is probed
# directly instead of through the GET-based check_status helper
DEL_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE_URL/mappings" || true)
case "$DEL_STATUS" in
    200) pass "DELETE /mappings (HTTP $DEL_STATUS)" ;;
    *)   fail "DELETE /mappings — expected 200, got $DEL_STATUS" ;;
esac

# Model metadata
printf '\nModel:\n'
check_status "GET /api/model/info" "$BASE_URL/api/model/info"

# PII check endpoint
echo ""
echo "PII Detection:"
# Send the request once and capture both the response body and the HTTP
# status: -w makes curl append the status code on its own final line.
PII_RESPONSE=$(curl -s -w '\n%{http_code}' \
    -X POST "$BASE_URL/api/pii/check" \
    -H "Content-Type: application/json" \
    -d '{"message": "My name is John Smith and my email is john@example.com"}') || true
# Everything after the last newline is the status; everything before it is the body
PII_STATUS=${PII_RESPONSE##*$'\n'}
PII_BODY=${PII_RESPONSE%$'\n'*}
if [ "$PII_STATUS" = "200" ]; then
    pass "POST /api/pii/check (HTTP $PII_STATUS)"

    # Verify PII was found
    if echo "$PII_BODY" | jq -e '.pii_found == true' > /dev/null 2>&1; then
        # A missing or malformed .entities makes jq fail; under `set -e` an
        # unguarded failure here would abort the script before the summary is
        # printed, so fall back to a placeholder count instead.
        ENTITY_COUNT=$(echo "$PII_BODY" | jq '.entities | keys | length' 2>/dev/null) || ENTITY_COUNT="?"
        pass "PII detection found $ENTITY_COUNT masked entities"
    else
        fail "PII detection did not find PII (pii_found != true)"
    fi
else
    fail "POST /api/pii/check — expected 200, got $PII_STATUS"
fi

# PII confidence endpoint: GET returns the current threshold
check_status "GET /api/pii/confidence" "$BASE_URL/api/pii/confidence"
check_json_field "GET /api/pii/confidence has threshold" "$BASE_URL/api/pii/confidence" ".confidence"

# POST sets the confidence threshold; only the returned status code is checked
CONF_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
    -X POST "$BASE_URL/api/pii/confidence" \
    -H "Content-Type: application/json" \
    -d '{"confidence": 0.5}' || true)
if [ "$CONF_STATUS" != "200" ]; then
    fail "POST /api/pii/confidence — expected 200, got $CONF_STATUS"
else
    pass "POST /api/pii/confidence (HTTP $CONF_STATUS)"
fi

# Summary: blank line, totals, blank line
printf '\n=== Results: %s passed, %s failed ===\n\n' "$PASS" "$FAIL"

# Stop the server and reap it; errors are ignored in case it has already exited
kill "$SERVER_PID" 2>/dev/null || true
wait "$SERVER_PID" 2>/dev/null || true

# A non-zero exit status signals that at least one check failed
[ "$FAIL" -eq 0 ] || exit 1
echo "All tests passed."
Loading