Skip to content

Commit 0e60a9c

Browse files
authored
Improve image tests (#315)
* Improve Image Tests
1 parent dd9a1e5 commit 0e60a9c

File tree

8 files changed

+117
-67
lines changed

8 files changed

+117
-67
lines changed

.github/workflows/image_smoke.yml

Lines changed: 86 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
context: .
6565
file: ${{ matrix.build.dockerfile }}
6666
tags: ${{ matrix.build.name }}:${{ github.sha }}
67-
load: ${{ matrix.build.name == 'aio' }}
67+
load: true # Load all images for smoke testing
6868
push: false
6969
cache-from: type=local,src=/tmp/.buildx-cache
7070
cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
@@ -97,36 +97,92 @@ jobs:
9797
sarif_file: trivy-results-aio.sarif
9898
category: trivy-aio
9999

100-
# Smoke test - only AIO (contains both server and client)
101-
- name: Smoke Test - Verify Container Starts
100+
# Smoke test - Server container
101+
- name: Smoke Test - Server Container
102+
if: matrix.build.name == 'server'
103+
run: |
104+
echo "Testing Server container startup..."
105+
106+
# Start server container in background
107+
docker run -d --name test-server \
108+
-e API_SERVER_KEY=test-key-for-ci \
109+
server:${{ github.sha }}
110+
111+
# Wait 10 seconds, then verify the container is still running
112+
echo "Waiting for container to stabilize..."
113+
sleep 10
114+
115+
if docker ps --filter "name=test-server" --filter "status=running" | grep -q test-server; then
116+
echo "✅ Server container started and is running"
117+
docker stop test-server
118+
exit 0
119+
else
120+
echo "❌ Server container failed to start or crashed"
121+
docker logs test-server 2>&1
122+
exit 1
123+
fi
124+
125+
# Smoke test - Client container
126+
- name: Smoke Test - Client Container
127+
if: matrix.build.name == 'client'
128+
run: |
129+
echo "Testing Client container startup..."
130+
131+
# Start client container in background
132+
# Client requires API_SERVER_* env vars to be set
133+
docker run -d --name test-client \
134+
-e API_SERVER_KEY=test-key-for-ci \
135+
-e API_SERVER_URL=http://localhost \
136+
-e API_SERVER_PORT=8000 \
137+
client:${{ github.sha }}
138+
139+
# Wait and verify container is still running
140+
echo "Waiting for container to stabilize..."
141+
sleep 10
142+
143+
if docker ps --filter "name=test-client" --filter "status=running" | grep -q test-client; then
144+
echo "✅ Client container started and is running"
145+
docker stop test-client
146+
exit 0
147+
else
148+
echo "❌ Client container failed to start or crashed"
149+
docker logs test-client 2>&1
150+
exit 1
151+
fi
152+
153+
# Smoke test - AIO container (both server and client)
154+
- name: Smoke Test - AIO Container
102155
if: matrix.build.name == 'aio'
103156
run: |
104-
echo "Testing AIO image startup..."
157+
echo "Testing AIO container startup..."
105158
106159
# Start container in background
107160
docker run -d --name test-aio \
108161
-e API_SERVER_KEY=test-key-for-ci \
109162
aio:${{ github.sha }}
110163
111-
# Wait for startup (max 30 seconds)
112-
for i in {1..30}; do
113-
if docker logs test-aio 2>&1 | grep -qE "Application startup complete|Uvicorn running|Starting server|Streamlit"; then
114-
echo "✅ Container started successfully"
115-
docker logs test-aio | tail -20
116-
docker stop test-aio
117-
exit 0
118-
fi
119-
sleep 1
120-
done
121-
122-
echo "❌ Container failed to start within 30 seconds"
123-
docker logs test-aio
124-
docker stop test-aio || true
125-
exit 1
126-
127-
- name: Cleanup Test Container
128-
if: always() && matrix.build.name == 'aio'
164+
# Wait and verify container is still running
165+
echo "Waiting for container to stabilize..."
166+
sleep 10
167+
168+
if docker ps --filter "name=test-aio" --filter "status=running" | grep -q test-aio; then
169+
echo "✅ AIO container started and is running"
170+
docker stop test-aio
171+
exit 0
172+
else
173+
echo "❌ AIO container failed to start or crashed"
174+
docker logs test-aio 2>&1
175+
exit 1
176+
fi
177+
178+
# Cleanup test containers
179+
- name: Cleanup Test Containers
180+
if: always()
129181
run: |
182+
docker stop test-server 2>/dev/null || true
183+
docker rm test-server 2>/dev/null || true
184+
docker stop test-client 2>/dev/null || true
185+
docker rm test-client 2>/dev/null || true
130186
docker stop test-aio 2>/dev/null || true
131187
docker rm test-aio 2>/dev/null || true
132188
@@ -140,9 +196,13 @@ jobs:
140196
steps:
141197
- name: All Validations Passed
142198
run: |
143-
echo "================================================"
199+
echo "========================================================"
144200
echo "✅ All container image validations passed!"
145-
echo "================================================"
201+
echo "========================================================"
146202
echo "- Image builds (aio, client, server): ✅"
147-
echo "- Security scan (AIO only): ✅"
148-
echo "- Smoke test (AIO only): ✅"
203+
echo "- Security scan (AIO): ✅"
204+
echo "- Smoke tests:"
205+
echo " - Server container startup: ✅"
206+
echo " - Client container startup: ✅"
207+
echo " - AIO container startup: ✅"
208+
echo "========================================================"

opentofu/modules/kubernetes/iam.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ resource "oci_identity_policy" "workers_policies" {
4141
format("allow any-user to inspect buckets in compartment id %s where all {request.principal.type = 'workload', request.principal.namespace = '%s', request.principal.cluster_id = '%s'}", var.compartment_id, var.label_prefix, oci_containerengine_cluster.default_cluster.id),
4242
format("allow any-user to read objects in compartment id %s where all {request.principal.type = 'workload', request.principal.namespace = '%s', request.principal.cluster_id = '%s'}", var.compartment_id, var.label_prefix, oci_containerengine_cluster.default_cluster.id),
4343
format("allow any-user to manage repos in compartment id %s where all {request.principal.type = 'workload', request.principal.namespace = '%s', request.principal.cluster_id = '%s'}", var.compartment_id, var.label_prefix, oci_containerengine_cluster.default_cluster.id),
44-
# Instance Principles
44+
format("allow any-user to use generative-ai-family in tenancy where all {request.principal.type = 'workload', request.principal.namespace = '%s', request.principal.cluster_id = '%s'}", var.label_prefix, oci_containerengine_cluster.default_cluster.id),
45+
# Instance Principals (required to pull images)
4546
format("allow dynamic-group %s to manage repos in compartment id %s", oci_identity_dynamic_group.workers_dynamic_group.name, var.compartment_id),
46-
format("allow dynamic-group %s to use generative-ai-family in tenancy", oci_identity_dynamic_group.workers_dynamic_group.name),
4747
]
4848
provider = oci.home_region
4949
}

src/.streamlit/config.toml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,6 @@
33
[global]
44
disableWidgetStateDuplicationWarning = true
55

6-
[theme]
7-
font = "sans-serif-pro"
8-
headingFont = "sans-serif"
9-
codeFont = "monospace"
10-
11-
[theme.sidebar]
12-
font = "sans-serif"
13-
headingFont = "sans-serif"
14-
codeFont = "monospace"
15-
166
[browser]
177
gatherUsageStats = false
188
serverAddress = "localhost"

src/Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
# spell-checker:disable
44
##################################################
55
# Base - All-In-One
6+
# Build from the project root directory:
7+
# podman build -f src/Dockerfile -t ai-optimizer-aio:latest .
68
##################################################
79
FROM container-registry.oracle.com/os/oraclelinux:8-slim AS all_in_one_pyenv
810
ENV RUNUSER=oracleai \
911
VIRTUAL_ENV=/opt/.venv
1012

11-
RUN groupadd $RUNUSER && \
13+
RUN groupadd -g 10001 $RUNUSER && \
1214
useradd -u 10001 -g $RUNUSER -md /app $RUNUSER && \
13-
microdnf --nodocs -y install python3.11 python3.11-pip python3.11-devel && \
15+
microdnf --nodocs -y install python3.11 python3.11-pip && \
1416
microdnf clean all && \
1517
python3.11 -m venv --symlinks --upgrade-deps $VIRTUAL_ENV
1618

src/client/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33
# spell-checker: disable
44
#############################################################
55
# Base - Web GUI
6-
# Build from the / directory:
6+
# Build from the project root directory:
77
# podman build -f src/client/Dockerfile -t ai-optimizer-client:latest .
88
#############################################################
99
FROM container-registry.oracle.com/os/oraclelinux:8-slim AS optimizer_base
1010
ENV RUNUSER=oracleai \
1111
VIRTUAL_ENV=/opt/.venv
1212

13-
RUN groupadd $RUNUSER && \
13+
RUN groupadd -g 10001 $RUNUSER && \
1414
useradd -u 10001 -g $RUNUSER -md /app $RUNUSER && \
15-
microdnf --nodocs -y install python3.11 python3.11-pip python3.11-devel && \
15+
microdnf --nodocs -y install python3.11 python3.11-pip && \
1616
microdnf clean all && \
1717
python3.11 -m venv --symlinks --upgrade-deps $VIRTUAL_ENV
1818

src/launch_client.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ def main() -> None:
9494
.stAppHeader img[alt="Logo"] {
9595
width: 50%;
9696
}
97+
/* Fix emoji rendering in tab labels */
98+
[data-testid="stMarkdownContainer"] p {
99+
font-family: "sans-serif-pro" !important;
100+
}
97101
</style>
98102
""",
99103
)

src/server/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
## Licensed under the Universal Permissive License v1.0 as shown at http://oss.oracle.com/licenses/upl.
33
#############################################################
44
# Base - API Server
5-
# Build from the / directory:
5+
# Build from the project root directory:
66
# podman build -f src/server/Dockerfile -t ai-optimizer-server:latest .
77
#############################################################
88
# spell-checker: disable
@@ -12,9 +12,9 @@ ENV RUNUSER=oracleai
1212
ENV RUNUSER=oracleai \
1313
VIRTUAL_ENV=/opt/.venv
1414

15-
RUN groupadd $RUNUSER && \
15+
RUN groupadd -g 10001 $RUNUSER && \
1616
useradd -u 10001 -g $RUNUSER -md /app $RUNUSER && \
17-
microdnf --nodocs -y install python3.11 python3.11-pip python3.11-devel && \
17+
microdnf --nodocs -y install python3.11 python3.11-pip && \
1818
microdnf clean all && \
1919
python3.11 -m venv --symlinks --upgrade-deps $VIRTUAL_ENV
2020

src/server/patches/litellm_patch.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,14 @@ def custom_validate_environment(
102102
api_base: Optional[str] = None,
103103
) -> dict:
104104
"""
105-
Custom validate_environment to support instance principals.
106-
When using instance principals, skip the validation of user/fingerprint/key.
105+
Custom validate_environment to support instance principals and workload identity.
106+
If oci_signer is present, use signer-based auth; otherwise use credential-based auth.
107107
"""
108-
oci_auth_type = optional_params.get("oci_auth_type")
108+
oci_signer = optional_params.get("oci_signer")
109109

110-
# If using instance principals or workload identity, skip credential validation
111-
if oci_auth_type in ("instance_principal", "oke_workload_identity"):
112-
logger.info("Using OCI %s - skipping credential validation", oci_auth_type)
110+
# If signer is provided, use signer-based authentication (instance principals/workload identity)
111+
if oci_signer:
112+
logger.info("OCI signer detected - using signer-based authentication")
113113
oci_region = optional_params.get("oci_region", "us-ashburn-1")
114114
api_base = (
115115
api_base or litellm.api_base or f"https://inference.generativeai.{oci_region}.oci.oraclecloud.com"
@@ -133,7 +133,7 @@ def custom_validate_environment(
133133

134134
return headers
135135

136-
# For standard auth, use original validation
136+
# For credential-based auth, use original validation
137137
return original_validate_environment(
138138
self, headers, model, messages, optional_params, litellm_params, api_key, api_base
139139
)
@@ -161,20 +161,14 @@ def custom_sign_request(
161161
fake_stream: Optional[bool] = None,
162162
) -> Tuple[dict, Optional[bytes]]:
163163
"""
164-
Custom sign_request to support instance principals.
165-
Uses OCI SDK's native signers for instance principals.
164+
Custom sign_request to support instance principals and workload identity.
165+
If oci_signer is present, use it for signing; otherwise use credential-based auth.
166166
"""
167-
oci_auth_type = optional_params.get("oci_auth_type")
167+
oci_signer = optional_params.get("oci_signer")
168168

169-
# If using instance principals or workload identity, use OCI SDK signers
170-
if oci_auth_type in ("instance_principal", "oke_workload_identity"):
171-
logger.info("Using OCI %s for request signing", oci_auth_type)
172-
173-
# Get the appropriate signer
174-
if oci_auth_type == "instance_principal":
175-
signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
176-
else: # oke_workload_identity
177-
signer = oci.auth.signers.get_oke_workload_identity_resource_principal_signer()
169+
# If signer is provided, use it for request signing
170+
if oci_signer:
171+
logger.info("Using OCI signer for request signing")
178172

179173
# Prepare the request
180174
from urllib.parse import urlparse
@@ -202,8 +196,8 @@ def __init__(self, method, url, headers, body):
202196

203197
mock_request = MockRequest(method=method, url=api_base, headers=prepared_headers, body=body)
204198

205-
# Sign the request using OCI SDK
206-
signer.do_request_sign(mock_request, enforce_content_headers=True)
199+
# Sign the request using the provided OCI signer
200+
oci_signer.do_request_sign(mock_request, enforce_content_headers=True)
207201

208202
# Update headers with signed headers
209203
headers.update(mock_request.headers)

0 commit comments

Comments
 (0)