Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
404f46b
Create mcp-malt-integration-tests.yml
tgrunnagle Dec 10, 2025
58651d8
Add gh authentication to "Install mcp-malt CLI" step
tgrunnagle Dec 10, 2025
6fee98a
Address claude feedback
tgrunnagle Dec 10, 2025
33ee370
Install specific version of cli
tgrunnagle Dec 10, 2025
bccb72a
Update mcp-malt-cli -> mtef
tgrunnagle Dec 11, 2025
9a8d0b0
Add access token for installing mtef
tgrunnagle Dec 11, 2025
107ddcc
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
24ec49c
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
9fe83cd
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
954ef58
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
f8e9eb5
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
ea30c7a
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
ccd47ad
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
9c8e5ad
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
295fc94
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
f86b2d6
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
8a527b2
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
7abdc0c
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
3d429d5
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
32d6848
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
acce640
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 15, 2025
a198e30
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 16, 2025
bdf3525
Update mcp-tef-integration-tests.yml
tgrunnagle Dec 16, 2025
94512c5
SHA pin upload-artifact action
tgrunnagle Dec 16, 2025
cac638a
Move to on: workflow_call
tgrunnagle Dec 16, 2025
3b76dda
Add missing action secret to workflow
tgrunnagle Dec 16, 2025
fe62c54
Pass secret from code-checks
tgrunnagle Dec 16, 2025
37d8304
inherit secrets from triggered workflow
tgrunnagle Dec 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ jobs:
name: Offline Mode Tests
uses: ./.github/workflows/offline-tests.yml
needs: image_build
mcp_tef_integration_tests:
  name: MCP TEF Integration Tests
  # Runs only after the image_build job.
  needs: image_build
  uses: ./.github/workflows/mcp-tef-integration-tests.yml
  # Forward the caller's secrets to the reusable workflow.
  secrets: inherit
1 change: 1 addition & 0 deletions .github/workflows/main-and-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ jobs:
code_checks:
  name: Code Checks
  # Forward this workflow's secrets to the reusable code-checks workflow.
  secrets: inherit
  uses: ./.github/workflows/code-checks.yml
339 changes: 339 additions & 0 deletions .github/workflows/mcp-tef-integration-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
# Reusable workflow: it has no triggers of its own and only runs when another
# workflow invokes it through `workflow_call`.
name: MCP-TEF Integration Tests

on:
  workflow_call:
    secrets:
      # Must be supplied (or inherited) by the calling workflow.
      OPENROUTER_API_KEY:
        description: 'API key for OpenRouter, used in mtef test runs'
        required: true

# Token scope for every job in this workflow: read-only repository contents.
permissions:
  contents: read

jobs:
  mcp-tef-integration-tests:
    name: MCP-TEF Query Alignment Tests
    runs-on: ubuntu-latest
    # Advisory job: a failure here is reported but does not block PR merge.
    continue-on-error: true

    steps:
- name: Checkout code
  # Action is pinned to a full commit SHA (annotated as v6.0.1).
  uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1

- name: Set up Docker Buildx
  # Action is pinned to a full commit SHA (annotated as v3.11.1).
  uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435  # v3.11.1

- name: Build mcp-optimizer Docker image
  uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83  # v6.18.0
  with:
    context: .
    platforms: linux/amd64
    # The tag below is the image reference the later
    # "Install mcp-optimizer server" step passes to `thv run`.
    tags: 'mcp-optimizer:latest'
    # Keep the image on the runner: load it into the local Docker daemon
    # and do not push it to any registry.
    load: true
    push: false
    cache-from: type=gha

- name: Install uv
  uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a  # v7.1.5
  with:
    # Quoted so the version stays a string rather than a float.
    python-version: "3.13"
    enable-cache: true

- name: Install mtef CLI
  run: |
    # Install the CLI from the `cli` subdirectory of the mcp-tef repository
    # at the pinned cli-v0.0.3 ref.
    MTEF_SPEC="mcp-tef-cli @ git+https://github.com/StacklokLabs/mcp-tef.git@cli-v0.0.3#subdirectory=cli"
    uv tool install "${MTEF_SPEC}"

- name: Install ToolHive
  # Provides the `thv` CLI used by the following steps; pinned to a commit SHA.
  uses: StacklokLabs/toolhive-actions/install@6a095f99aa2fd6cd92cf0bb94bdf509b99820c06  # v0.0.3

- name: Run ToolHive server
  run: |
    # Start the ToolHive API server in the background so later steps can
    # reach it on port 9090.
    thv serve --host 0.0.0.0 --port 9090 &

    echo "Waiting for ToolHive server to start..."
    # Poll for up to 30 seconds instead of relying on a fixed sleep, so a
    # slow start is tolerated and a fast start does not waste time.
    TOOLHIVE_UP=false
    for attempt in $(seq 1 30); do
      if curl --silent --output /dev/null http://localhost:9090/api/v1beta/version; then
        TOOLHIVE_UP=true
        break
      fi
      sleep 1
    done

    # Best-effort check: an unreachable API is reported but does not fail
    # the step.
    if [ "${TOOLHIVE_UP}" = true ]; then
      echo "ToolHive API is accessible (attempt ${attempt}/30)"
    else
      echo "Failed to connect to ToolHive API"
    fi

- name: Install MCP servers (time and fetch)
  run: |
    # Start each MCP server workload through ToolHive, one after the other.
    for server in time fetch; do
      echo "Installing ${server} MCP server..."
      thv run "${server}" --host 0.0.0.0
    done

- name: Install mcp-optimizer server
  run: |
    # Run the locally built image through ToolHive, proxied on port 8080.
    # TOOLHIVE_HOST/TOOLHIVE_PORT point the container at the `thv serve`
    # API on port 9090 (172.17.0.1 is presumably the Docker bridge gateway
    # of the runner; confirm).
    thv run mcp-optimizer:latest \
      --transport streamable-http \
      --proxy-port 8080 \
      --host 0.0.0.0 \
      --env TOOLHIVE_HOST=172.17.0.1 \
      --env TOOLHIVE_PORT=9090 \
      --env WORKLOAD_POLLING_INTERVAL=2 \
      --env TOOLHIVE_SKIP_BACKOFF=1

- name: Wait for mcp-optimizer to be ready
  run: |
    echo "Waiting for mcp-optimizer to be ready..."
    LIMIT=180

    # Probe the container log once per second for the Uvicorn start-up
    # banner; leave the step successfully as soon as it appears.
    for ((TRY = 1; TRY <= LIMIT; TRY++)); do
      if docker logs mcp-optimizer 2>&1 | grep -q "Uvicorn running on "; then
        echo "mcp-optimizer is ready!"
        exit 0
      fi
      echo "Waiting... (attempt $TRY/$LIMIT)"
      sleep 1
    done

    # Every probe failed: dump the full container log and fail the step.
    echo "ERROR: mcp-optimizer did not become ready within 3 minutes"
    echo "=== mcp-optimizer logs ==="
    docker logs mcp-optimizer 2>&1 || true
    exit 1

- name: Install MCP SDK for ingestion check
  # Creates a virtual environment and installs the `mcp` package that the
  # ingestion-check script imports.
  run: |
    uv venv && uv pip install mcp

- name: Wait for workload ingestion to complete
  run: |
    echo "Waiting for workload ingestion to complete..."
    MAX_ATTEMPTS=60
    ATTEMPT=0
    INGESTED=false
    # stderr of the most recent probe; shown only if the wait times out.
    CHECK_ERR=/tmp/check_ingestion.err

    # Probe script: connects to mcp-optimizer over streamable HTTP, calls
    # its `list_tools` tool and exits 0 only when a successful, non-empty
    # result comes back.
    cat > /tmp/check_ingestion.py << 'EOF'
    import asyncio
    import sys
    from mcp import ClientSession
    from mcp.client.streamable_http import streamablehttp_client

    async def check():
        try:
            async with streamablehttp_client("http://localhost:8080/mcp") as (read_stream, write_stream, _):
                async with ClientSession(read_stream, write_stream) as session:
                    await session.initialize()
                    result = await session.call_tool("list_tools", {})
                    print(f"Tool call 'list_tools' completed with result: {result}")
                    # A tool error result also carries content (the error
                    # text), so it must not be counted as a success.
                    failed = getattr(result, "isError", False)
                    if result and not failed and result.content:
                        print("Workload ingestion completed - server tools found!")
                        return 0
                    return 1
        except Exception as exc:
            # Expected while the server is still starting; recorded on
            # stderr so the last failure can be shown on timeout.
            print(f"Ingestion check failed: {exc!r}", file=sys.stderr)
            return 1

    sys.exit(asyncio.run(check()))
    EOF

    while [ "${ATTEMPT}" -lt "${MAX_ATTEMPTS}" ]; do
      if uv run /tmp/check_ingestion.py 2> "${CHECK_ERR}"; then
        echo "Workload ingestion completed!"
        INGESTED=true
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      echo "Waiting for ingestion... (attempt $ATTEMPT/$MAX_ATTEMPTS)"
      sleep 1
    done

    if [ "$INGESTED" = false ]; then
      echo "ERROR: Workload ingestion did not complete within 1 minute"
      echo "=== last ingestion check error ==="
      cat "${CHECK_ERR}" 2>/dev/null || true
      echo "=== mcp-optimizer logs ==="
      docker logs mcp-optimizer 2>&1 | tail -50 || true
      exit 1
    fi

- name: Get mcp-optimizer URL
  id: get-url
  run: |
    # Look up the URL ToolHive reports for the mcp-optimizer workload.
    # `// empty` turns a missing or null url into an empty string instead
    # of the literal text "null".
    MCP_OPTIMIZER_URL=$(thv list --format json | jq -r '.[] | select(.name == "mcp-optimizer") | .url // empty')

    # Fail here with a clear message rather than handing an empty URL to
    # the test step.
    if [ -z "${MCP_OPTIMIZER_URL}" ]; then
      echo "ERROR: could not determine the mcp-optimizer URL from 'thv list'"
      thv list || true
      exit 1
    fi

    # Exposed to later steps as steps.get-url.outputs.MCP_OPTIMIZER_URL.
    echo "MCP_OPTIMIZER_URL=${MCP_OPTIMIZER_URL}" >> "${GITHUB_OUTPUT}"
    echo "mcp-optimizer URL: ${MCP_OPTIMIZER_URL}"

- name: Deploy mcp-tef server
  env:
    # The secret is passed through the environment and expanded by the
    # shell, rather than being substituted into the script text, so special
    # characters in the value cannot alter the command.
    OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
  run: |
    mtef deploy \
      --name mcp-tef \
      --port 8088 \
      --env "OPENROUTER_API_KEY=${OPENROUTER_API_KEY}" \
      --env LOG_LEVEL=INFO
    # Fixed pause after deployment before the test step runs.
    sleep 20

- name: Create test case and run alignment test
  id: test-run
  env:
    # Expression values are handed over through the environment and
    # expanded by the shell, never substituted into the script text.
    OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
    MCP_OPTIMIZER_URL: ${{ steps.get-url.outputs.MCP_OPTIMIZER_URL }}
  run: |
    # Creates an mtef test case against mcp-optimizer, executes it, polls
    # until it finishes and publishes the outcome as the step outputs
    # `test_result` and `test_message`. Test failures end with exit 0 so
    # that the "Report test status" step decides how to surface them.
    RESULTS_DIR=/tmp/mcp-tef-results
    mkdir -p "${RESULTS_DIR}"

    # Publish the outcome: $1 = test_result, $2 = test_message.
    # Newlines in the message are flattened because each entry in
    # GITHUB_OUTPUT written this way must fit on a single line.
    set_result() {
      local message=$2
      message=${message//$'\n'/ }
      {
        echo "test_result=$1"
        echo "test_message=${message}"
      } >> "${GITHUB_OUTPUT}"
    }

    # Apply jq filter $2 to text $1. If the text is not valid JSON (the CLI
    # output has stderr merged in), print fallback $3 instead of letting
    # jq's failure abort the script under `bash -e`.
    json_field() {
      local value
      if value=$(jq -r "$2" <<< "$1" 2>/dev/null); then
        echo "${value}"
      else
        echo "${3:-}"
      fi
    }

    # Get MCP optimizer URL for Docker access (rewrite 127.0.0.1 to 172.17.0.1)
    MCP_SERVER_URL="${MCP_OPTIMIZER_URL//127.0.0.1/172.17.0.1}"
    echo "MCP Server URL for mcp-tef: ${MCP_SERVER_URL}"

    # Create test case for time query using mcp-optimizer
    echo "Creating test case..."
    set +e
    TEST_CASE_OUTPUT=$(mtef test-case create \
      --name "MCP Optimizer Time Query" \
      --query "Tell me the current time in Tokyo" \
      --servers "${MCP_SERVER_URL}" \
      --expected-server "${MCP_SERVER_URL}" \
      --expected-tool "find_tool" \
      --format json \
      --insecure 2>&1)
    TEST_CASE_EXIT=$?
    set -e

    echo "Test case creation output:"
    echo "${TEST_CASE_OUTPUT}"

    TEST_CASE_ID=$(json_field "${TEST_CASE_OUTPUT}" '.id // empty')

    if [ -z "${TEST_CASE_ID}" ] || [ "${TEST_CASE_EXIT}" -ne 0 ]; then
      echo "ERROR: Failed to create test case"
      echo "${TEST_CASE_OUTPUT}" > "${RESULTS_DIR}/test-case-error.txt"
      set_result error "Failed to create test case"
      exit 0
    fi

    echo "Test case ID: ${TEST_CASE_ID}"
    echo "${TEST_CASE_OUTPUT}" > "${RESULTS_DIR}/test-case.json"

    # Execute test run
    echo "Executing test run..."
    set +e
    TEST_RUN_OUTPUT=$(mtef test-run execute "${TEST_CASE_ID}" \
      --model-provider openrouter \
      --model-name anthropic/claude-sonnet-4.5 \
      --api-key "${OPENROUTER_API_KEY}" \
      --format json \
      --no-wait \
      --insecure 2>&1)
    TEST_RUN_EXIT=$?
    set -e

    echo "Test run output:"
    echo "${TEST_RUN_OUTPUT}"

    TEST_RUN_ID=$(json_field "${TEST_RUN_OUTPUT}" '.id // empty')

    if [ -z "${TEST_RUN_ID}" ] || [ "${TEST_RUN_EXIT}" -ne 0 ]; then
      echo "ERROR: Failed to execute test run"
      echo "${TEST_RUN_OUTPUT}" > "${RESULTS_DIR}/test-run-error.txt"
      set_result error "Failed to execute test run"
      exit 0
    fi

    echo "Test run ID: ${TEST_RUN_ID}"

    # Poll for completion
    echo "Polling for test run completion..."
    POLL_INTERVAL=2
    POLL_TIMEOUT=120
    POLL_WAITED=0
    STATUS="pending"

    while [ "${STATUS}" = "pending" ] || [ "${STATUS}" = "running" ]; do
      if [ "${POLL_WAITED}" -ge "${POLL_TIMEOUT}" ]; then
        echo "Timeout waiting for test run to complete"
        set_result timeout "Test run timed out after ${POLL_TIMEOUT}s"
        exit 0
      fi

      sleep "${POLL_INTERVAL}"
      POLL_WAITED=$((POLL_WAITED + POLL_INTERVAL))

      set +e
      POLL_OUTPUT=$(mtef test-run get "${TEST_RUN_ID}" \
        --format json \
        --insecure 2>&1)
      set -e
      # Unparseable poll output yields "unknown", which ends the loop and
      # is reported as a failed run below.
      STATUS=$(json_field "${POLL_OUTPUT}" '.status // "unknown"' unknown)
      echo "Status after ${POLL_WAITED}s: ${STATUS}"
    done

    # Save final results
    echo "${POLL_OUTPUT}" > "${RESULTS_DIR}/test-run-result.json"

    CLASSIFICATION=$(json_field "${POLL_OUTPUT}" '.classification // "unknown"' unknown)
    echo "Final status: ${STATUS}"
    echo "Classification: ${CLASSIFICATION}"

    # Determine test result: only a completed run classified "TP" counts as
    # success.
    if [ "${STATUS}" = "completed" ]; then
      if [ "${CLASSIFICATION}" = "TP" ]; then
        set_result success "Test passed with classification: ${CLASSIFICATION}"
      else
        set_result warning "Test completed but classification was ${CLASSIFICATION}, expected TP"
      fi
    else
      ERROR_MSG=$(json_field "${POLL_OUTPUT}" '.error_message // "No error message"' "No error message")
      set_result failed "Test failed with status: ${STATUS}. Error: ${ERROR_MSG}"
    fi

    # Create summary. jq builds the document so every value is escaped
    # correctly.
    jq -n \
      --arg test_case_id "${TEST_CASE_ID}" \
      --arg test_run_id "${TEST_RUN_ID}" \
      --arg status "${STATUS}" \
      --arg classification "${CLASSIFICATION}" \
      --arg mcp_server_url "${MCP_SERVER_URL}" \
      '{
        test_case_id: $test_case_id,
        test_run_id: $test_run_id,
        status: $status,
        classification: $classification,
        mcp_server_url: $mcp_server_url
      }' > "${RESULTS_DIR}/summary.json"

- name: Upload test results
  # `always()` keeps this step running even when an earlier step failed.
  if: always()
  uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
  with:
    # Directory the test step writes its result files into.
    path: /tmp/mcp-tef-results/
    name: mcp-tef-test-results
    retention-days: 30

- name: Report test status
  if: always()
  env:
    # The outputs can contain arbitrary error text from the test run, so
    # they are passed through the environment instead of being substituted
    # into the script, where quotes or `$(...)` would be interpreted by the
    # shell.
    TEST_RESULT: ${{ steps.test-run.outputs.test_result }}
    TEST_MESSAGE: ${{ steps.test-run.outputs.test_message }}
  run: |
    echo "=== MCP-TEF Integration Test Results ==="
    echo "Result: ${TEST_RESULT}"
    echo "Message: ${TEST_MESSAGE}"

    # The summary only exists when the test step ran to the end.
    if [ -f /tmp/mcp-tef-results/summary.json ]; then
      echo ""
      echo "=== Summary ==="
      jq . /tmp/mcp-tef-results/summary.json
    fi

    # Warn but don't fail
    if [ "${TEST_RESULT}" != "success" ]; then
      echo ""
      echo "::warning::MCP-TEF integration test did not pass: ${TEST_MESSAGE}"
    fi

- name: Capture container logs
  if: always()
  run: |
    # Print a header and the last $2 log lines of container $1. A missing
    # container is tolerated so the remaining logs are still captured.
    dump_logs() {
      echo "=== $1 container logs ==="
      docker logs "$1" 2>&1 | tail -n "$2" || true
    }

    dump_logs mcp-tef 100
    echo ""
    dump_logs mcp-optimizer 100
    echo ""
    dump_logs time 50
    echo ""
    dump_logs fetch 50