Merged
README.md (2 changes: 1 addition & 1 deletion)
@@ -54,7 +54,7 @@ export OPENAI_API_KEY="your-evaluation-llm-key"
 export API_KEY="your-api-endpoint-key"

 # Ensure API is running at configured endpoint
-# Default: http://localhost:8080
+# Default: http://localhost:8080/v1/

 # Run with API-enabled configuration
 lightspeed-eval --system-config config/system.yaml --eval-data config/evaluation_data.yaml
config/system.yaml (3 changes: 2 additions & 1 deletion)
@@ -30,7 +30,8 @@ embedding:
 # But can be easily integrated with other APIs with minimal change.
 api:
   enabled: true  # Enable API calls instead of using pre-filled data
-  api_base: http://localhost:8080  # Base API URL
+  api_base: http://localhost:8080  # Base API URL (without version)
+  version: v1  # API version (e.g., v1, v2)
   endpoint_type: streaming  # Use "streaming" or "query" endpoint
   timeout: 300  # API request timeout in seconds

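The split between `api_base` and `version` implies that clients join the two when building request paths. Below is a minimal sketch of that joining logic, assuming the `api` block above is loaded with PyYAML; the `versioned_path` helper is illustrative and not part of this change.

```python
import yaml  # PyYAML, assumed available

def versioned_path(version: str, endpoint: str) -> str:
    """Join an API version and an endpoint into a path like /v1/query."""
    return f"/{version.strip('/')}/{endpoint.lstrip('/')}"

with open("config/system.yaml", encoding="utf-8") as f:
    api_cfg = yaml.safe_load(f)["api"]

# endpoint_type "streaming" maps to the streaming_query endpoint
endpoint = "streaming_query" if api_cfg["endpoint_type"] == "streaming" else "query"
print(versioned_path(api_cfg["version"], endpoint))  # /v1/streaming_query with the defaults above
```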
lsc_agent_eval/README.md (6 changes: 3 additions & 3 deletions)
@@ -200,7 +200,7 @@ Expectation is that, either a third-party inference provider access is there or
 ```bash
 lsc_agent_eval \
     --eval_data_yaml agent_goal_eval.yaml \
-    --agent_endpoint http://localhost:8080 \
+    --agent_endpoint http://localhost:8080/v1/ \
     --endpoint_type streaming \
     --agent_provider watsonx \
     --agent_model ibm/granite-3-2-8b-instruct \
@@ -218,7 +218,7 @@ from lsc_agent_eval import AgentGoalEval
 class EvalArgs:
     def __init__(self):
         self.eval_data_yaml = 'data/example_eval.yaml'
-        self.agent_endpoint = 'http://localhost:8080'
+        self.agent_endpoint = 'http://localhost:8080/v1/'
         self.endpoint_type = 'query'  # Non-streaming
         self.agent_provider = 'watsonx'
         self.agent_model = 'ibm/granite-3-2-8b-instruct'
@@ -236,7 +236,7 @@ evaluator.run_evaluation()
 ### Key Arguments

 - `--eval_data_yaml`: Path to the YAML file containing evaluation data
-- `--agent_endpoint`: Endpoint URL for the agent API (default: <http://localhost:8080>)
+- `--agent_endpoint`: Endpoint URL for the agent API (default: <http://localhost:8080/v1/>)
 - `--endpoint_type`: Endpoint type to use for agent queries (default: streaming). Options: 'streaming' or 'query'
 - `--agent_auth_token_file`: Path to .txt file containing API token (if required). Or set `AGENT_API_TOKEN` env var without using a .txt file
 - `--agent_provider`: Provider for the agent API
lsc_agent_eval/src/lsc_agent_eval/agent_eval.py (9 changes: 8 additions & 1 deletion)
@@ -32,7 +32,14 @@ def _args_parser(args: list[str]) -> argparse.Namespace:
         "--agent_endpoint",
         type=str,
         default="http://localhost:8080",
-        help="Agent API endpoint URL",
+        help="Agent API base URL (without version)",
     )
+
+    parser.add_argument(
+        "--agent_api_version",
+        type=str,
+        default="v1",
+        help="Agent API version (e.g., v1, v2)",
+    )

     parser.add_argument(
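A runnable sketch of the parser fragment above, isolated from the rest of `_args_parser`; the surrounding parser wiring is elided, so treat this as illustration rather than the module's actual code.

```python
import argparse

parser = argparse.ArgumentParser(description="Sketch of the two arguments above")
parser.add_argument(
    "--agent_endpoint",
    type=str,
    default="http://localhost:8080",
    help="Agent API base URL (without version)",
)
parser.add_argument(
    "--agent_api_version",
    type=str,
    default="v1",
    help="Agent API version (e.g., v1, v2)",
)

# Overriding only the version keeps the endpoint default intact
args = parser.parse_args(["--agent_api_version", "v2"])
assert args.agent_endpoint == "http://localhost:8080"
assert args.agent_api_version == "v2"
```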
@@ -54,7 +54,9 @@ def _setup_components(self) -> None:

         # Agent HTTP client
         self.agent_client = AgentHttpClient(
-            self.eval_args.agent_endpoint, self.eval_args.agent_auth_token_file
+            self.eval_args.agent_endpoint,
+            version=getattr(self.eval_args, "agent_api_version", "v1"),
+            token_file=self.eval_args.agent_auth_token_file,
         )

         # Judge model manager (optional)
lsc_agent_eval/src/lsc_agent_eval/core/utils/api_client.py (17 changes: 13 additions & 4 deletions)
@@ -16,9 +16,18 @@
 class AgentHttpClient:
     """HTTP client for agent API communication."""

-    def __init__(self, endpoint: str, token_file: Optional[str] = None):
-        """Initialize HTTP client."""
+    def __init__(
+        self, endpoint: str, version: str = "v1", token_file: Optional[str] = None
+    ):
+        """Initialize HTTP client.
+
+        Args:
+            endpoint: Base API URL.
+            version: API version (e.g., v1, v2). Defaults to "v1".
+            token_file: Optional path to token file for authentication.
+        """
         self.endpoint = endpoint
+        self.version = version
         self.client: Optional[httpx.Client] = None
         self._setup_client(token_file)

@@ -57,7 +66,7 @@ def query_agent(

         try:
             response = self.client.post(
-                "/v1/query",
+                f"/{self.version}/query",
                 json=api_input,
                 timeout=timeout,
             )
@@ -117,7 +126,7 @@ def streaming_query_agent(
         try:
             with self.client.stream(
                 "POST",
-                f"/{self.version}/streaming_query",
                 json=api_input,
                 timeout=timeout,
             ) as response:
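Putting the pieces together, a hedged usage sketch of the updated client; it assumes an agent service is actually listening on localhost:8080, and the import path is inferred from the file layout. `query_agent`'s payload shape follows this repository's tests.

```python
from lsc_agent_eval.core.utils.api_client import AgentHttpClient

# version="v2" routes requests to /v2/query and /v2/streaming_query
client = AgentHttpClient("http://localhost:8080", version="v2")
api_input = {
    "query": "How many namespaces are there?",
    "provider": "openai",
    "model": "gpt-4",
}
try:
    result = client.query_agent(api_input)
finally:
    client.close()  # release the underlying httpx.Client
```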
@@ -23,6 +23,7 @@ def mock_args(self, mocker: MockerFixture):
         args = mocker.Mock()
         args.eval_data_yaml = "test_data.yaml"
         args.agent_endpoint = "http://localhost:8080"
+        args.agent_api_version = "v1"
         args.agent_auth_token_file = None
         args.agent_provider = "openai"
         args.agent_model = "gpt-4"
Expand Down Expand Up @@ -103,7 +104,9 @@ def test_init_with_judge_manager(

# Verify all components were initialized
mock_config_manager.assert_called_once_with("test_data.yaml")
mock_agent_client.assert_called_once_with("http://localhost:8080", None)
mock_agent_client.assert_called_once_with(
"http://localhost:8080", version="v1", token_file=None
)
mock_judge_manager.assert_called_once_with("openai", "gpt-4")
mock_script_runner.assert_called_once_with(None)
mock_evaluation_runner.assert_called_once_with(
lsc_agent_eval/tests/core/utils/test_api_client.py (38 changes: 22 additions & 16 deletions)
@@ -16,9 +16,10 @@ class TestAgentHttpClient:
     def test_init_without_token(self, mocker: MockerFixture):
         """Test initializing client without token."""
         mock_client = mocker.patch("httpx.Client")
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         assert client.endpoint == "http://localhost:8080"
+        assert client.version == "v1"
         mock_client.assert_called_once_with(
             base_url="http://localhost:8080", verify=False
         )
@@ -30,9 +31,12 @@ def test_init_with_token_file(self, mocker: MockerFixture):
         mock_client = mocker.patch("httpx.Client")
         mocker.patch("builtins.open", mocker.mock_open(read_data=token_content))

-        client = AgentHttpClient("http://localhost:8080", "token.txt")
+        client = AgentHttpClient(
+            "http://localhost:8080", version="v1", token_file="token.txt"
+        )

         assert client.endpoint == "http://localhost:8080"
+        assert client.version == "v1"
         mock_client.assert_called_once_with(
             base_url="http://localhost:8080", verify=False
         )
@@ -46,14 +50,16 @@ def test_init_with_missing_token_file(self, mocker: MockerFixture):
         mocker.patch("builtins.open", side_effect=FileNotFoundError)

         with pytest.raises(AgentAPIError, match="Token file not found"):
-            AgentHttpClient("http://localhost:8080", "missing.txt")
+            AgentHttpClient(
+                "http://localhost:8080", version="v1", token_file="missing.txt"
+            )

     def test_init_with_env_token(self, mocker: MockerFixture):
         """Test initializing client with environment token."""
         mock_client = mocker.patch("httpx.Client")
         mocker.patch("os.getenv", return_value="env-token-456")

-        AgentHttpClient("http://localhost:8080")
+        AgentHttpClient("http://localhost:8080", version="v1")

         mock_client.return_value.headers.update.assert_called_once_with(
             {"Authorization": "Bearer env-token-456"}
@@ -77,7 +83,7 @@ def test_query_agent_success(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "How many namespaces are there?",
@@ -116,7 +122,7 @@ def test_query_agent_success_empty_tool_calls(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "What is Openshift Virtualization?",
@@ -143,7 +149,7 @@ def test_query_agent_http_error(self, mocker: MockerFixture):
         )

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}
         with pytest.raises(AgentAPIError, match="Agent API error: 500"):
@@ -156,7 +162,7 @@ def test_query_agent_timeout(self, mocker: MockerFixture):
         mock_client.post.side_effect = httpx.TimeoutException("Request timeout")

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "Test query",
@@ -178,7 +184,7 @@ def test_query_agent_missing_response_field(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}
         with pytest.raises(
@@ -190,14 +196,14 @@ def test_query_agent_client_not_initialized(self, mocker: MockerFixture):
         """Test agent query when client is not initialized."""
         mocker.patch("httpx.Client", side_effect=Exception("Setup failed"))
         with pytest.raises(AgentAPIError, match="Failed to setup HTTP client"):
-            AgentHttpClient("http://localhost:8080")
+            AgentHttpClient("http://localhost:8080", version="v1")

     def test_close_client_success(self, mocker: MockerFixture):
         """Test closing client successfully."""
         mock_client = mocker.Mock()

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")
         client.close()

         mock_client.close.assert_called_once()
@@ -208,7 +214,7 @@ def test_client_setup_exception(self, mocker: MockerFixture):
         with pytest.raises(
             AgentAPIError, match="Failed to setup HTTP client: Setup failed"
         ):
-            AgentHttpClient("http://localhost:8080")
+            AgentHttpClient("http://localhost:8080", version="v1")

     # Streaming Query Tests
     def test_streaming_query_agent_success(self, mocker: MockerFixture):
@@ -237,7 +243,7 @@ def test_streaming_query_agent_success(self, mocker: MockerFixture):
         )

         mock_parser.return_value = expected_result
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "What is OpenShift?",
@@ -278,7 +284,7 @@ def test_streaming_query_agent_parser_error(self, mocker: MockerFixture):
         # Mock the parser to raise the specific error
         mock_parser.side_effect = ValueError("No Conversation ID found")

-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")
         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}

         with pytest.raises(
@@ -294,7 +300,7 @@ def test_streaming_query_agent_timeout(self, mocker: MockerFixture):
         mock_client.stream.side_effect = httpx.TimeoutException("Request timeout")

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "Test query",
@@ -326,7 +332,7 @@ def test_streaming_query_agent_http_error(self, mocker: MockerFixture):
         mock_client.stream.return_value = mock_stream_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}

lsc_agent_eval/tests/test_agent_eval.py (4 changes: 4 additions & 0 deletions)
@@ -28,6 +28,7 @@ def test_args_parser_minimal(self):
         assert parsed.agent_provider == "openai"
         assert parsed.agent_model == "gpt-4"
         assert parsed.agent_endpoint == "http://localhost:8080"  # default
+        assert parsed.agent_api_version == "v1"  # default
         assert parsed.result_dir == "eval_output/"  # default
         assert parsed.endpoint_type == "streaming"  # default

@@ -38,6 +39,8 @@ def test_args_parser_all_arguments(self):
             "test.yaml",
             "--agent_endpoint",
             "http://custom:9090",
+            "--agent_api_version",
+            "v2",
             "--agent_provider",
             "watsonx",
             "--agent_model",
@@ -60,6 +63,7 @@

         assert parsed.eval_data_yaml == "test.yaml"
         assert parsed.agent_endpoint == "http://custom:9090"
+        assert parsed.agent_api_version == "v2"
         assert parsed.agent_provider == "watsonx"
         assert parsed.agent_model == "granite-3-8b-instruct"
         assert parsed.agent_auth_token_file == "token.txt"
src/generate_answers/eval_config.py (1 change: 1 addition & 0 deletions)
@@ -16,6 +16,7 @@ class EvalConfig(BaseModel):
     """Evaluation configuration."""

     lightspeed_url: str
+    lightspeed_api_version: str = "v1"
     models: list[EvalModel]
     models_to_evaluate: set[str]

src/generate_answers/eval_config.yaml (1 change: 1 addition & 0 deletions)
@@ -1,4 +1,5 @@
 lightspeed_url: "http://localhost:8080"
+lightspeed_api_version: "v1"
 models:
   - display_name: "granite-3-3-8b-instruct"
     provider: "watsonx"
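Since `lightspeed_api_version` defaults to `"v1"` in `EvalConfig`, the new YAML key is optional. A sketch of loading the file into the model, assuming PyYAML, that the import path matches the file layout, and that the full file also provides the required `models` and `models_to_evaluate` fields:

```python
import yaml
from generate_answers.eval_config import EvalConfig  # import path assumed

with open("src/generate_answers/eval_config.yaml", encoding="utf-8") as f:
    config = EvalConfig(**yaml.safe_load(f))

# Falls back to "v1" when the key is absent from the YAML
print(config.lightspeed_api_version)
```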
src/generate_answers/generate_answers.py (1 change: 1 addition & 0 deletions)
@@ -173,6 +173,7 @@ def main(  # pylint: disable=R0913,R0917,R0914
             config.lightspeed_url,
             provider=model.provider,
             model=model.model,
+            version=config.lightspeed_api_version,
             cache_dir=llm_cache_dir,
         ),
     )
src/generate_answers/ls_response.py (6 changes: 4 additions & 2 deletions)
@@ -15,15 +15,17 @@
 class LSClient:  # pylint: disable=too-few-public-methods
     """LightSpeed client."""

-    def __init__(
+    def __init__(  # pylint: disable=R0913,R0917
         self,
         ls_url: str,
         provider: str,
         model: str,
+        version: str = "v1",
         cache_dir: str = ".caches/llm_cache",
     ):
         """Init LightSpeed."""
         self.url = ls_url
+        self.version = version
         self.provider = provider
         self.model = model
         self.client = Client(base_url=ls_url, verify=False)
@@ -65,7 +67,7 @@ def get_answer(self, query: str, skip_cache: bool = False) -> str:

         logging.info("Calling LightSpeed service for query '%s'", query)
         response = self.client.post(
-            "/v1/query",
+            f"/{self.version}/query",
             json={
                 "query": query,
                 "provider": self.provider,
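A usage sketch for the updated `LSClient`; the constructor and `get_answer` shapes come from the hunks above, while the endpoint URL and model identifiers are placeholders and the import path is assumed from the file layout.

```python
from generate_answers.ls_response import LSClient  # import path assumed

ls = LSClient(
    "http://localhost:8080",
    provider="watsonx",
    model="ibm/granite-3-2-8b-instruct",
    version="v2",  # get_answer now POSTs to /v2/query
)
answer = ls.get_answer("What is OpenShift Virtualization?")
print(answer)
```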
src/lightspeed_evaluation/core/api/client.py (5 changes: 3 additions & 2 deletions)
@@ -29,6 +29,7 @@ def __init__(
         """Initialize the client with configuration."""
         self.config = config
         self.api_base = config.api_base
+        self.version = config.version
         self.endpoint_type = config.endpoint_type
         self.timeout = config.timeout

@@ -127,7 +128,7 @@ def _standard_query(self, api_request: APIRequest) -> APIResponse:
             raise APIError("HTTP client not initialized")
         try:
             response = self.client.post(
-                "/v1/query",
+                f"/{self.version}/query",
                 json=api_request.model_dump(exclude_none=True),
             )
             response.raise_for_status()
@@ -177,7 +178,7 @@ def _streaming_query(self, api_request: APIRequest) -> APIResponse:
         try:
             with self.client.stream(
                 "POST",
-                "/v1/streaming_query",
+                f"/{self.version}/streaming_query",
                 json=api_request.model_dump(exclude_none=True),
             ) as response:
                 self._handle_response_errors(response)
src/lightspeed_evaluation/core/constants.py (1 change: 1 addition & 0 deletions)
@@ -1,6 +1,7 @@
 """Common constants for evaluation framework."""

 DEFAULT_API_BASE = "http://localhost:8080"
+DEFAULT_API_VERSION = "v1"
 DEFAULT_API_TIMEOUT = 300
 DEFAULT_ENDPOINT_TYPE = "streaming"
 SUPPORTED_ENDPOINT_TYPES = ["streaming", "query"]