Merged
README.md (2 changes: 1 addition & 1 deletion)
@@ -54,7 +54,7 @@ export OPENAI_API_KEY="your-evaluation-llm-key"
 export API_KEY="your-api-endpoint-key"

 # Ensure API is running at configured endpoint
-# Default: http://localhost:8080
+# Default: http://localhost:8080/v1/

 # Run with API-enabled configuration
 lightspeed-eval --system-config config/system.yaml --eval-data config/evaluation_data.yaml
config/system.yaml (3 changes: 2 additions & 1 deletion)
@@ -30,7 +30,8 @@ embedding:
 # But can be easily integrated with other APIs with minimal change.
 api:
   enabled: true  # Enable API calls instead of using pre-filled data
-  api_base: http://localhost:8080  # Base API URL
+  api_base: http://localhost:8080  # Base API URL (without version)
+  version: v1  # API version (e.g., v1, v2)
   endpoint_type: streaming  # Use "streaming" or "query" endpoint
   timeout: 300  # API request timeout in seconds

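The split between `api_base` and `version` implies that clients join the two when building request paths. Below is a minimal sketch of that joining logic, assuming the `api` block above is loaded with PyYAML; the `versioned_path` helper is illustrative and not part of this change.

```python
import yaml  # PyYAML, assumed available

def versioned_path(version: str, endpoint: str) -> str:
    """Join an API version and an endpoint into a path like /v1/query."""
    return f"/{version.strip('/')}/{endpoint.lstrip('/')}"

with open("config/system.yaml", encoding="utf-8") as f:
    api_cfg = yaml.safe_load(f)["api"]

# endpoint_type "streaming" maps to the streaming_query endpoint
endpoint = "streaming_query" if api_cfg["endpoint_type"] == "streaming" else "query"
print(versioned_path(api_cfg["version"], endpoint))  # /v1/streaming_query with the defaults above
```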
lsc_agent_eval/README.md (6 changes: 3 additions & 3 deletions)
@@ -200,7 +200,7 @@ Expectation is that, either a third-party inference provider access is there or
 ```bash
 lsc_agent_eval \
     --eval_data_yaml agent_goal_eval.yaml \
-    --agent_endpoint http://localhost:8080 \
+    --agent_endpoint http://localhost:8080/v1/ \
     --endpoint_type streaming \
     --agent_provider watsonx \
     --agent_model ibm/granite-3-2-8b-instruct \
@@ -218,7 +218,7 @@ from lsc_agent_eval import AgentGoalEval
 class EvalArgs:
     def __init__(self):
         self.eval_data_yaml = 'data/example_eval.yaml'
-        self.agent_endpoint = 'http://localhost:8080'
+        self.agent_endpoint = 'http://localhost:8080/v1/'
         self.endpoint_type = 'query'  # Non-streaming
         self.agent_provider = 'watsonx'
         self.agent_model = 'ibm/granite-3-2-8b-instruct'
@@ -236,7 +236,7 @@ evaluator.run_evaluation()
 ### Key Arguments

 - `--eval_data_yaml`: Path to the YAML file containing evaluation data
-- `--agent_endpoint`: Endpoint URL for the agent API (default: <http://localhost:8080>)
+- `--agent_endpoint`: Endpoint URL for the agent API (default: <http://localhost:8080/v1/>)
 - `--endpoint_type`: Endpoint type to use for agent queries (default: streaming). Options: 'streaming' or 'query'
 - `--agent_auth_token_file`: Path to .txt file containing API token (if required). Or set `AGENT_API_TOKEN` env var without using a .txt file
 - `--agent_provider`: Provider for the agent API
lsc_agent_eval/src/lsc_agent_eval/agent_eval.py (9 changes: 8 additions & 1 deletion)
@@ -32,7 +32,14 @@ def _args_parser(args: list[str]) -> argparse.Namespace:
         "--agent_endpoint",
         type=str,
         default="http://localhost:8080",
-        help="Agent API endpoint URL",
+        help="Agent API base URL (without version)",
     )
+
+    parser.add_argument(
+        "--agent_api_version",
+        type=str,
+        default="v1",
+        help="Agent API version (e.g., v1, v2)",
+    )

     parser.add_argument(
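A runnable sketch of the parser fragment above, isolated from the rest of `_args_parser`; the surrounding parser wiring is elided, so treat this as illustration rather than the module's actual code.

```python
import argparse

parser = argparse.ArgumentParser(description="Sketch of the two arguments above")
parser.add_argument(
    "--agent_endpoint",
    type=str,
    default="http://localhost:8080",
    help="Agent API base URL (without version)",
)
parser.add_argument(
    "--agent_api_version",
    type=str,
    default="v1",
    help="Agent API version (e.g., v1, v2)",
)

# Overriding only the version keeps the endpoint default intact
args = parser.parse_args(["--agent_api_version", "v2"])
assert args.agent_endpoint == "http://localhost:8080"
assert args.agent_api_version == "v2"
```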
@@ -54,7 +54,9 @@ def _setup_components(self) -> None:

         # Agent HTTP client
         self.agent_client = AgentHttpClient(
-            self.eval_args.agent_endpoint, self.eval_args.agent_auth_token_file
+            self.eval_args.agent_endpoint,
+            version=getattr(self.eval_args, "agent_api_version", "v1"),
+            token_file=self.eval_args.agent_auth_token_file,
         )

         # Judge model manager (optional)
lsc_agent_eval/src/lsc_agent_eval/core/utils/api_client.py (17 changes: 13 additions & 4 deletions)
@@ -16,9 +16,18 @@
 class AgentHttpClient:
     """HTTP client for agent API communication."""

-    def __init__(self, endpoint: str, token_file: Optional[str] = None):
-        """Initialize HTTP client."""
+    def __init__(
+        self, endpoint: str, version: str = "v1", token_file: Optional[str] = None
+    ):
+        """Initialize HTTP client.
+
+        Args:
+            endpoint: Base API URL.
+            version: API version (e.g., v1, v2). Defaults to "v1".
+            token_file: Optional path to token file for authentication.
+        """
         self.endpoint = endpoint
+        self.version = version
         self.client: Optional[httpx.Client] = None
         self._setup_client(token_file)

@@ -57,7 +66,7 @@ def query_agent(

         try:
             response = self.client.post(
-                "/v1/query",
+                f"/{self.version}/query",
                 json=api_input,
                 timeout=timeout,
             )
@@ -117,7 +126,7 @@ def streaming_query_agent(
         try:
             with self.client.stream(
                 "POST",
-                f"/{self.version}/streaming_query",
                 json=api_input,
                 timeout=timeout,
             ) as response:
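Putting the pieces together, a hedged usage sketch of the updated client; it assumes an agent service is actually listening on localhost:8080, and the import path is inferred from the file layout. `query_agent`'s payload shape follows this repository's tests.

```python
from lsc_agent_eval.core.utils.api_client import AgentHttpClient

# version="v2" routes requests to /v2/query and /v2/streaming_query
client = AgentHttpClient("http://localhost:8080", version="v2")
api_input = {
    "query": "How many namespaces are there?",
    "provider": "openai",
    "model": "gpt-4",
}
try:
    result = client.query_agent(api_input)
finally:
    client.close()  # release the underlying httpx.Client
```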
@@ -23,6 +23,7 @@ def mock_args(self, mocker: MockerFixture):
         args = mocker.Mock()
         args.eval_data_yaml = "test_data.yaml"
         args.agent_endpoint = "http://localhost:8080"
+        args.agent_api_version = "v1"
         args.agent_auth_token_file = None
         args.agent_provider = "openai"
         args.agent_model = "gpt-4"
Expand Down Expand Up @@ -103,7 +104,9 @@ def test_init_with_judge_manager(

# Verify all components were initialized
mock_config_manager.assert_called_once_with("test_data.yaml")
mock_agent_client.assert_called_once_with("http://localhost:8080", None)
mock_agent_client.assert_called_once_with(
"http://localhost:8080", version="v1", token_file=None
)
mock_judge_manager.assert_called_once_with("openai", "gpt-4")
mock_script_runner.assert_called_once_with(None)
mock_evaluation_runner.assert_called_once_with(
lsc_agent_eval/tests/core/utils/test_api_client.py (38 changes: 22 additions & 16 deletions)
@@ -16,9 +16,10 @@ class TestAgentHttpClient:
     def test_init_without_token(self, mocker: MockerFixture):
         """Test initializing client without token."""
         mock_client = mocker.patch("httpx.Client")
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         assert client.endpoint == "http://localhost:8080"
+        assert client.version == "v1"
         mock_client.assert_called_once_with(
             base_url="http://localhost:8080", verify=False
         )
@@ -30,9 +31,12 @@ def test_init_with_token_file(self, mocker: MockerFixture):
         mock_client = mocker.patch("httpx.Client")
         mocker.patch("builtins.open", mocker.mock_open(read_data=token_content))

-        client = AgentHttpClient("http://localhost:8080", "token.txt")
+        client = AgentHttpClient(
+            "http://localhost:8080", version="v1", token_file="token.txt"
+        )

         assert client.endpoint == "http://localhost:8080"
+        assert client.version == "v1"
         mock_client.assert_called_once_with(
             base_url="http://localhost:8080", verify=False
         )
@@ -46,14 +50,16 @@ def test_init_with_missing_token_file(self, mocker: MockerFixture):
         mocker.patch("builtins.open", side_effect=FileNotFoundError)

         with pytest.raises(AgentAPIError, match="Token file not found"):
-            AgentHttpClient("http://localhost:8080", "missing.txt")
+            AgentHttpClient(
+                "http://localhost:8080", version="v1", token_file="missing.txt"
+            )

     def test_init_with_env_token(self, mocker: MockerFixture):
         """Test initializing client with environment token."""
         mock_client = mocker.patch("httpx.Client")
         mocker.patch("os.getenv", return_value="env-token-456")

-        AgentHttpClient("http://localhost:8080")
+        AgentHttpClient("http://localhost:8080", version="v1")

         mock_client.return_value.headers.update.assert_called_once_with(
             {"Authorization": "Bearer env-token-456"}
@@ -77,7 +83,7 @@ def test_query_agent_success(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "How many namespaces are there?",
@@ -116,7 +122,7 @@ def test_query_agent_success_empty_tool_calls(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "What is Openshift Virtualization?",
@@ -143,7 +149,7 @@ def test_query_agent_http_error(self, mocker: MockerFixture):
         )

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}
         with pytest.raises(AgentAPIError, match="Agent API error: 500"):
@@ -156,7 +162,7 @@ def test_query_agent_timeout(self, mocker: MockerFixture):
         mock_client.post.side_effect = httpx.TimeoutException("Request timeout")

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "Test query",
@@ -178,7 +184,7 @@ def test_query_agent_missing_response_field(self, mocker: MockerFixture):
         mock_client.post.return_value = mock_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}
         with pytest.raises(
@@ -190,14 +196,14 @@ def test_query_agent_client_not_initialized(self, mocker: MockerFixture):
         """Test agent query when client is not initialized."""
         mocker.patch("httpx.Client", side_effect=Exception("Setup failed"))
         with pytest.raises(AgentAPIError, match="Failed to setup HTTP client"):
-            AgentHttpClient("http://localhost:8080")
+            AgentHttpClient("http://localhost:8080", version="v1")

     def test_close_client_success(self, mocker: MockerFixture):
         """Test closing client successfully."""
         mock_client = mocker.Mock()

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")
         client.close()

         mock_client.close.assert_called_once()
@@ -208,7 +214,7 @@ def test_client_setup_exception(self, mocker: MockerFixture):
         with pytest.raises(
             AgentAPIError, match="Failed to setup HTTP client: Setup failed"
         ):
-            AgentHttpClient("http://localhost:8080")
+            AgentHttpClient("http://localhost:8080", version="v1")

     # Streaming Query Tests
     def test_streaming_query_agent_success(self, mocker: MockerFixture):
@@ -237,7 +243,7 @@ def test_streaming_query_agent_success(self, mocker: MockerFixture):
         )

         mock_parser.return_value = expected_result
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "What is OpenShift?",
@@ -278,7 +284,7 @@ def test_streaming_query_agent_parser_error(self, mocker: MockerFixture):
         # Mock the parser to raise the specific error
         mock_parser.side_effect = ValueError("No Conversation ID found")

-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")
         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}

         with pytest.raises(
@@ -294,7 +300,7 @@ def test_streaming_query_agent_timeout(self, mocker: MockerFixture):
         mock_client.stream.side_effect = httpx.TimeoutException("Request timeout")

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {
             "query": "Test query",
@@ -326,7 +332,7 @@ def test_streaming_query_agent_http_error(self, mocker: MockerFixture):
         mock_client.stream.return_value = mock_stream_response

         mocker.patch("httpx.Client", return_value=mock_client)
-        client = AgentHttpClient("http://localhost:8080")
+        client = AgentHttpClient("http://localhost:8080", version="v1")

         api_input = {"query": "Test query", "provider": "openai", "model": "gpt-4"}

lsc_agent_eval/tests/test_agent_eval.py (4 changes: 4 additions & 0 deletions)
@@ -28,6 +28,7 @@ def test_args_parser_minimal(self):
         assert parsed.agent_provider == "openai"
         assert parsed.agent_model == "gpt-4"
         assert parsed.agent_endpoint == "http://localhost:8080"  # default
+        assert parsed.agent_api_version == "v1"  # default
         assert parsed.result_dir == "eval_output/"  # default
         assert parsed.endpoint_type == "streaming"  # default

@@ -38,6 +39,8 @@ def test_args_parser_all_arguments(self):
             "test.yaml",
             "--agent_endpoint",
             "http://custom:9090",
+            "--agent_api_version",
+            "v2",
             "--agent_provider",
             "watsonx",
             "--agent_model",
@@ -60,6 +63,7 @@

         assert parsed.eval_data_yaml == "test.yaml"
         assert parsed.agent_endpoint == "http://custom:9090"
+        assert parsed.agent_api_version == "v2"
         assert parsed.agent_provider == "watsonx"
         assert parsed.agent_model == "granite-3-8b-instruct"
         assert parsed.agent_auth_token_file == "token.txt"
src/generate_answers/eval_config.py (1 change: 1 addition & 0 deletions)
@@ -16,6 +16,7 @@ class EvalConfig(BaseModel):
     """Evaluation configuration."""

     lightspeed_url: str
+    lightspeed_api_version: str = "v1"
     models: list[EvalModel]
     models_to_evaluate: set[str]

src/generate_answers/eval_config.yaml (1 change: 1 addition & 0 deletions)
@@ -1,4 +1,5 @@
 lightspeed_url: "http://localhost:8080"
+lightspeed_api_version: "v1"
 models:
   - display_name: "granite-3-3-8b-instruct"
     provider: "watsonx"
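Since `lightspeed_api_version` defaults to `"v1"` in `EvalConfig`, the new YAML key is optional. A sketch of loading the file into the model, assuming PyYAML, that the import path matches the file layout, and that the full file also provides the required `models` and `models_to_evaluate` fields:

```python
import yaml
from generate_answers.eval_config import EvalConfig  # import path assumed

with open("src/generate_answers/eval_config.yaml", encoding="utf-8") as f:
    config = EvalConfig(**yaml.safe_load(f))

# Falls back to "v1" when the key is absent from the YAML
print(config.lightspeed_api_version)
```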
src/generate_answers/generate_answers.py (1 change: 1 addition & 0 deletions)
@@ -173,6 +173,7 @@ def main(  # pylint: disable=R0913,R0917,R0914
             config.lightspeed_url,
             provider=model.provider,
             model=model.model,
+            version=config.lightspeed_api_version,
             cache_dir=llm_cache_dir,
         ),
     )
src/generate_answers/ls_response.py (6 changes: 4 additions & 2 deletions)
@@ -15,15 +15,17 @@
 class LSClient:  # pylint: disable=too-few-public-methods
     """LightSpeed client."""

-    def __init__(
+    def __init__(  # pylint: disable=R0913,R0917
         self,
         ls_url: str,
         provider: str,
         model: str,
+        version: str = "v1",
         cache_dir: str = ".caches/llm_cache",
     ):
         """Init LightSpeed."""
         self.url = ls_url
+        self.version = version
         self.provider = provider
         self.model = model
         self.client = Client(base_url=ls_url, verify=False)
@@ -65,7 +67,7 @@ def get_answer(self, query: str, skip_cache: bool = False) -> str:

         logging.info("Calling LightSpeed service for query '%s'", query)
         response = self.client.post(
-            "/v1/query",
+            f"/{self.version}/query",
             json={
                 "query": query,
                 "provider": self.provider,
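A usage sketch for the updated `LSClient`; the constructor and `get_answer` shapes come from the hunks above, while the endpoint URL and model identifiers are placeholders and the import path is assumed from the file layout.

```python
from generate_answers.ls_response import LSClient  # import path assumed

ls = LSClient(
    "http://localhost:8080",
    provider="watsonx",
    model="ibm/granite-3-2-8b-instruct",
    version="v2",  # get_answer now POSTs to /v2/query
)
answer = ls.get_answer("What is OpenShift Virtualization?")
print(answer)
```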
src/lightspeed_evaluation/core/api/client.py (5 changes: 3 additions & 2 deletions)
@@ -29,6 +29,7 @@ def __init__(
         """Initialize the client with configuration."""
         self.config = config
         self.api_base = config.api_base
+        self.version = config.version
         self.endpoint_type = config.endpoint_type
         self.timeout = config.timeout

@@ -127,7 +128,7 @@ def _standard_query(self, api_request: APIRequest) -> APIResponse:
             raise APIError("HTTP client not initialized")
         try:
             response = self.client.post(
-                "/v1/query",
+                f"/{self.version}/query",
                 json=api_request.model_dump(exclude_none=True),
             )
             response.raise_for_status()
@@ -177,7 +178,7 @@ def _streaming_query(self, api_request: APIRequest) -> APIResponse:
         try:
             with self.client.stream(
                 "POST",
-                "/v1/streaming_query",
+                f"/{self.version}/streaming_query",
                 json=api_request.model_dump(exclude_none=True),
             ) as response:
                 self._handle_response_errors(response)
src/lightspeed_evaluation/core/constants.py (1 change: 1 addition & 0 deletions)
@@ -1,6 +1,7 @@
 """Common constants for evaluation framework."""

 DEFAULT_API_BASE = "http://localhost:8080"
+DEFAULT_API_VERSION = "v1"
 DEFAULT_API_TIMEOUT = 300
 DEFAULT_ENDPOINT_TYPE = "streaming"
 SUPPORTED_ENDPOINT_TYPES = ["streaming", "query"]