Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/)

## [1.4.1] 2025-05-02

### Added

- evaluate command
- tests for the evaluate command with a generic response file

### Changed

- evaluate command
- fixed bug with response file variable

## [1.4.0] 2025-05-01

### Added
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,15 @@ def is_json_file(file: str) -> bool:
except json.JSONDecodeError:
return False
return True


class ResponsesFiles:
    """Paths to response fixture files used by the evaluate tests."""

    # JSON file with sample responses matching the with-ids.yml questions
    responses = FIXTURE_DIR / "responses.json"


@pytest.fixture
def responses_files() -> ResponsesFiles:
    """Provide the response fixture file paths to tests."""
    fixture = ResponsesFiles()
    return fixture
26 changes: 26 additions & 0 deletions tests/fixtures/responses.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "In which department is Ms. Müller?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:1-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/1-en"
},
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "What is the meaning of life?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:2-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/2-en"
},
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "How many inhabitants does Leipzig have?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:3-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/3-en"
}
]
12 changes: 11 additions & 1 deletion tests/fixtures/with-ids.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,22 @@ questions:
question:
en: In which department is Ms. Müller?
de: In welcher Abteilung ist Frau Müller?
query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100

- id: 2
question:
en: What is the meaning of life?
de: Was ist der Sinn des Lebens?
query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100

- id: 3
question:
en: How many inhabitants does Leipzig have?
de: Wieviele Einwohner hat Leipzig?

query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100
60 changes: 60 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Test evaluate"""

from pathlib import Path

from tests import run, run_asserting_error
from tests.conftest import QuestionsFiles, ResponsesFiles


def test_successful_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Evaluate command succeeds with valid questions and responses files."""
    args = (
        "evaluate",
        "api_name",
        str(questions_files.with_ids),
        str(responses_files.responses),
    )
    run(command=args)


def test_non_successful_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Evaluate command fails when a required argument is omitted."""
    # Responses file argument missing entirely
    missing_responses = ("evaluate", "api_name", str(questions_files.non_unique_ids))
    run_asserting_error(command=missing_responses, match="Missing argument")

    # API name argument missing (files shift into the wrong positions)
    missing_api_name = (
        "evaluate",
        str(questions_files.partial_ids),
        str(responses_files.responses),
    )
    run_asserting_error(command=missing_api_name, match="Missing argument")


def test_output_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Test evaluation with output file.

    The first invocation must create the output file; the second must fail
    because the file already exists.
    """
    output = "evaluation.json"
    out_path = Path(output)
    # Remove any leftover from a previous run so the test is idempotent:
    # otherwise the first invocation would fail with "already exists".
    out_path.unlink(missing_ok=True)
    try:
        run(
            command=(
                "evaluate",
                "api_name",
                str(questions_files.with_ids),
                str(responses_files.responses),
                "-o",
                output,
            )
        )
        run_asserting_error(
            command=(
                "evaluate",
                "api_name",
                str(questions_files.with_ids),
                str(responses_files.responses),
                "-o",
                output,
            ),
            match="already exists.",
        )
    finally:
        # Do not leave the artifact behind in the working directory.
        out_path.unlink(missing_ok=True)
10 changes: 4 additions & 6 deletions text2sparql_client/commands/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def check_output_file(file: str) -> None:
@click.option(
"--output",
"-o",
type=click.Path(file_okay=False, allow_dash=True),
type=click.Path(allow_dash=True, dir_okay=False),
default="-",
show_default=True,
help="Which file to save the results.",
Expand All @@ -76,7 +76,7 @@ def check_output_file(file: str) -> None:
def evaluate_command( # noqa: PLR0913
api_name: str,
questions_file: TextIOWrapper,
response_file: TextIOWrapper,
responses_file: TextIOWrapper,
endpoint: str,
output: str,
languages: list,
Expand All @@ -87,7 +87,7 @@ def evaluate_command( # noqa: PLR0913
This command will create a JSON file with the metric values using the pytrec_eval library.
"""
test_dataset = yaml.safe_load(questions_file)
json_file = json.load(response_file)
json_file = json.load(responses_file)

dataset_prefix = test_dataset["dataset"]["prefix"]

Expand Down Expand Up @@ -118,9 +118,7 @@ def evaluate_command( # noqa: PLR0913
evaluation = Evaluation(api_name)
results = evaluation.evaluate(predicted, ground_truth)

logger.info(f"\n-------\nResults: {results}\n-------\n")

check_output_file(file=output)
logger.info(f"Writing {len(results)} responses to {output if output != '-' else 'stdout'}.")
logger.info(f"Writing {len(results)} results to {output if output != '-' else 'stdout'}.")
with click.open_file(filename=output, mode="w", encoding="UTF-8") as file:
json.dump(results, file, indent=2)