Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/)

## [1.4.1] 2025-05-02

### Added

- evaluate command
- tests for the evaluate command with a generic response file

### Changed

- evaluate command
- fixed bug with response file variable

## [1.4.0] 2025-05-01

### Added
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,15 @@ def is_json_file(file: str) -> bool:
except json.JSONDecodeError:
return False
return True


class ResponsesFiles:
    """Paths to response fixture files used by the evaluate tests."""

    # JSON file with sample responses matching the with-ids.yml questions
    responses = FIXTURE_DIR / "responses.json"


@pytest.fixture
def responses_files() -> ResponsesFiles:
    """Provide the response fixture file paths to tests."""
    fixture = ResponsesFiles()
    return fixture
26 changes: 26 additions & 0 deletions tests/fixtures/responses.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "In which department is Ms. Müller?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:1-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/1-en"
},
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "What is the meaning of life?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:2-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/2-en"
},
{
"dataset": "https://text2sparql.aksw.org/2025/corporate/",
"question": "How many inhabitants does Leipzig have?",
"query": "select distinct ?Concept where {[] a ?Concept} LIMIT 100",
"endpoint": "http://127.0.0.1:8000",
"qname": "cd25:3-en",
"uri": "https://text2sparql.aksw.org/2025/corporate/3-en"
}
]
12 changes: 11 additions & 1 deletion tests/fixtures/with-ids.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,22 @@ questions:
question:
en: In which department is Ms. Müller?
de: In welcher Abteilung ist Frau Müller?
query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100

- id: 2
question:
en: What is the meaning of life?
de: Was ist der Sinn des Lebens?
query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100

- id: 3
question:
en: How many inhabitants does Leipzig have?
de: Wieviele Einwohner hat Leipzig?

query:
sparql: |
select distinct ?Concept where {[] a ?Concept} LIMIT 100
60 changes: 60 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Test evaluate"""

from pathlib import Path

from tests import run, run_asserting_error
from tests.conftest import QuestionsFiles, ResponsesFiles


def test_successful_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Evaluate command succeeds with valid questions and responses files."""
    args = (
        "evaluate",
        "api_name",
        str(questions_files.with_ids),
        str(responses_files.responses),
    )
    run(command=args)


def test_non_successful_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Evaluate command fails when a required argument is omitted."""
    # Responses file argument missing entirely
    missing_responses = ("evaluate", "api_name", str(questions_files.non_unique_ids))
    run_asserting_error(command=missing_responses, match="Missing argument")

    # API name argument missing (files shift into the wrong positions)
    missing_api_name = (
        "evaluate",
        str(questions_files.partial_ids),
        str(responses_files.responses),
    )
    run_asserting_error(command=missing_api_name, match="Missing argument")


def test_output_evaluation(
    questions_files: QuestionsFiles, responses_files: ResponsesFiles
) -> None:
    """Test evaluation with output file.

    The first invocation must create the output file; the second must fail
    because the file already exists.
    """
    output = "evaluation.json"
    out_path = Path(output)
    # Remove any leftover from a previous run so the test is idempotent:
    # otherwise the first invocation would fail with "already exists".
    out_path.unlink(missing_ok=True)
    try:
        run(
            command=(
                "evaluate",
                "api_name",
                str(questions_files.with_ids),
                str(responses_files.responses),
                "-o",
                output,
            )
        )
        run_asserting_error(
            command=(
                "evaluate",
                "api_name",
                str(questions_files.with_ids),
                str(responses_files.responses),
                "-o",
                output,
            ),
            match="already exists.",
        )
    finally:
        # Do not leave the artifact behind in the working directory.
        out_path.unlink(missing_ok=True)
10 changes: 4 additions & 6 deletions text2sparql_client/commands/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def check_output_file(file: str) -> None:
@click.option(
"--output",
"-o",
type=click.Path(file_okay=False, allow_dash=True),
type=click.Path(allow_dash=True, dir_okay=False),
default="-",
show_default=True,
help="Which file to save the results.",
Expand All @@ -76,7 +76,7 @@ def check_output_file(file: str) -> None:
def evaluate_command( # noqa: PLR0913
api_name: str,
questions_file: TextIOWrapper,
response_file: TextIOWrapper,
responses_file: TextIOWrapper,
endpoint: str,
output: str,
languages: list,
Expand All @@ -87,7 +87,7 @@ def evaluate_command( # noqa: PLR0913
This command will create a JSON file with the metric values using the pytrec_eval library.
"""
test_dataset = yaml.safe_load(questions_file)
json_file = json.load(response_file)
json_file = json.load(responses_file)

dataset_prefix = test_dataset["dataset"]["prefix"]

Expand Down Expand Up @@ -118,9 +118,7 @@ def evaluate_command( # noqa: PLR0913
evaluation = Evaluation(api_name)
results = evaluation.evaluate(predicted, ground_truth)

logger.info(f"\n-------\nResults: {results}\n-------\n")

check_output_file(file=output)
logger.info(f"Writing {len(results)} responses to {output if output != '-' else 'stdout'}.")
logger.info(f"Writing {len(results)} results to {output if output != '-' else 'stdout'}.")
with click.open_file(filename=output, mode="w", encoding="UTF-8") as file:
json.dump(results, file, indent=2)