7 changes: 4 additions & 3 deletions lib/crewai/src/crewai/utilities/evaluators/task_evaluator.py
@@ -29,7 +29,8 @@ class TaskEvaluation(BaseModel):
description="Suggestions to improve future similar tasks."
)
quality: float = Field(
description="A score from 0 to 10 evaluating on completion, quality, and overall performance, all taking into account the task description, expected output, and the result of the task."
default=5.0,
description="A score from 0 to 10 evaluating on completion, quality, and overall performance, all taking into account the task description, expected output, and the result of the task. Defaults to 5.0 if not provided."
)
entities: list[Entity] = Field(
description="Entities extracted from the task output."
@@ -86,9 +87,9 @@ def evaluate(self, task: Task, output: str) -> TaskEvaluation:
f"Task Description:\n{task.description}\n\n"
f"Expected Output:\n{task.expected_output}\n\n"
f"Actual Output:\n{output}\n\n"
"Please provide:\n"
"Please provide ALL of the following (all fields are required):\n"
"- Bullet points suggestions to improve future similar tasks\n"
"- A score from 0 to 10 evaluating on completion, quality, and overall performance"
"- A quality score from 0 to 10 evaluating on completion, quality, and overall performance (REQUIRED - must be a numeric value)\n"
"- Entities extracted from the task output, if any, their type, description, and relationships"
)

60 changes: 60 additions & 0 deletions lib/crewai/tests/utilities/evaluators/test_task_evaluator.py
@@ -3,6 +3,8 @@

from crewai.utilities.converter import ConverterError
from crewai.utilities.evaluators.task_evaluator import (
+    Entity,
+    TaskEvaluation,
TaskEvaluator,
TrainingTaskEvaluation,
)
@@ -103,3 +105,61 @@ def test_training_converter_fallback_mechanism(
assert result == expected_result
to_pydantic_mock.assert_called_once()
convert_field_by_field_mock.assert_called_once()


def test_task_evaluation_with_missing_quality_field():
"""Test that TaskEvaluation defaults quality to 5.0 when not provided."""
# Simulate LLM output without quality field
evaluation_data = {
"suggestions": ["Test suggestion"],
"entities": [],
}

# Should not raise validation error and should default quality to 5.0
evaluation = TaskEvaluation(**evaluation_data)

assert evaluation.quality == 5.0
assert evaluation.suggestions == ["Test suggestion"]
assert evaluation.entities == []


def test_task_evaluation_with_provided_quality_field():
"""Test that TaskEvaluation works correctly when quality is provided."""
# Simulate LLM output with quality field
evaluation_data = {
"suggestions": ["Test suggestion"],
"quality": 8.5,
"entities": [
{
"name": "Test Entity",
"type": "Person",
"description": "A test entity",
"relationships": ["related_to_entity"],
}
],
}

evaluation = TaskEvaluation(**evaluation_data)

assert evaluation.quality == 8.5
assert evaluation.suggestions == ["Test suggestion"]
assert len(evaluation.entities) == 1
assert evaluation.entities[0].name == "Test Entity"


def test_task_evaluation_validation_with_partial_json():
"""Test that TaskEvaluation can be created from partial JSON missing quality."""
import json

# Simulate partial JSON response from LLM (missing quality)
partial_json = json.dumps({
"suggestions": ["Suggestion 1", "Suggestion 2"],
"entities": [],
})

# Should parse successfully with default quality
evaluation = TaskEvaluation.model_validate_json(partial_json)

assert evaluation.quality == 5.0
assert len(evaluation.suggestions) == 2
assert evaluation.entities == []