Commit 5d1f444

fix: Make TaskEvaluation.quality optional with default value
- Add default value of 5.0 to quality field in TaskEvaluation model
- Improve evaluation prompt to emphasize quality field requirement
- Add tests to verify default behavior and backward compatibility

Fixes validation errors when LLM output omits quality field during streaming, preventing memory save failures in long-term storage.

Fixes #3915
1 parent d7bdac1 commit 5d1f444
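
The bug being fixed is that quality was previously a required field on the Pydantic model, so a streamed or partial LLM payload that omits it fails validation and the long-term memory save aborts. A minimal sketch of that pre-fix failure mode, using a hypothetical RequiredQuality model in place of the real TaskEvaluation and assuming pydantic v2:

# Hypothetical minimal model illustrating the pre-fix failure mode (pydantic v2 assumed).
from pydantic import BaseModel, Field, ValidationError

class RequiredQuality(BaseModel):
    suggestions: list[str]
    quality: float = Field(description="A score from 0 to 10.")  # no default, so required

try:
    # A payload that omits "quality", as can happen with streamed LLM output
    RequiredQuality.model_validate({"suggestions": ["add more detail"]})
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "missing" -- the validation error this commit avoids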

2 files changed (+64 −3)

lib/crewai/src/crewai/utilities/evaluators/task_evaluator.py

Lines changed: 4 additions & 3 deletions
@@ -29,7 +29,8 @@ class TaskEvaluation(BaseModel):
         description="Suggestions to improve future similar tasks."
     )
     quality: float = Field(
-        description="A score from 0 to 10 evaluating on completion, quality, and overall performance, all taking into account the task description, expected output, and the result of the task."
+        default=5.0,
+        description="A score from 0 to 10 evaluating on completion, quality, and overall performance, all taking into account the task description, expected output, and the result of the task. Defaults to 5.0 if not provided."
     )
     entities: list[Entity] = Field(
         description="Entities extracted from the task output."
@@ -86,9 +87,9 @@ def evaluate(self, task: Task, output: str) -> TaskEvaluation:
             f"Task Description:\n{task.description}\n\n"
             f"Expected Output:\n{task.expected_output}\n\n"
             f"Actual Output:\n{output}\n\n"
-            "Please provide:\n"
+            "Please provide ALL of the following (all fields are required):\n"
             "- Bullet points suggestions to improve future similar tasks\n"
-            "- A score from 0 to 10 evaluating on completion, quality, and overall performance"
+            "- A quality score from 0 to 10 evaluating on completion, quality, and overall performance (REQUIRED - must be a numeric value)\n"
             "- Entities extracted from the task output, if any, their type, description, and relationships"
         )
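
The fix itself is the standard Pydantic default-value pattern: with default=5.0 on the field, a payload that omits quality validates and falls back to the default instead of raising. A minimal sketch with a simplified stand-in model (not the full TaskEvaluation class; pydantic v2 assumed):

# Simplified stand-in for TaskEvaluation showing the default-field behavior (pydantic v2 assumed).
from pydantic import BaseModel, Field

class EvaluationSketch(BaseModel):
    suggestions: list[str] = Field(description="Suggestions to improve future similar tasks.")
    quality: float = Field(default=5.0, description="A score from 0 to 10. Defaults to 5.0 if not provided.")

# A payload missing "quality" now validates and picks up the default.
partial = EvaluationSketch.model_validate({"suggestions": ["tighten the summary"]})
print(partial.quality)  # 5.0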

lib/crewai/tests/utilities/evaluators/test_task_evaluator.py

Lines changed: 60 additions & 0 deletions
@@ -3,6 +3,8 @@

 from crewai.utilities.converter import ConverterError
 from crewai.utilities.evaluators.task_evaluator import (
+    Entity,
+    TaskEvaluation,
     TaskEvaluator,
     TrainingTaskEvaluation,
 )
@@ -103,3 +105,61 @@ def test_training_converter_fallback_mechanism(
     assert result == expected_result
     to_pydantic_mock.assert_called_once()
     convert_field_by_field_mock.assert_called_once()
+
+
+def test_task_evaluation_with_missing_quality_field():
+    """Test that TaskEvaluation defaults quality to 5.0 when not provided."""
+    # Simulate LLM output without quality field
+    evaluation_data = {
+        "suggestions": ["Test suggestion"],
+        "entities": [],
+    }
+
+    # Should not raise validation error and should default quality to 5.0
+    evaluation = TaskEvaluation(**evaluation_data)
+
+    assert evaluation.quality == 5.0
+    assert evaluation.suggestions == ["Test suggestion"]
+    assert evaluation.entities == []
+
+
+def test_task_evaluation_with_provided_quality_field():
+    """Test that TaskEvaluation works correctly when quality is provided."""
+    # Simulate LLM output with quality field
+    evaluation_data = {
+        "suggestions": ["Test suggestion"],
+        "quality": 8.5,
+        "entities": [
+            {
+                "name": "Test Entity",
+                "type": "Person",
+                "description": "A test entity",
+                "relationships": ["related_to_entity"],
+            }
+        ],
+    }
+
+    evaluation = TaskEvaluation(**evaluation_data)
+
+    assert evaluation.quality == 8.5
+    assert evaluation.suggestions == ["Test suggestion"]
+    assert len(evaluation.entities) == 1
+    assert evaluation.entities[0].name == "Test Entity"
+
+
+def test_task_evaluation_validation_with_partial_json():
+    """Test that TaskEvaluation can be created from partial JSON missing quality."""
+    import json
+
+    # Simulate partial JSON response from LLM (missing quality)
+    partial_json = json.dumps({
+        "suggestions": ["Suggestion 1", "Suggestion 2"],
+        "entities": [],
+    })
+
+    # Should parse successfully with default quality
+    evaluation = TaskEvaluation.model_validate_json(partial_json)
+
+    assert evaluation.quality == 5.0
+    assert len(evaluation.suggestions) == 2
+    assert evaluation.entities == []
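
To run just the new cases locally, a pytest invocation along these lines should work (path taken from the file list above; adjust to the repo's actual test setup):

pytest lib/crewai/tests/utilities/evaluators/test_task_evaluator.py -k "task_evaluation" -v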
