
Commit 173bb18

fix(llmobs): fix assessment argument description for custom evals (#15032)
Corrects the docstring description of the `assessment` argument in `submit_evaluation()`. There is no functional change; only the incorrect docstring wording is updated. (cherry picked from commit 314c76e)
1 parent: 4735346 · commit: 173bb18

File tree: 3 files changed, +8 −3 lines changed

ddtrace/llmobs/_llmobs.py

Lines changed: 1 addition & 1 deletion
@@ -1630,7 +1630,7 @@ def submit_evaluation(
             If not set, the current time will be used.
         :param dict metadata: A JSON serializable dictionary of key-value metadata pairs relevant to the
                               evaluation metric.
-        :param str assessment: An assessment of the validity of this evaluation. Must be either "pass" or "fail".
+        :param str assessment: An assessment of this evaluation. Must be either "pass" or "fail".
         :param str reasoning: An explanation of the evaluation result.
         """
         if span_context is not None:
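For context, a minimal sketch of how the corrected docstring is exercised, assuming LLM Observability has already been enabled via LLMObs.enable() and a span has been recorded; the label, value, metadata, and reasoning shown here are illustrative placeholders, not part of this commit:

from ddtrace.llmobs import LLMObs

# Export the most recently active LLMObs span (span=None targets the current span).
span_context = LLMObs.export_span(span=None)

# Submit a custom categorical evaluation attached to that span.
LLMObs.submit_evaluation(
    span_context=span_context,
    label="relevance",                  # illustrative metric label
    metric_type="categorical",
    value="high",
    metadata={"prompt_version": "v2"},  # illustrative metadata
    assessment="pass",                  # whether this evaluation passes or fails per your application
    reasoning="The response directly addressed the user's question.",
)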
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    LLM Observability: Corrected the description of the ``assessment`` argument in ``submit_evaluation()``.
+    ``assessment`` now refers to whether the evaluation itself passes or fails according to your application, rather than the validity of the evaluation result.

tests/llmobs/test_llmobs_service.py

Lines changed: 2 additions & 2 deletions
@@ -1879,7 +1879,7 @@ def test_submit_evaluation_enqueues_writer_with_assessment(llmobs, mock_llmobs_e
         tags={"foo": "bar", "bee": "baz", "ml_app": "ml_app_override"},
         ml_app="ml_app_override",
         metadata={"foo": ["bar", "baz"]},
-        assessment="pass",
+        assessment="fail",
     )
     mock_llmobs_eval_metric_writer.enqueue.assert_called_with(
         _expected_llmobs_eval_metric_event(
@@ -1891,7 +1891,7 @@ def test_submit_evaluation_enqueues_writer_with_assessment(llmobs, mock_llmobs_e
             categorical_value="high",
             tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:ml_app_override", "foo:bar", "bee:baz"],
             metadata={"foo": ["bar", "baz"]},
-            assessment="pass",
+            assessment="fail",
         )
     )
     mock_llmobs_eval_metric_writer.reset()
