Skip to content

Commit 3ac82ea

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: GenAI Client(evals) - Support multiple metrics in Detailed View show method for EvaluationRun for Vertex AI GenAI SDK evals
PiperOrigin-RevId: 822139220
1 parent 7a1262b commit 3ac82ea

File tree

2 files changed

+18
-21
lines changed

2 files changed

+18
-21
lines changed

tests/unit/vertexai/genai/replays/test_get_evaluation_run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def check_run_1957799200510967808_evaluation_item_results(
256256
assert isinstance(eval_case_result, types.EvalCaseResult)
257257
# Check the response candidate results.
258258
response_candidate_result = eval_case_result.response_candidate_results[0]
259-
assert isinstance(response_candidate_result, types.ResponseCandidateResult)
259+
assert response_candidate_result.response_index == 0
260260
universal_metric_result = response_candidate_result.metric_results["universal"]
261261
assert isinstance(universal_metric_result, types.EvalCaseMetricResult)
262262
assert universal_metric_result.metric_name == "universal"

vertexai/_genai/_evals_common.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,28 +1338,24 @@ def _get_eval_case_result_from_eval_item(
13381338
eval_item: types.EvaluationItem,
13391339
) -> types.EvalCaseResult:
13401340
"""Transforms EvaluationItem to EvalCaseResult."""
1341-
response_candidate_results = []
1342-
for candidate_index, candidate_result in enumerate(
1343-
eval_item.evaluation_response.candidate_results
1344-
):
1345-
response_candidate_results.append(
1346-
types.ResponseCandidateResult(
1347-
response_index=candidate_index,
1348-
metric_results={
1349-
candidate_result.metric: types.EvalCaseMetricResult(
1350-
metric_name=candidate_result.metric,
1351-
score=candidate_result.score,
1352-
explanation=candidate_result.explanation,
1353-
rubric_verdicts=candidate_result.rubric_verdicts,
1354-
error_message=(
1355-
eval_item.error.message if eval_item.error else None
1356-
),
1357-
),
1358-
},
1341+
metric_results = {}
1342+
if eval_item.evaluation_response.candidate_results:
1343+
for candidate_result in eval_item.evaluation_response.candidate_results:
1344+
metric_results[candidate_result.metric] = types.EvalCaseMetricResult(
1345+
metric_name=candidate_result.metric,
1346+
score=candidate_result.score,
1347+
explanation=candidate_result.explanation,
1348+
rubric_verdicts=candidate_result.rubric_verdicts,
1349+
error_message=(eval_item.error.message if eval_item.error else None),
13591350
)
1360-
)
13611351
return types.EvalCaseResult(
1362-
eval_case_index=index, response_candidate_results=response_candidate_results
1352+
eval_case_index=index,
1353+
response_candidate_results=[
1354+
types.ResponseCandidateResult(
1355+
response_index=0,
1356+
metric_results=metric_results,
1357+
)
1358+
],
13631359
)
13641360

13651361

@@ -1421,6 +1417,7 @@ def _get_eval_cases_eval_dfs_from_eval_items(
14211417
eval_item
14221418
and eval_item.evaluation_response
14231419
and eval_item.evaluation_response.request
1420+
and eval_item.evaluation_response.candidate_results
14241421
):
14251422
eval_case_results.append(
14261423
_get_eval_case_result_from_eval_item(index, eval_item)

0 commit comments

Comments (0)