Commit b05e5b3

vertex-sdk-bot authored and copybara-github committed
feat: GenAI SDK client(evals) - Add predefined metrics for Gecko text-to-image and text-to-video evaluations
PiperOrigin-RevId: 822235488
1 parent 748286b commit b05e5b3
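In user code, the new metrics plug into the rubric-based evals flow exercised by the tests below: generate Gecko rubrics for a prompt/response dataset, then score that dataset against the same predefined metric. A minimal sketch mirroring the test calls; the `vertexai.Client(...)` construction and the `types` import path are assumed setup, and the bucket URI is a placeholder:

```python
import pandas as pd
import vertexai
from vertexai._genai import types  # assumed import path, matching this repo's layout

client = vertexai.Client(project="my-project", location="us-central1")  # assumed setup

# One prompt paired with a model image response, referenced as file_data.
prompts_df = pd.DataFrame({
    "prompt": ["sunset over a calm ocean"],
    "response": [{
        "parts": [{"file_data": {
            "mime_type": "image/png",
            "file_uri": "gs://my-bucket/sunset.png",  # placeholder URI
        }}],
        "role": "model",
    }],
})

# Attach Gecko text-to-image rubrics, then evaluate against the same metric.
data_with_rubrics = client.evals.generate_rubrics(
    src=prompts_df,
    rubric_group_name="gecko_image_rubrics",
    predefined_spec_name=types.RubricMetric.GECKO_TEXT2IMAGE,
)
evaluation_result = client.evals.evaluate(
    dataset=data_with_rubrics,
    metrics=[types.RubricMetric.GECKO_TEXT2IMAGE],
)
```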

File tree

3 files changed: +131, -0 lines changed

    tests/unit/vertexai/genai/replays/test_evaluate_predefined_metrics.py
    vertexai/_genai/_evals_constant.py
    vertexai/_genai/_evals_utils.py

tests/unit/vertexai/genai/replays/test_evaluate_predefined_metrics.py

Lines changed: 121 additions & 0 deletions
```diff
@@ -248,6 +248,127 @@ def test_evaluation_grounding_metric(client):
         assert case_result.response_candidate_results is not None
 
 
+def test_evaluation_gecko_text2image_metric(client):
+    """Tests that Gecko text2image metric produces a correctly structured EvaluationResult."""
+    prompts_df = pd.DataFrame(
+        {
+            "prompt": ["sunset over a calm ocean"],
+            "response": [
+                {
+                    "parts": [
+                        {
+                            "file_data": {
+                                "mime_type": "image/png",
+                                "file_uri": (
+                                    "gs://cloud-samples-data/generative-ai/evaluation/"
+                                    "images/sunset.png"
+                                ),
+                            }
+                        }
+                    ],
+                    "role": "model",
+                },
+            ],
+        }
+    )
+
+    data_with_rubrics = client.evals.generate_rubrics(
+        src=prompts_df,
+        rubric_group_name="gecko_image_rubrics",
+        predefined_spec_name=types.RubricMetric.GECKO_TEXT2IMAGE,
+    )
+
+    assert isinstance(data_with_rubrics, types.EvaluationDataset)
+    assert data_with_rubrics.eval_dataset_df is not None
+    assert len(data_with_rubrics.eval_dataset_df) == 1
+    for _, case in data_with_rubrics.eval_dataset_df.iterrows():
+        assert case.rubric_groups is not None
+        assert "gecko_image_rubrics" in case.rubric_groups
+
+    evaluation_result = client.evals.evaluate(
+        dataset=data_with_rubrics,
+        metrics=[
+            types.RubricMetric.GECKO_TEXT2IMAGE,
+        ],
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+
+    assert evaluation_result.summary_metrics is not None
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+        assert summary.mean_score is not None
+
+    assert evaluation_result.eval_case_results is not None
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
+def test_evaluation_gecko_text2video_metric(client):
+    """Tests that Gecko text2video metric produces a correctly structured EvaluationResult."""
+    prompts_df = pd.DataFrame(
+        {
+            "prompt": [
+                "A boat sailing leisurely along the Seine River with the Eiffel Tower "
+                "in background"
+            ],
+            "response": [
+                {
+                    "parts": [
+                        {
+                            "file_data": {
+                                "mime_type": "video/mp4",
+                                "file_uri": (
+                                    "gs://cloud-samples-data/generative-ai/evaluation/"
+                                    "videos/boat.mp4"
+                                ),
+                            }
+                        }
+                    ],
+                    "role": "model",
+                },
+            ],
+        }
+    )
+
+    data_with_rubrics = client.evals.generate_rubrics(
+        src=prompts_df,
+        rubric_group_name="gecko_video_rubrics",
+        predefined_spec_name=types.RubricMetric.GECKO_TEXT2VIDEO,
+    )
+
+    assert isinstance(data_with_rubrics, types.EvaluationDataset)
+    assert data_with_rubrics.eval_dataset_df is not None
+    assert len(data_with_rubrics.eval_dataset_df) == 1
+    for _, case in data_with_rubrics.eval_dataset_df.iterrows():
+        assert case.rubric_groups is not None
+        assert "gecko_video_rubrics" in case.rubric_groups
+
+    evaluation_result = client.evals.evaluate(
+        dataset=data_with_rubrics,
+        metrics=[
+            types.RubricMetric.GECKO_TEXT2VIDEO,
+        ],
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+
+    assert evaluation_result.summary_metrics is not None
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+        assert summary.mean_score is not None
+
+    assert evaluation_result.eval_case_results is not None
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
 pytestmark = pytest_helper.setup(
     file=__file__,
     globals_for_file=globals(),
```
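The assertions above double as documentation of the result shape. Reading the scores back in user code looks roughly like this (a sketch using only the fields the tests check; `evaluation_result` is the object returned by `client.evals.evaluate`):

```python
# Per-metric aggregates: one AggregatedMetricResult per metric evaluated.
for summary in evaluation_result.summary_metrics:
    print(f"{summary.metric_name}: mean={summary.mean_score}")

# Per-case results: one EvalCaseResult per dataset row.
for case_result in evaluation_result.eval_case_results:
    print(case_result.eval_case_index,
          len(case_result.response_candidate_results))
```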

vertexai/_genai/_evals_constant.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -28,6 +28,8 @@
         "final_response_quality_v1",
         "hallucination_v1",
         "tool_use_quality_v1",
+        "gecko_text2image_v1",
+        "gecko_text2video_v1",
     }
 )
```
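These two strings extend the allowlist of predefined metric versions, so requests for the Gecko specs pass the SDK's name validation. The set's actual identifier is outside this hunk, so the sketch below uses a hypothetical `_PREDEFINED_METRICS` name and an illustrative check:

```python
# Hypothetical reconstruction of the registry this hunk extends.
_PREDEFINED_METRICS = frozenset({
    "final_response_quality_v1",
    "hallucination_v1",
    "tool_use_quality_v1",
    "gecko_text2image_v1",
    "gecko_text2video_v1",
})

def _validate_predefined_metric(name: str) -> None:
    # Illustrative validation; the SDK's real check may differ.
    if name not in _PREDEFINED_METRICS:
        raise ValueError(f"Unknown predefined metric: {name}")
```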

vertexai/_genai/_evals_utils.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -603,6 +603,14 @@ def HALLUCINATION(self) -> LazyLoadedPrebuiltMetric:
     def TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
         return self.__getattr__("TOOL_USE_QUALITY")
 
+    @property
+    def GECKO_TEXT2IMAGE(self) -> LazyLoadedPrebuiltMetric:
+        return self.__getattr__("GECKO_TEXT2IMAGE")
+
+    @property
+    def GECKO_TEXT2VIDEO(self) -> LazyLoadedPrebuiltMetric:
+        return self.__getattr__("GECKO_TEXT2VIDEO")
+
 
 PrebuiltMetric = PrebuiltMetricLoader()
 RubricMetric = PrebuiltMetric
```
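The wrappers exist because `@property` attributes win over `__getattr__` in normal attribute lookup: each property calls `__getattr__` explicitly to force the lazy-resolution path while keeping the metric names typed and discoverable on the loader. A self-contained sketch of the pattern (placeholder `_load`; not the SDK's implementation):

```python
class MetricLoader:
    """Sketch of the lazy-attribute pattern; not the SDK source."""

    def __getattr__(self, name: str):
        # Fallback lookup: resolve the metric the first time it is requested.
        return self._load(name)

    @property
    def GECKO_TEXT2IMAGE(self):
        # Properties shadow __getattr__, so delegate to it explicitly to keep
        # a single lazy code path while exposing a typed attribute.
        return self.__getattr__("GECKO_TEXT2IMAGE")

    def _load(self, name: str):
        return f"<prebuilt metric: {name.lower()}>"  # placeholder loader


print(MetricLoader().GECKO_TEXT2IMAGE)  # -> <prebuilt metric: gecko_text2image>
```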
