
Commit 83553a9

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add create_evaluation_set and create_evaluation_item methods to Vertex AI GenAI SDK evals
PiperOrigin-RevId: 821656026
1 parent a26171c · commit 83553a9

File tree

4 files changed: +2921 -2275 lines changed

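The tests below pin down the call signatures of the two new methods. As a minimal usage sketch, assuming a configured GenAI client (the vertexai.Client construction, project, bucket, and display names here are placeholders, not taken from the diff):

# Sketch only: client construction and all resource names are assumptions.
import vertexai
from vertexai import types

client = vertexai.Client(project="my-project", location="us-central1")

# Create one evaluation item whose request payload lives in a GCS JSON file.
item = client.evals.create_evaluation_item(
    evaluation_item_type=types.EvaluationItemType.REQUEST,
    gcs_uri="gs://my-bucket/request.json",
    display_name="my_eval_item",
)

# Group items into an evaluation set; sets reference items by resource name.
eval_set = client.evals.create_evaluation_set(
    evaluation_items=[item.name],
    display_name="my_eval_set",
)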
Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=protected-access,bad-continuation,missing-function-docstring

from tests.unit.vertexai.genai.replays import pytest_helper
from vertexai import types
import pytest


GCS_URI = (
    "gs://lakeyk-limited-bucket/agora_eval_080525/request_4813679498589372416.json"
)
DISPLAY_NAME = "test_eval_item"


def test_create_eval_item(client):
    """Tests that create_evaluation_item() returns a correctly structured EvaluationItem."""
    evaluation_item = client.evals.create_evaluation_item(
        evaluation_item_type=types.EvaluationItemType.REQUEST,
        gcs_uri=GCS_URI,
        display_name=DISPLAY_NAME,
    )
    # Retrieve the evaluation item to check that it was created correctly.
    retrieved_evaluation_item = client.evals.get_evaluation_item(
        name=evaluation_item.name
    )
    check_evaluation_item(
        evaluation_item,
        retrieved_evaluation_item,
    )


pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_create_eval_item_async(client):
    """Tests that the async create_evaluation_item() returns a correctly structured EvaluationItem."""
    evaluation_item = await client.aio.evals.create_evaluation_item(
        evaluation_item_type=types.EvaluationItemType.REQUEST,
        gcs_uri=GCS_URI,
        display_name=DISPLAY_NAME,
    )
    # Retrieve the evaluation item to check that it was created correctly.
    retrieved_evaluation_item = await client.aio.evals.get_evaluation_item(
        name=evaluation_item.name
    )
    check_evaluation_item(
        evaluation_item,
        retrieved_evaluation_item,
    )


def check_evaluation_item(
    evaluation_item: types.EvaluationItem,
    retrieved_evaluation_item: types.EvaluationItem,
):
    # Verify the fields on the item returned by create_evaluation_item().
    assert isinstance(evaluation_item, types.EvaluationItem)
    assert evaluation_item.gcs_uri == GCS_URI
    assert evaluation_item.evaluation_item_type == types.EvaluationItemType.REQUEST
    assert evaluation_item.display_name == DISPLAY_NAME
    # Verify the same fields on the item fetched via get_evaluation_item().
    assert retrieved_evaluation_item.gcs_uri == GCS_URI
    assert (
        retrieved_evaluation_item.evaluation_item_type
        == types.EvaluationItemType.REQUEST
    )
    assert retrieved_evaluation_item.display_name == DISPLAY_NAME
    # Check the request data.
    request = retrieved_evaluation_item.evaluation_request
    assert (
        "If your ball is curving during flight from left to right"
        in request.prompt.text
    )
    # Check the first candidate response.
    assert request.candidate_responses[0].candidate == "gemini-2.0-flash-001@default"
    assert (
        "Keep your knees bent during the backswing"
        in request.candidate_responses[0].text
    )


pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.create_evaluation_item",
)
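The check helper above also documents the shape of a retrieved REQUEST item: the JSON payload at gcs_uri is surfaced as evaluation_request, carrying a prompt and a list of candidate_responses. A small inspection sketch under the same assumptions (the client and item name are placeholders):

# Sketch only: field access mirrors check_evaluation_item() above.
def show_item(client, item_name: str) -> None:
    item = client.evals.get_evaluation_item(name=item_name)
    print(item.display_name, item.evaluation_item_type)
    request = item.evaluation_request  # parsed from the JSON at gcs_uri
    print(request.prompt.text)
    for response in request.candidate_responses:
        print(response.candidate, response.text)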
Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=protected-access,bad-continuation,missing-function-docstring

from tests.unit.vertexai.genai.replays import pytest_helper
from vertexai import types
import pytest


# Resource names of pre-existing evaluation items used by the replay tests.
EVAL_ITEMS = [
    "projects/503583131166/locations/us-central1/evaluationItems/4411504533427978240",
    "projects/503583131166/locations/us-central1/evaluationItems/8621947972554326016",
]
DISPLAY_NAME = "test_eval_set"


def test_create_eval_set(client):
    """Tests that create_evaluation_set() returns a correctly structured EvaluationSet."""
    evaluation_set = client.evals.create_evaluation_set(
        evaluation_items=EVAL_ITEMS, display_name=DISPLAY_NAME
    )
    assert isinstance(evaluation_set, types.EvaluationSet)
    assert evaluation_set.display_name == DISPLAY_NAME
    assert evaluation_set.evaluation_items == EVAL_ITEMS


pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_create_eval_set_async(client):
    """Tests that the async create_evaluation_set() returns a correctly structured EvaluationSet."""
    evaluation_set = await client.aio.evals.create_evaluation_set(
        evaluation_items=EVAL_ITEMS,
        display_name=DISPLAY_NAME,
    )
    assert isinstance(evaluation_set, types.EvaluationSet)
    assert evaluation_set.display_name == DISPLAY_NAME
    assert evaluation_set.evaluation_items == EVAL_ITEMS


pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.create_evaluation_set",
)
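As the async tests show, both new methods have awaitable counterparts under client.aio.evals. A minimal end-to-end async sketch (client setup and all resource values are assumed placeholders):

# Sketch only: mirrors the async tests above; names are placeholders.
import asyncio

import vertexai
from vertexai import types


async def main() -> None:
    client = vertexai.Client(project="my-project", location="us-central1")
    item = await client.aio.evals.create_evaluation_item(
        evaluation_item_type=types.EvaluationItemType.REQUEST,
        gcs_uri="gs://my-bucket/request.json",
        display_name="my_eval_item",
    )
    eval_set = await client.aio.evals.create_evaluation_set(
        evaluation_items=[item.name],
        display_name="my_eval_set",
    )
    print(eval_set.name)


asyncio.run(main())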
