
Commit 67cf80b

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add labels to EvaluationRun in Vertex AI GenAI SDK evals
PiperOrigin-RevId: 822313437
1 parent db286c4 · commit 67cf80b

3 files changed (+66, -4 lines)

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 13 additions & 4 deletions
@@ -19,7 +19,7 @@
 from google.genai import types as genai_types
 import pytest
 
-GCS_DEST = "gs://lakeyk-test-limited/eval_run_output"
+GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
 UNIVERSAL_AR_METRIC = types.EvaluationRunMetric(
     metric="universal_ar_v1",
     metric_config=types.UnifiedMetric(
@@ -51,9 +51,6 @@
 # TODO(b/431231205): Re-enable once Unified Metrics are in prod.
 # def test_create_eval_run_data_source_evaluation_set(client):
 #     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
-#     client._api_client._http_options.base_url = (
-#         "https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
-#     )
 #     client._api_client._http_options.api_version = "v1beta1"
 #     tool = genai_types.Tool(
 #         function_declarations=[
@@ -80,10 +77,12 @@
 #             LLM_METRIC
 #         ],
 #         agent_info=types.AgentInfo(
+#             agent="project/123/locations/us-central1/reasoningEngines/456",
 #             name="agent-1",
 #             instruction="agent-1 instruction",
 #             tool_declarations=[tool],
 #         ),
+#         labels={"label1": "value1"},
 #     )
 #     assert isinstance(evaluation_run, types.EvaluationRun)
 #     assert evaluation_run.display_name == "test4"
@@ -108,6 +107,10 @@
 #             tools=[tool],
 #         )
 #     )
+#     assert evaluation_run.labels == {
+#         "vertex-ai-evaluation-agent-engine-id": "456",
+#         "label1": "value1",
+#     }
 #     assert evaluation_run.error is None
 
 
@@ -127,6 +130,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
                 },
             )
         ),
+        labels={"label1": "value1"},
         dest=GCS_DEST,
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -150,6 +154,9 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
         ),
     )
     assert evaluation_run.inference_configs is None
+    assert evaluation_run.labels == {
+        "label1": "value1",
+    }
     assert evaluation_run.error is None
 
 
@@ -289,6 +296,8 @@ async def test_create_eval_run_async(client):
     assert evaluation_run.error is None
     assert evaluation_run.inference_configs is None
     assert evaluation_run.error is None
+    assert evaluation_run.labels is None
+    assert evaluation_run.error is None
 
 
 pytestmark = pytest_helper.setup(
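
For context, the assertions above pin down the intended behavior of the new labels field: user-supplied labels come back verbatim on the created run, an agent run additionally carries the reserved vertex-ai-evaluation-agent-engine-id label parsed from the reasoning engine resource name, and a run created without labels reports labels as None. The following self-contained sketch only mirrors those expectations; the helper is hypothetical and not part of the SDK.

from typing import Optional


def expected_run_labels(
    user_labels: Optional[dict[str, str]],
    agent_engine: Optional[str] = None,
) -> Optional[dict[str, str]]:
    """Hypothetical helper mirroring what the tests expect EvaluationRun.labels to hold."""
    labels = dict(user_labels) if user_labels else {}
    if agent_engine:
        # The reserved label carries the reasoning engine ID taken from the resource name.
        labels["vertex-ai-evaluation-agent-engine-id"] = agent_engine.split(
            "reasoningEngines/"
        )[-1]
    return labels or None


assert expected_run_labels({"label1": "value1"}) == {"label1": "value1"}
assert expected_run_labels(
    {"label1": "value1"},
    "projects/123/locations/us-central1/reasoningEngines/456",
) == {
    "vertex-ai-evaluation-agent-engine-id": "456",
    "label1": "value1",
}
assert expected_run_labels(None) is None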

vertexai/_genai/evals.py

Lines changed: 36 additions & 0 deletions
@@ -77,6 +77,9 @@ def _CreateEvaluationRunParameters_to_vertex(
     if getv(from_object, ["evaluation_config"]) is not None:
         setv(to_object, ["evaluationConfig"], getv(from_object, ["evaluation_config"]))
 
+    if getv(from_object, ["labels"]) is not None:
+        setv(to_object, ["labels"], getv(from_object, ["labels"]))
+
     if getv(from_object, ["config"]) is not None:
         setv(to_object, ["config"], getv(from_object, ["config"]))
 
@@ -236,6 +239,9 @@ def _EvaluationRun_from_vertex(
     if getv(from_object, ["inferenceConfigs"]) is not None:
         setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"]))
 
+    if getv(from_object, ["labels"]) is not None:
+        setv(to_object, ["labels"], getv(from_object, ["labels"]))
+
     return to_object
 
 
@@ -464,6 +470,7 @@ def _create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
+        labels: Optional[dict[str, str]] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
        inference_configs: Optional[
            dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -478,6 +485,7 @@ def _create_evaluation_run(
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            labels=labels,
             config=config,
             inference_configs=inference_configs,
         )
@@ -1316,6 +1324,7 @@ def create_evaluation_run(
             list[types.EvaluationRunMetricOrDict]
         ] = None,  # TODO: Make required unified metrics available in prod.
         agent_info: Optional[types.AgentInfo] = None,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
@@ -1353,13 +1362,25 @@ def create_evaluation_run(
                     tools=agent_info.tool_declarations,
                 )
             )
+            if (
+                not agent_info.agent
+                or len(agent_info.agent.split("reasoningEngines/")) != 2
+            ):
+                raise ValueError(
+                    "agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
+                )
+            labels = labels or {}
+            labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
+                "reasoningEngines/"
+            )[-1]
 
         return self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
             display_name=display_name,
             data_source=dataset,
             evaluation_config=evaluation_config,
             inference_configs=inference_configs,
+            labels=labels,
             config=config,
         )
 
@@ -1566,6 +1587,7 @@ async def _create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
         inference_configs: Optional[
             dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -1580,6 +1602,7 @@ async def _create_evaluation_run(
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            labels=labels,
             config=config,
             inference_configs=inference_configs,
         )
@@ -2121,6 +2144,7 @@ async def create_evaluation_run(
             list[types.EvaluationRunMetricOrDict]
         ] = None,  # TODO: Make required unified metrics available in prod.
         agent_info: Optional[types.AgentInfo] = None,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
@@ -2158,13 +2182,25 @@ async def create_evaluation_run(
                     tools=agent_info.tool_declarations,
                 )
             )
+            if (
+                not agent_info.agent
+                or len(agent_info.agent.split("reasoningEngines/")) != 2
+            ):
+                raise ValueError(
+                    "agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
+                )
+            labels = labels or {}
+            labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
+                "reasoningEngines/"
+            )[-1]
 
         result = await self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
             display_name=display_name,
             data_source=dataset,
             evaluation_config=evaluation_config,
             inference_configs=inference_configs,
+            labels=labels,
             config=config,
         )
 
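
On the wire, the converters treat labels as an opaque string map: _CreateEvaluationRunParameters_to_vertex copies it into the request body and _EvaluationRun_from_vertex copies it back onto the run, with no key or value transformation. A rough sketch of that round trip with plain dicts (getv/setv reduced to ordinary dict access; the label values are placeholders):

# Request side: SDK parameters -> Vertex request body.
sdk_params = {"labels": {"label1": "value1"}}
request_body = {}
if sdk_params.get("labels") is not None:
    request_body["labels"] = sdk_params["labels"]  # copied through unchanged

# Response side: Vertex response -> fields of the returned EvaluationRun.
vertex_response = {
    "labels": {"label1": "value1", "vertex-ai-evaluation-agent-engine-id": "456"}
}
run_fields = {}
if vertex_response.get("labels") is not None:
    run_fields["labels"] = vertex_response["labels"]

assert request_body["labels"] == {"label1": "value1"}
assert run_fields["labels"]["vertex-ai-evaluation-agent-engine-id"] == "456"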

vertexai/_genai/types.py

Lines changed: 17 additions & 0 deletions
@@ -1220,6 +1220,7 @@ class _CreateEvaluationRunParameters(_common.BaseModel):
     evaluation_config: Optional[EvaluationRunConfig] = Field(
         default=None, description=""""""
     )
+    labels: Optional[dict[str, str]] = Field(default=None, description="""""")
     config: Optional[CreateEvaluationRunConfig] = Field(
         default=None, description=""""""
     )
@@ -1243,6 +1244,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False):
     evaluation_config: Optional[EvaluationRunConfigDict]
     """"""
 
+    labels: Optional[dict[str, str]]
+    """"""
+
     config: Optional[CreateEvaluationRunConfigDict]
     """"""
 
@@ -1482,6 +1486,11 @@ class EventDict(TypedDict, total=False):
 class AgentInfo(_common.BaseModel):
     """The agent info of an agent, used for agent eval."""
 
+    agent: Optional[str] = Field(
+        default=None,
+        description="""The agent engine used to run agent. Agent engine resource name in str type, with format
+      `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
+    )
     name: Optional[str] = Field(
         default=None, description="""Agent name, used as an identifier."""
     )
@@ -1499,6 +1508,10 @@ class AgentInfo(_common.BaseModel):
 class AgentInfoDict(TypedDict, total=False):
     """The agent info of an agent, used for agent eval."""
 
+    agent: Optional[str]
+    """The agent engine used to run agent. Agent engine resource name in str type, with format
+    `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""
+
     name: Optional[str]
     """Agent name, used as an identifier."""
 
@@ -1919,6 +1932,7 @@ class EvaluationRun(_common.BaseModel):
         default=None,
         description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""",
     )
+    labels: Optional[dict[str, str]] = Field(default=None, description="""""")
 
     # TODO(b/448806531): Remove all the overridden _from_response methods once the
     # ticket is resolved and published.
@@ -2003,6 +2017,9 @@ class EvaluationRunDict(TypedDict, total=False):
     inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]]
     """This field is experimental and may change in future versions. The inference configs for the evaluation run."""
 
+    labels: Optional[dict[str, str]]
+    """"""
+
 
 EvaluationRunOrDict = Union[EvaluationRun, EvaluationRunDict]
 

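Since the Dict variants added here are TypedDicts with total=False, the new fields can be exercised as partial plain dicts. A minimal sketch using the new agent and labels keys, assuming vertexai._genai.types is importable; the resource name and label values are placeholders:

from vertexai._genai import types

# New AgentInfo.agent field: the reasoning engine resource name from which the
# reserved evaluation label is derived.
agent_info: types.AgentInfoDict = {
    "agent": "projects/my-project/locations/us-central1/reasoningEngines/456",
    "name": "agent-1",
}

# New EvaluationRun.labels field, surfaced on the Dict variant as well.
run: types.EvaluationRunDict = {
    "labels": {
        "label1": "value1",
        "vertex-ai-evaluation-agent-engine-id": "456",
    },
}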