 from google.genai import types as genai_types
 import pytest
 
-
-def test_create_eval_run_data_source_evaluation_set(client):
-    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
-    client._api_client._http_options.api_version = "v1beta1"
-    tool = genai_types.Tool(
-        function_declarations=[
-            genai_types.FunctionDeclaration(
-                name="get_weather",
-                description="Get weather in a location",
-                parameters={
-                    "type": "object",
-                    "properties": {"location": {"type": "string"}},
-                },
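+# Shared test constants: the GCS output prefix passed as dest=, plus metric
+# fixtures referenced by the (temporarily disabled) unified-metrics test.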
+GCS_DEST = "gs://lakeyk-test-limited/eval_run_output"
+UNIVERSAL_AR_METRIC = types.EvaluationRunMetric(
+    metric="universal_ar_v1",
+    metric_config=types.UnifiedMetric(
+        predefined_metric_spec=types.PredefinedMetricSpec(
+            metric_spec_name="universal_ar_v1",
+        )
+    ),
+)
+FINAL_RESPONSE_QUALITY_METRIC = types.EvaluationRunMetric(
+    metric="final_response_quality_v1",
+    metric_config=types.UnifiedMetric(
+        predefined_metric_spec=types.PredefinedMetricSpec(
+            metric_spec_name="final_response_quality_v1",
+        )
+    ),
+)
+LLM_METRIC = types.EvaluationRunMetric(
+    metric="llm_metric",
+    metric_config=types.UnifiedMetric(
+        llm_based_metric_spec=types.LLMBasedMetricSpec(
+            metric_prompt_template=(
+                "\nEvaluate the fluency of the response. Provide a score from 1-5."
             )
-        ]
-    )
-    evaluation_run = client.evals.create_evaluation_run(
-        name="test4",
-        display_name="test4",
-        dataset=types.EvaluationRunDataSource(
-            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-        ),
-        agent_info=types.AgentInfo(
-            name="agent-1",
-            instruction="agent-1 instruction",
-            tool_declarations=[tool],
-        ),
-        dest="gs://lakeyk-limited-bucket/eval_run_output",
-    )
-    assert isinstance(evaluation_run, types.EvaluationRun)
-    assert evaluation_run.display_name == "test4"
-    assert evaluation_run.state == types.EvaluationRunState.PENDING
-    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == (
-        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-    )
-    assert evaluation_run.inference_configs[
-        "agent-1"
-    ] == types.EvaluationRunInferenceConfig(
-        agent_config=types.EvaluationRunAgentConfig(
-            developer_instruction=genai_types.Content(
-                parts=[genai_types.Part(text="agent-1 instruction")]
-            ),
-            tools=[tool],
         )
-    )
-    assert evaluation_run.error is None
+    ),
+)
+
+
+# TODO(b/431231205): Re-enable once Unified Metrics are in prod.
+# def test_create_eval_run_data_source_evaluation_set(client):
+#     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
+#     client._api_client._http_options.base_url = (
+#         "https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
+#     )
+#     client._api_client._http_options.api_version = "v1beta1"
+#     tool = genai_types.Tool(
+#         function_declarations=[
+#             genai_types.FunctionDeclaration(
+#                 name="get_weather",
+#                 description="Get weather in a location",
+#                 parameters={
+#                     "type": "object",
+#                     "properties": {"location": {"type": "string"}},
+#                 },
+#             )
+#         ]
+#     )
+#     evaluation_run = client.evals.create_evaluation_run(
+#         name="test4",
+#         display_name="test4",
+#         dataset=types.EvaluationRunDataSource(
+#             evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+#         ),
+#         dest=GCS_DEST,
+#         metrics=[
+#             UNIVERSAL_AR_METRIC,
+#             types.RubricMetric.FINAL_RESPONSE_QUALITY,
+#             LLM_METRIC,
+#         ],
+#         agent_info=types.AgentInfo(
+#             name="agent-1",
+#             instruction="agent-1 instruction",
+#             tool_declarations=[tool],
+#         ),
+#     )
+#     assert isinstance(evaluation_run, types.EvaluationRun)
+#     assert evaluation_run.display_name == "test4"
+#     assert evaluation_run.state == types.EvaluationRunState.PENDING
+#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+#     assert evaluation_run.data_source.evaluation_set == (
+#         "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+#     )
+#     assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+#         output_config=genai_types.OutputConfig(
+#             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
+#         ),
+#         metrics=[UNIVERSAL_AR_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
+#     )
+#     assert evaluation_run.inference_configs[
+#         "agent-1"
+#     ] == types.EvaluationRunInferenceConfig(
+#         agent_config=types.EvaluationRunAgentConfig(
+#             developer_instruction=genai_types.Content(
+#                 parts=[genai_types.Part(text="agent-1 instruction")]
+#             ),
+#             tools=[tool],
+#         )
+#     )
+#     assert evaluation_run.error is None
 
 
 def test_create_eval_run_data_source_bigquery_request_set(client):
@@ -84,7 +129,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
                 },
             )
         ),
-        dest="gs://lakeyk-limited-bucket/eval_run_output",
+        dest=GCS_DEST,
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test5"
@@ -101,6 +146,11 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
             },
         )
     )
+    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+        output_config=genai_types.OutputConfig(
+            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
+        ),
+    )
     assert evaluation_run.inference_configs is None
     assert evaluation_run.error is None
 
@@ -220,7 +270,7 @@ async def test_create_eval_run_async(client):
                 },
             )
         ),
-        dest="gs://lakeyk-limited-bucket/eval_run_output",
+        dest=GCS_DEST,
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test8"
@@ -233,6 +283,11 @@ async def test_create_eval_run_async(client):
             "checkpoint_2": "checkpoint_2",
         },
     )
+    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+        output_config=genai_types.OutputConfig(
+            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
+        ),
+    )
     assert evaluation_run.inference_configs is None
     assert evaluation_run.error is None
 
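
Note: the new evaluation_config assertion is repeated verbatim in both enabled
tests (and again in the commented-out one). A small module-level helper could
keep the expected output config in one place. A sketch only; the helper name is
hypothetical and assumes it sits beside the constants above:

    def _assert_gcs_output_config(evaluation_run):
        # Hypothetical helper, not part of this commit: checks that the run
        # echoes GCS_DEST back in its evaluation config.
        assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
            output_config=genai_types.OutputConfig(
                gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
            ),
        )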