@@ -38,15 +38,15 @@ def test_create_eval_run_data_source_evaluation_set(client):
     evaluation_run = client.evals.create_evaluation_run(
         name="test4",
         display_name="test4",
-        data_source=types.EvaluationRunDataSource(
+        dataset=types.EvaluationRunDataSource(
            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
         ),
         agent_info=types.AgentInfo(
             name="agent-1",
             instruction="agent-1 instruction",
             tool_declarations=[tool],
         ),
-        dest="gs://lakeyk-test-limited/eval_run_output",
+        dest="gs://lakeyk-limited-bucket/eval_run_output",
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test4"
@@ -73,7 +73,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
     evaluation_run = client.evals.create_evaluation_run(
         name="test5",
         display_name="test5",
-        data_source=types.EvaluationRunDataSource(
+        dataset=types.EvaluationRunDataSource(
             bigquery_request_set=types.BigQueryRequestSet(
                 uri="bq://lakeyk-test-limited.inference_batch_prediction_input.1317387725199900672_1b",
                 prompt_column="request",
@@ -84,7 +84,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
                },
            )
         ),
-        dest="gs://lakeyk-test-limited/eval_run_output",
+        dest="gs://lakeyk-limited-bucket/eval_run_output",
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test5"
@@ -105,6 +105,101 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
     assert evaluation_run.error is None


+# Test fails in replay mode because of the timestamp issue
+# def test_create_eval_run_data_source_evaluation_dataset(client):
+#     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
+#     input_df = pd.DataFrame(
+#         {
+#             "prompt": ["prompt1", "prompt2"],
+#             "reference": ["reference1", "reference2"],
+#             "response": ["response1", "response2"],
+#             "intermediate_events": [
+#                 [
+#                     {
+#                         "content": {
+#                             "parts": [
+#                                 {"text": "first user input"},
+#                             ],
+#                             "role": "user",
+#                         },
+#                     },
+#                     {
+#                         "content": {
+#                             "parts": [
+#                                 {"text": "first model response"},
+#                             ],
+#                             "role": "model",
+#                         },
+#                     },
+#                 ],
+#                 [
+#                     {
+#                         "content": {
+#                             "parts": [
+#                                 {"text": "second user input"},
+#                             ],
+#                             "role": "user",
+#                         },
+#                     },
+#                     {
+#                         "content": {
+#                             "parts": [
+#                                 {"text": "second model response"},
+#                             ],
+#                             "role": "model",
+#                         },
+#                     },
+#                 ],
+#             ],
+#         }
+#     )
+#     evaluation_run = client.evals.create_evaluation_run(
+#         name="test6",
+#         display_name="test6",
+#         dataset=types.EvaluationDataset(
+#             candidate_name="candidate_1",
+#             eval_dataset_df=input_df,
+#         ),
+#         dest="gs://lakeyk-limited-bucket/eval_run_output",
+#     )
+#     assert isinstance(evaluation_run, types.EvaluationRun)
+#     assert evaluation_run.display_name == "test6"
+#     assert evaluation_run.state == types.EvaluationRunState.PENDING
+#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+#     # Check evaluation set
+#     assert evaluation_run.data_source.evaluation_set
+#     eval_set = client.evals.get_evaluation_set(
+#         name=evaluation_run.data_source.evaluation_set
+#     )
+#     assert len(eval_set.evaluation_items) == 2
+#     # Check evaluation items
+#     for i, eval_item_name in enumerate(eval_set.evaluation_items):
+#         eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+#         assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+#         assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].text
+#             == input_df.iloc[i]["response"]
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].events[0].parts[0].text
+#             == input_df.iloc[i]["intermediate_events"][0]["content"]["parts"][0]["text"]
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].events[0].role
+#             == input_df.iloc[i]["intermediate_events"][0]["content"]["role"]
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].events[1].parts[0].text
+#             == input_df.iloc[i]["intermediate_events"][1]["content"]["parts"][0]["text"]
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].events[1].role
+#             == input_df.iloc[i]["intermediate_events"][1]["content"]["role"]
+#         )
+#     assert evaluation_run.error is None
+
+
 pytest_plugins = ("pytest_asyncio",)


@@ -114,7 +209,7 @@ async def test_create_eval_run_async(client):
     evaluation_run = await client.aio.evals.create_evaluation_run(
         name="test8",
         display_name="test8",
-        data_source=types.EvaluationRunDataSource(
+        dataset=types.EvaluationRunDataSource(
             bigquery_request_set=types.BigQueryRequestSet(
                 uri="bq://lakeyk-test-limited.inference_batch_prediction_input.1317387725199900672_1b",
                 prompt_column="request",
@@ -125,7 +220,7 @@ async def test_create_eval_run_async(client):
                },
            )
         ),
-        dest="gs://lakeyk-test-limited/eval_run_output",
+        dest="gs://lakeyk-limited-bucket/eval_run_output",
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test8"
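
For reference, a minimal standalone sketch of the renamed keyword argument (data_source -> dataset), assuming the same `client` fixture and `types` module this test file imports; the run name here is a hypothetical placeholder, while the evaluation set path and destination bucket are the values used in the tests above:

# Hedged sketch, not part of the diff: `client` is assumed to be a configured
# evals client and `types` the SDK types module imported by this test file.
evaluation_run = client.evals.create_evaluation_run(
    name="example-run",  # hypothetical run name for illustration
    display_name="example-run",
    dataset=types.EvaluationRunDataSource(  # keyword renamed from data_source
        evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
    ),
    dest="gs://lakeyk-limited-bucket/eval_run_output",
)
assert isinstance(evaluation_run, types.EvaluationRun)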