@@ -1885,6 +1885,56 @@ def test_convert_with_additional_columns(self):
         eval_case = result_dataset.eval_cases[0]
         assert eval_case.custom_column == "custom_value"

+    def test_convert_with_agent_eval_fields(self):
+        """Tests that agent eval data is converted correctly from a flattened format."""
+        raw_data_df = pd.DataFrame(
+            {
+                "prompt": ["Hello"],
+                "response": ["Hi"],
+                "intermediate_events": [
+                    [
+                        {
+                            "event_id": "event1",
+                            "content": {"parts": [{"text": "intermediate event"}]},
+                        }
+                    ]
+                ],
+            }
+        )
+        raw_data = raw_data_df.to_dict(orient="records")
+        result_dataset = self.converter.convert(raw_data)
+        assert len(result_dataset.eval_cases) == 1
+        eval_case = result_dataset.eval_cases[0]
+        assert eval_case.intermediate_events[0].event_id == "event1"
+
+    def test_convert_with_intermediate_events_as_event_objects(self):
+        """Tests that agent eval data is converted correctly when intermediate_events are Event objects."""
+        raw_data_df = pd.DataFrame(
+            {
+                "prompt": ["Hello"],
+                "response": ["Hi"],
+                "intermediate_events": [
+                    [
+                        vertexai_genai_types.Event(
+                            event_id="event1",
+                            content=genai_types.Content(
+                                parts=[genai_types.Part(text="intermediate event")]
+                            ),
+                        )
+                    ]
+                ],
+            }
+        )
+        raw_data = raw_data_df.to_dict(orient="records")
+        result_dataset = self.converter.convert(raw_data)
+        assert len(result_dataset.eval_cases) == 1
+        eval_case = result_dataset.eval_cases[0]
+        assert eval_case.intermediate_events[0].event_id == "event1"
+        assert (
+            eval_case.intermediate_events[0].content.parts[0].text
+            == "intermediate event"
+        )
+

 class TestOpenAIDataConverter:
     """Unit tests for the _OpenAIDataConverter class."""
@@ -2765,7 +2815,10 @@ def test_merge_flatten_and_gemini_datasets(self):
         )

     def test_merge_empty_input_list(self):
-        with pytest.raises(ValueError, match="Input 'raw_datasets' cannot be empty."):
+        with pytest.raises(
+            ValueError,
+            match="Input 'raw_datasets' cannot be empty and must be a list of lists.",
+        ):
             _evals_data_converters.merge_response_datasets_into_canonical_format(
                 raw_datasets=[], schemas=[]
             )
@@ -2810,7 +2863,10 @@ def test_merge_mismatched_schemas_list_length(self):
         ]
         with pytest.raises(
             ValueError,
-            match="A list of schemas must be provided, one for each raw dataset.",
+            match=(
+                "A list of schemas must be provided, one for each raw dataset. Got 2"
+                " schemas for 3 datasets."
+            ),
         ):
             _evals_data_converters.merge_response_datasets_into_canonical_format(
                 [raw_dataset_1, raw_dataset_2, raw_dataset_3],
@@ -2824,7 +2880,10 @@ def test_merge_empty_schemas_list(self):
         ]
         with pytest.raises(
             ValueError,
-            match="A list of schemas must be provided, one for each raw dataset.",
+            match=(
+                "A list of schemas must be provided, one for each raw dataset. Got 0"
+                " schemas for 1 datasets."
+            ),
         ):
             _evals_data_converters.merge_response_datasets_into_canonical_format(
                 [raw_dataset_1], schemas=[]
@@ -2918,6 +2977,46 @@ def test_merge_with_different_custom_columns(self):
         assert merged_dataset.eval_cases[1].custom_col_2 == "value_2_2"
         assert merged_dataset.eval_cases[1].custom_col_3 == "value_2_3"

+    def test_merge_with_intermediate_events(self):
+        raw_dataset_1 = [
+            {
+                "prompt": "Prompt 1",
+                "response": "Response 1a",
+                "intermediate_events": [
+                    {
+                        "event_id": "event1",
+                        "content": {"parts": [{"text": "intermediate event"}]},
+                    }
+                ],
+            }
+        ]
+        raw_dataset_2 = [
+            {
+                "prompt": "Prompt 1",
+                "response": "Response 1b",
+                "intermediate_events": [
+                    {
+                        "event_id": "event2",
+                        "content": {"parts": [{"text": "intermediate event 2"}]},
+                    }
+                ],
+            }
+        ]
+        schemas = [
+            _evals_data_converters.EvalDatasetSchema.FLATTEN,
+            _evals_data_converters.EvalDatasetSchema.FLATTEN,
+        ]
+
+        merged_dataset = (
+            _evals_data_converters.merge_response_datasets_into_canonical_format(
+                [raw_dataset_1, raw_dataset_2], schemas=schemas
+            )
+        )
+
+        assert len(merged_dataset.eval_cases) == 1
+        assert len(merged_dataset.eval_cases[0].intermediate_events) == 1
+        assert merged_dataset.eval_cases[0].intermediate_events[0].event_id == "event1"
+
     def test_merge_with_metadata(self):
         raw_dataset_1 = [
             {