Skip to content

Commit 00e57de

Browse files
vertex-sdk-bot authored and
copybara-github committed
feat: Updated Observability GenAI data format converter for JSONL
PiperOrigin-RevId: 818756007
1 parent 0d1240e commit 00e57de

File tree

2 files changed

+75
-84
lines changed

2 files changed

+75
-84
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2036,15 +2036,15 @@ def test_convert_simple_request_response(self):
20362036
raw_data = [
20372037
{
20382038
"format": "observability",
2039-
"request": [
2039+
"request": json.dumps(
20402040
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
2041-
],
2042-
"response": [
2041+
),
2042+
"response": json.dumps(
20432043
{
20442044
"role": "system",
20452045
"parts": [{"content": "Hi", "type": "text"}],
20462046
}
2047-
],
2047+
),
20482048
}
20492049
]
20502050
result_dataset = self.converter.convert(raw_data)
@@ -2068,19 +2068,21 @@ def test_convert_with_system_instruction(self):
20682068
raw_data = [
20692069
{
20702070
"format": "observability",
2071-
"request": [
2071+
"request": json.dumps(
20722072
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
2073-
],
2074-
"response": [
2073+
),
2074+
"response": json.dumps(
20752075
{
20762076
"role": "system",
20772077
"parts": [{"content": "Hi", "type": "text"}],
20782078
}
2079-
],
2080-
"system_instruction": {
2081-
"role": "user",
2082-
"parts": [{"content": "Be helpful", "type": "text"}],
2083-
},
2079+
),
2080+
"system_instruction": json.dumps(
2081+
{
2082+
"role": "user",
2083+
"parts": [{"content": "Be helpful", "type": "text"}],
2084+
}
2085+
),
20842086
}
20852087
]
20862088
result_dataset = self.converter.convert(raw_data)
@@ -2093,22 +2095,28 @@ def test_convert_with_conversation_history(self):
20932095
raw_data = [
20942096
{
20952097
"format": "observability",
2096-
"request": [
2097-
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]},
2098-
{"role": "system", "parts": [{"content": "Hi", "type": "text"}]},
2098+
"request": json.dumps(
2099+
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
2100+
)
2101+
+ "\n"
2102+
+ json.dumps(
2103+
{"role": "system", "parts": [{"content": "Hi", "type": "text"}]}
2104+
)
2105+
+ "\n"
2106+
+ json.dumps(
20992107
{
21002108
"role": "user",
21012109
"parts": [
21022110
{"content": "What's the meaning of life?", "type": "text"}
21032111
],
2104-
},
2105-
],
2106-
"response": [
2112+
}
2113+
),
2114+
"response": json.dumps(
21072115
{
21082116
"role": "system",
21092117
"parts": [{"content": "42.", "type": "text"}],
21102118
}
2111-
],
2119+
),
21122120
}
21132121
]
21142122

@@ -2139,27 +2147,27 @@ def test_convert_multiple_request_response(self):
21392147
raw_data = [
21402148
{
21412149
"format": "observability",
2142-
"request": [
2150+
"request": json.dumps(
21432151
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
2144-
],
2145-
"response": [
2152+
),
2153+
"response": json.dumps(
21462154
{
21472155
"role": "system",
21482156
"parts": [{"content": "Hi", "type": "text"}],
21492157
}
2150-
],
2158+
),
21512159
},
21522160
{
21532161
"format": "observability",
2154-
"request": [
2162+
"request": json.dumps(
21552163
{"role": "user", "parts": [{"content": "Goodbye", "type": "text"}]}
2156-
],
2157-
"response": [
2164+
),
2165+
"response": json.dumps(
21582166
{
21592167
"role": "system",
21602168
"parts": [{"content": "Bye", "type": "text"}],
21612169
}
2162-
],
2170+
),
21632171
},
21642172
]
21652173
result_dataset = self.converter.convert(raw_data)
@@ -2187,7 +2195,7 @@ def test_convert_skips_unknown_part_type(self):
21872195
raw_data = [
21882196
{
21892197
"format": "observability",
2190-
"request": [
2198+
"request": json.dumps(
21912199
{
21922200
"role": "user",
21932201
"parts": [
@@ -2196,13 +2204,13 @@ def test_convert_skips_unknown_part_type(self):
21962204
{"content": "Hello", "type": "text"},
21972205
],
21982206
}
2199-
],
2200-
"response": [
2207+
),
2208+
"response": json.dumps(
22012209
{
22022210
"role": "system",
22032211
"parts": [{"content": "Hi", "type": "text"}],
22042212
}
2205-
],
2213+
),
22062214
}
22072215
]
22082216

@@ -2217,12 +2225,12 @@ def test_convert_skips_missing_request(self):
22172225
raw_data = [
22182226
{
22192227
"format": "observability",
2220-
"response": [
2228+
"response": json.dumps(
22212229
{
22222230
"role": "system",
22232231
"parts": [{"content": "Hi", "type": "text"}],
22242232
}
2225-
],
2233+
),
22262234
}
22272235
]
22282236
result_dataset = self.converter.convert(raw_data)
@@ -2232,9 +2240,9 @@ def test_convert_skips_missing_response(self):
22322240
raw_data = [
22332241
{
22342242
"format": "observability",
2235-
"request": [
2243+
"request": json.dumps(
22362244
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
2237-
],
2245+
),
22382246
}
22392247
]
22402248
result_dataset = self.converter.convert(raw_data)
@@ -2244,7 +2252,7 @@ def test_convert_tool_call_parts(self):
22442252
raw_data = [
22452253
{
22462254
"format": "observability",
2247-
"request": [
2255+
"request": json.dumps(
22482256
{
22492257
"role": "user",
22502258
"parts": [
@@ -2256,8 +2264,8 @@ def test_convert_tool_call_parts(self):
22562264
}
22572265
],
22582266
}
2259-
],
2260-
"response": [
2267+
),
2268+
"response": json.dumps(
22612269
{
22622270
"role": "system",
22632271
"parts": [
@@ -2268,7 +2276,7 @@ def test_convert_tool_call_parts(self):
22682276
}
22692277
],
22702278
}
2271-
],
2279+
),
22722280
}
22732281
]
22742282
result_dataset = self.converter.convert(raw_data)
@@ -3433,15 +3441,15 @@ def test_auto_detect_observability_schema(self):
34333441
raw_data = [
34343442
{
34353443
"format": "observability",
3436-
"request": [
3444+
"request": json.dumps(
34373445
{"role": "user", "parts": [{"content": "Hello", "type": "text"}]}
3438-
],
3439-
"response": [
3446+
),
3447+
"response": json.dumps(
34403448
{
34413449
"role": "system",
34423450
"parts": [{"content": "Hi", "type": "text"}],
34433451
}
3444-
],
3452+
),
34453453
}
34463454
]
34473455
assert (

vertexai/_genai/_observability_data_converter.py

Lines changed: 24 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,26 @@
2828
logger = logging.getLogger("vertexai_genai._observability_data_converters")
2929

3030

31+
def _load_jsonl(data: Any, case_id: str) -> list[dict[Any, Any]]:
32+
"""Parses the raw JSONL data into a list of dict possible."""
33+
if isinstance(data, str):
34+
json_list = []
35+
for line in data.splitlines():
36+
loaded_json = json.loads(line)
37+
if not isinstance(loaded_json, dict):
38+
raise TypeError(
39+
f"Decoded JSON payload is not a dict for case "
40+
f"{case_id}. Type found: {type(loaded_json).__name__}"
41+
)
42+
json_list.append(loaded_json)
43+
return json_list
44+
else:
45+
raise TypeError(
46+
f"Payload is not a JSONL string for case {case_id}. Type "
47+
f"found: {type(data).__name__}"
48+
)
49+
50+
3151
class ObservabilityDataConverter(_evals_utils.EvalDataConverter):
3252
"""Converter for dataset in GCP Observability GenAI format."""
3353

@@ -131,44 +151,6 @@ def _parse_messages(
131151
reference=None,
132152
)
133153

134-
def _load_json_dict(self, data: Any, case_id: str) -> dict[Any, str]:
135-
"""Parses the raw data into a dict if possible."""
136-
if isinstance(data, str):
137-
loaded_json = json.loads(data)
138-
if isinstance(loaded_json, dict):
139-
return loaded_json
140-
else:
141-
raise TypeError(
142-
f"Decoded JSON payload is not a dictionary for case "
143-
f"{case_id}. Type found: {type(loaded_json).__name__}"
144-
)
145-
elif isinstance(data, dict):
146-
return data
147-
else:
148-
raise TypeError(
149-
f"Payload is not a dictionary for case {case_id}. Type found: "
150-
f"{type(data).__name__}"
151-
)
152-
153-
def _load_json_list(self, data: Any, case_id: str) -> list[Any]:
154-
"""Parses the raw data into a list if possible."""
155-
if isinstance(data, str):
156-
loaded_json = json.loads(data)
157-
if isinstance(loaded_json, list):
158-
return loaded_json
159-
else:
160-
raise TypeError(
161-
f"Decoded JSON payload is not a list for case "
162-
f"{case_id}. Type found: {type(loaded_json).__name__}"
163-
)
164-
elif isinstance(data, list):
165-
return data
166-
else:
167-
raise TypeError(
168-
f"Payload is not a list for case {case_id}. Type found: "
169-
f"{type(data).__name__}"
170-
)
171-
172154
@override
173155
def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset:
174156
"""Converts a list of GCP Observability GenAI cases into an EvaluationDataset."""
@@ -185,15 +167,16 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset:
185167
continue
186168

187169
request_data = case.get("request", [])
188-
request_list = self._load_json_list(request_data, eval_case_id)
170+
request_list = _load_jsonl(request_data, eval_case_id)
189171

190172
response_data = case.get("response", [])
191-
response_list = self._load_json_list(response_data, eval_case_id)
173+
response_list = _load_jsonl(response_data, eval_case_id)
192174

193175
system_dict = None
194176
if "system_instruction" in case:
195177
system_data = case.get("system_instruction", {})
196-
system_dict = self._load_json_dict(system_data, eval_case_id)
178+
system_list = _load_jsonl(system_data, eval_case_id)
179+
system_dict = system_list[0] if system_list else {}
197180

198181
eval_case = self._parse_messages(
199182
eval_case_id, request_list, response_list, system_dict

0 commit comments

Comments
 (0)