Skip to content

Commit a53574d

Browse files
committed
refactor output parsing to allow for text part tool call not just raw string
1 parent 53b6963 commit a53574d

File tree

1 file changed

+242
-38
lines changed

1 file changed

+242
-38
lines changed

app-server/src/traces/spans.rs

Lines changed: 242 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,50 +1098,36 @@ fn output_message_from_genai_attributes(
10981098

10991099
let tool_calls = parse_tool_calls(attributes, prefix);
11001100

1101-
if tool_calls.is_empty() {
1102-
if let Some(Value::String(s)) = msg_content {
1103-
if let Ok(content) =
1104-
serde_json::from_str::<Vec<InstrumentationChatMessageContentPart>>(&s)
1105-
{
1106-
Some(ChatMessage {
1107-
role: msg_role,
1108-
content: ChatMessageContent::ContentPartList(
1109-
content
1110-
.into_iter()
1111-
.map(ChatMessageContentPart::from_instrumentation_content_part)
1112-
.collect(),
1113-
),
1114-
tool_call_id: None,
1115-
})
1116-
} else {
1117-
Some(ChatMessage {
1118-
role: msg_role,
1119-
content: ChatMessageContent::Text(s),
1120-
tool_call_id: None,
1121-
})
1122-
}
1101+
let content_parts = if let Some(Value::String(s)) = msg_content {
1102+
if let Ok(content) = serde_json::from_str::<Vec<InstrumentationChatMessageContentPart>>(&s)
1103+
{
1104+
content
1105+
.into_iter()
1106+
.map(ChatMessageContentPart::from_instrumentation_content_part)
1107+
.collect()
11231108
} else {
1124-
None
1125-
}
1126-
} else {
1127-
let mut out_vec = if let Some(Value::String(s)) = msg_content {
1128-
if s.is_empty() {
1109+
if s.is_empty() || s == "\"\"" {
11291110
vec![]
11301111
} else {
1131-
let text_block = ChatMessageContentPart::Text(ChatMessageText { text: s });
1132-
vec![text_block]
1112+
vec![ChatMessageContentPart::Text(ChatMessageText { text: s })]
11331113
}
1134-
} else {
1135-
vec![]
1136-
};
1137-
out_vec.extend(
1138-
tool_calls
1139-
.into_iter()
1140-
.map(|tool_call| ChatMessageContentPart::ToolCall(tool_call)),
1141-
);
1114+
}
1115+
} else {
1116+
vec![]
1117+
};
1118+
let tool_call_parts = tool_calls
1119+
.into_iter()
1120+
.map(|tool_call| ChatMessageContentPart::ToolCall(tool_call))
1121+
.collect::<Vec<_>>();
1122+
1123+
if content_parts.is_empty() && tool_call_parts.is_empty() {
1124+
None
1125+
} else {
11421126
Some(ChatMessage {
11431127
role: msg_role,
1144-
content: ChatMessageContent::ContentPartList(out_vec),
1128+
content: ChatMessageContent::ContentPartList(
1129+
content_parts.into_iter().chain(tool_call_parts).collect(),
1130+
),
11451131
tool_call_id: None,
11461132
})
11471133
}
@@ -2978,4 +2964,222 @@ mod tests {
29782964
panic!("Expected arguments to be present");
29792965
}
29802966
}
2967+
2968+
/// Verifies that when the output of the model contains both text parts
2969+
/// and tool calls, the text parts are parsed correctly. In contrast, Anthropic
2970+
/// instrumentation yields the text block preceding the tool calls as a raw string.
2971+
#[test]
2972+
fn test_parse_and_enrich_attributes_google_genai() {
2973+
let attributes = HashMap::from([
2974+
("gen_ai.system".to_string(), json!("gemini")),
2975+
(
2976+
"gen_ai.request.model".to_string(),
2977+
json!("gemini-2.5-flash-lite"),
2978+
),
2979+
(
2980+
"gen_ai.response.model".to_string(),
2981+
json!("gemini-2.5-flash-lite"),
2982+
),
2983+
(
2984+
"gen_ai.response.id".to_string(),
2985+
json!("F1CwaLjFLfOUxN8PhMGb-Qc"),
2986+
),
2987+
("gen_ai.prompt.0.role".to_string(), json!("user")),
2988+
(
2989+
"gen_ai.prompt.0.content".to_string(),
2990+
json!(
2991+
"[{\"type\":\"text\",\"text\":\"What's the opposite of 'bright'? Also, what is the weather in Tokyo?\"}]"
2992+
),
2993+
),
2994+
// This is the important bit: the completion content is a JSON-encoded list of text parts, not a raw string
2995+
("gen_ai.completion.0.role".to_string(), json!("model")),
2996+
(
2997+
"gen_ai.completion.0.content".to_string(),
2998+
json!(
2999+
"[{\"type\":\"text\",\"text\":\"The opposite of 'bright' is 'dim'. I'll go ahead and get the weather in Tokyo for you.\"}]"
3000+
),
3001+
),
3002+
(
3003+
"gen_ai.completion.0.tool_calls.0.id".to_string(),
3004+
json!("get_weather"),
3005+
),
3006+
(
3007+
"gen_ai.completion.0.tool_calls.0.name".to_string(),
3008+
json!("get_weather"),
3009+
),
3010+
(
3011+
"gen_ai.completion.0.tool_calls.0.arguments".to_string(),
3012+
json!("{\"location\":\"Tokyo\"}"),
3013+
),
3014+
("gen_ai.usage.input_tokens".to_string(), json!(66)),
3015+
("gen_ai.usage.output_tokens".to_string(), json!(39)),
3016+
("llm.usage.total_tokens".to_string(), json!(105)),
3017+
("llm.request.type".to_string(), json!("completion")),
3018+
("lmnr.span.sdk_version".to_string(), json!("0.7.8")),
3019+
(
3020+
"lmnr.span.language_version".to_string(),
3021+
json!("python@3.13"),
3022+
),
3023+
(
3024+
"lmnr.span.instrumentation_source".to_string(),
3025+
json!("python"),
3026+
),
3027+
]);
3028+
3029+
let mut span = Span {
3030+
span_id: Uuid::new_v4(),
3031+
project_id: Uuid::new_v4(),
3032+
trace_id: Uuid::new_v4(),
3033+
parent_span_id: None,
3034+
name: "gemini.generate_content".to_string(),
3035+
attributes: SpanAttributes::new(attributes),
3036+
start_time: Utc::now(),
3037+
end_time: Utc::now(),
3038+
span_type: SpanType::LLM,
3039+
input: None,
3040+
output: None,
3041+
events: None,
3042+
status: None,
3043+
labels: None,
3044+
input_url: None,
3045+
output_url: None,
3046+
};
3047+
3048+
// Verify initial state
3049+
assert!(span.input.is_none());
3050+
assert!(span.output.is_none());
3051+
assert!(
3052+
span.attributes
3053+
.raw_attributes
3054+
.get("gen_ai.prompt.0.content")
3055+
.is_some()
3056+
);
3057+
assert!(
3058+
span.attributes
3059+
.raw_attributes
3060+
.get("gen_ai.completion.0.content")
3061+
.is_some()
3062+
);
3063+
assert!(
3064+
span.attributes
3065+
.raw_attributes
3066+
.get("gen_ai.completion.0.tool_calls.0.name")
3067+
.is_some()
3068+
);
3069+
3070+
span.parse_and_enrich_attributes();
3071+
3072+
assert!(span.input.is_some());
3073+
let input = span.input.as_ref().unwrap();
3074+
let input_messages: Vec<ChatMessage> = serde_json::from_value(input.clone()).unwrap();
3075+
assert_eq!(input_messages.len(), 1);
3076+
3077+
assert_eq!(input_messages[0].role, "user");
3078+
match &input_messages[0].content {
3079+
ChatMessageContent::ContentPartList(parts) => {
3080+
assert_eq!(parts.len(), 1);
3081+
let text_part = &parts[0];
3082+
match text_part {
3083+
ChatMessageContentPart::Text(text) => {
3084+
assert_eq!(
3085+
text.text,
3086+
"What's the opposite of 'bright'? Also, what is the weather in Tokyo?"
3087+
);
3088+
}
3089+
_ => panic!("Expected text content for user message"),
3090+
}
3091+
}
3092+
_ => panic!("Expected content part list for user message"),
3093+
}
3094+
3095+
assert!(span.output.is_some());
3096+
let output = span.output.as_ref().unwrap();
3097+
let output_messages: Vec<ChatMessage> = serde_json::from_value(output.clone()).unwrap();
3098+
assert_eq!(output_messages.len(), 1);
3099+
3100+
assert_eq!(output_messages[0].role, "model");
3101+
match &output_messages[0].content {
3102+
ChatMessageContent::ContentPartList(parts) => {
3103+
assert_eq!(parts.len(), 2); // text part + tool call part
3104+
3105+
// First part should be text
3106+
match &parts[0] {
3107+
ChatMessageContentPart::Text(text_part) => {
3108+
assert_eq!(
3109+
text_part.text,
3110+
"The opposite of 'bright' is 'dim'. I'll go ahead and get the weather in Tokyo for you."
3111+
);
3112+
}
3113+
_ => panic!("Expected text part as first content part"),
3114+
}
3115+
3116+
// Second part should be tool call
3117+
match &parts[1] {
3118+
ChatMessageContentPart::ToolCall(tool_call) => {
3119+
assert_eq!(tool_call.name, "get_weather");
3120+
assert_eq!(tool_call.id, Some("get_weather".to_string()));
3121+
assert!(tool_call.arguments.is_some());
3122+
let args = tool_call.arguments.as_ref().unwrap();
3123+
assert_eq!(args.get("location").unwrap(), &json!("Tokyo"));
3124+
}
3125+
_ => panic!("Expected tool call as second content part"),
3126+
}
3127+
}
3128+
_ => panic!("Expected content part list for assistant output"),
3129+
}
3130+
3131+
// Verify that tool call attributes are preserved
3132+
assert_eq!(
3133+
span.attributes
3134+
.raw_attributes
3135+
.get("gen_ai.completion.0.tool_calls.0.name"),
3136+
Some(&json!("get_weather"))
3137+
);
3138+
assert_eq!(
3139+
span.attributes
3140+
.raw_attributes
3141+
.get("gen_ai.completion.0.tool_calls.0.id"),
3142+
Some(&json!("get_weather"))
3143+
);
3144+
assert_eq!(
3145+
span.attributes
3146+
.raw_attributes
3147+
.get("gen_ai.completion.0.tool_calls.0.arguments"),
3148+
Some(&json!("{\"location\":\"Tokyo\"}"))
3149+
);
3150+
3151+
// Verify that other attributes are preserved
3152+
assert_eq!(
3153+
span.attributes.raw_attributes.get("gen_ai.system"),
3154+
Some(&json!("gemini"))
3155+
);
3156+
assert_eq!(
3157+
span.attributes.raw_attributes.get("gen_ai.request.model"),
3158+
Some(&json!("gemini-2.5-flash-lite"))
3159+
);
3160+
assert_eq!(
3161+
span.attributes.raw_attributes.get("gen_ai.response.model"),
3162+
Some(&json!("gemini-2.5-flash-lite"))
3163+
);
3164+
assert_eq!(
3165+
span.attributes.raw_attributes.get("gen_ai.response.id"),
3166+
Some(&json!("F1CwaLjFLfOUxN8PhMGb-Qc"))
3167+
);
3168+
assert_eq!(
3169+
span.attributes
3170+
.raw_attributes
3171+
.get("gen_ai.usage.input_tokens"),
3172+
Some(&json!(66))
3173+
);
3174+
assert_eq!(
3175+
span.attributes
3176+
.raw_attributes
3177+
.get("gen_ai.usage.output_tokens"),
3178+
Some(&json!(39))
3179+
);
3180+
assert_eq!(
3181+
span.attributes.raw_attributes.get("llm.usage.total_tokens"),
3182+
Some(&json!(105))
3183+
);
3184+
}
29813185
}

0 commit comments

Comments
 (0)