Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 13 additions & 31 deletions crates/lingua/src/processing/import.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::processing::import_openai_responses::try_import_openai_responses;
use crate::providers::anthropic::generated as anthropic;
use crate::providers::openai::convert::ChatCompletionRequestMessageExt;
use crate::providers::openai::generated as openai;
use crate::serde_json;
use crate::serde_json::Value;
use crate::universal::convert::TryFromLLM;
Expand Down Expand Up @@ -57,6 +57,10 @@ fn has_message_structure(data: &Value) -> bool {

/// Try to convert a value to lingua messages by attempting multiple format conversions
fn try_converting_to_messages(data: &Value) -> Vec<Message> {
if let Some(messages) = try_import_openai_responses(data) {
return messages;
}

// Early bailout: if data doesn't have message structure, skip expensive deserializations
if !has_message_structure(data) {
// Still try nested object search (for wrapped messages like {messages: [...]})
Expand Down Expand Up @@ -104,32 +108,6 @@ fn try_converting_to_messages(data: &Value) -> Vec<Message> {
}
}

// Try Responses API format
if let Ok(provider_messages) =
serde_json::from_value::<Vec<openai::InputItem>>(data_to_parse.clone())
{
if let Ok(messages) =
<Vec<Message> as TryFromLLM<Vec<openai::InputItem>>>::try_from(provider_messages)
{
if !messages.is_empty() {
return messages;
}
}
}

// Try Responses API output format
if let Ok(provider_messages) =
serde_json::from_value::<Vec<openai::OutputItem>>(data_to_parse.clone())
{
if let Ok(messages) =
<Vec<Message> as TryFromLLM<Vec<openai::OutputItem>>>::try_from(provider_messages)
{
if !messages.is_empty() {
return messages;
}
}
}

// Try Anthropic format (including role-based system/developer messages).
if let Some(anthropic_messages) = try_anthropic_or_system_messages(data_to_parse) {
if !anthropic_messages.is_empty() {
Expand Down Expand Up @@ -265,7 +243,7 @@ fn parse_user_content(value: &Value) -> Option<UserContent> {
for item in arr {
if let Some(obj) = item.as_object() {
if let Some(Value::String(text_type)) = obj.get("type") {
if text_type == "text" {
if matches!(text_type.as_str(), "text" | "input_text" | "output_text") {
if let Some(Value::String(text)) = obj.get("text") {
parts.push(UserContentPart::Text(TextContentPart {
text: text.clone(),
Expand Down Expand Up @@ -296,7 +274,7 @@ fn parse_assistant_content(value: &Value) -> Option<AssistantContent> {
for item in arr {
if let Some(obj) = item.as_object() {
if let Some(Value::String(text_type)) = obj.get("type") {
if text_type == "text" {
if matches!(text_type.as_str(), "text" | "output_text" | "input_text") {
if let Some(Value::String(text)) = obj.get("text") {
parts.push(crate::universal::AssistantContentPart::Text(
TextContentPart {
Expand Down Expand Up @@ -425,8 +403,12 @@ pub fn import_messages_from_spans(spans: Vec<Span>) -> Vec<Message> {
let mut messages = Vec::new();

for span in spans {
// Try to extract messages from input
if let Some(input) = &span.input {
// Preserve raw string prompts as user messages (common in Responses API traces).
if let Some(Value::String(input_text)) = &span.input {
messages.push(Message::User {
content: UserContent::String(input_text.clone()),
});
} else if let Some(input) = &span.input {
let input_messages = try_converting_to_messages(input);
messages.extend(input_messages);
}
Expand Down
265 changes: 265 additions & 0 deletions crates/lingua/src/processing/import_openai_responses.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
use crate::providers::openai::generated as openai;
use crate::serde_json;
use crate::universal::convert::TryFromLLM;
use crate::universal::{AssistantContent, AssistantContentPart, Message, TextContentPart};
use serde::de::Deserializer;
use serde::{Deserialize, Serialize};

/// Item `type` tags that this module rewrites before import.
///
/// The serialized forms are the snake_case wire names (for example
/// `function_call_output`), produced here via `rename_all` instead of a
/// per-variant `rename` attribute.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
enum ResponsesImportItemKind {
    /// Canonical Responses API tool-output item.
    FunctionCallOutput,
    /// Alternate spelling that normalization folds into `function_call_output`.
    FunctionCallResult,
    /// Custom tool-call output item.
    CustomToolCallOutput,
    /// Image generation call whose `result` payload is stringified.
    ImageGenerationCall,
}

/// A `type` tag that is either one of the kinds this module normalizes or an
/// arbitrary other string. The untagged representation lets unknown item
/// types round-trip verbatim through the compat structs.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
enum ResponsesImportItemType {
    /// One of the item kinds this module rewrites.
    Known(ResponsesImportItemKind),
    /// Any other `type` string, preserved as-is.
    Other(String),
}

/// Minimal probe that reads only an item's `type` field. Deserialization
/// fails for any `type` outside `ResponsesImportItemKind`, which callers use
/// (via `.ok()`) to route items that don't need kind-specific rewriting.
#[derive(Debug, Deserialize)]
struct ResponsesImportItemKindProbe {
    #[serde(rename = "type")]
    item_type: ResponsesImportItemKind,
}

/// Catch-all compat shape for items that need no kind-specific rewriting:
/// it accepts the camelCase `callId` alias on input and re-serializes it as
/// snake_case `call_id`, carrying every other field through unchanged via the
/// flattened map.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ResponsesImportCallIdCompatItem {
    /// Optional `type` tag; unknown strings are preserved verbatim.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    item_type: Option<ResponsesImportItemType>,
    /// Tool-call correlation id; `callId` (camelCase) is accepted on input.
    #[serde(default, alias = "callId", skip_serializing_if = "Option::is_none")]
    call_id: Option<serde_json::Value>,
    /// All remaining fields, passed through untouched.
    #[serde(flatten)]
    extra: serde_json::Map<String, serde_json::Value>,
}

/// `type` tags accepted for tool-output compat items. Normalization folds
/// `function_call_result` into `function_call_output`; the snake_case wire
/// names come from `rename_all` rather than per-variant renames.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
enum ResponsesImportFunctionOutputKind {
    FunctionCallOutput,
    FunctionCallResult,
    CustomToolCallOutput,
}

/// Compat shape for tool-output items (`function_call_output`,
/// `function_call_result`, `custom_tool_call_output`).
///
/// Accepts the camelCase `callId` alias and coerces a structured `output`
/// value into its JSON text — presumably so downstream string-typed schemas
/// can parse it (TODO: confirm against `openai::InputItem`).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ResponsesImportFunctionOutputCompatItem {
    #[serde(rename = "type")]
    item_type: ResponsesImportFunctionOutputKind,
    /// Tool-call correlation id; `callId` (camelCase) is accepted on input.
    #[serde(default, alias = "callId", skip_serializing_if = "Option::is_none")]
    call_id: Option<serde_json::Value>,
    /// Tool output, stringified when the payload carried a non-string value.
    /// Skipped when `None` so re-serialization does not inject
    /// `"output": null` into items that never had the key (which would make
    /// normalization report a spurious change).
    #[serde(
        default,
        deserialize_with = "deserialize_optional_string_or_json",
        skip_serializing_if = "Option::is_none"
    )]
    output: Option<String>,
    /// All remaining fields, passed through untouched.
    #[serde(flatten)]
    extra: serde_json::Map<String, serde_json::Value>,
}

/// Compat shape for `image_generation_call` items.
///
/// Accepts the camelCase `callId` alias and coerces a structured `result`
/// value into its JSON text.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ResponsesImportImageGenerationCompatItem {
    #[serde(rename = "type")]
    item_type: ResponsesImportItemKind,
    /// Tool-call correlation id; `callId` (camelCase) is accepted on input.
    #[serde(default, alias = "callId", skip_serializing_if = "Option::is_none")]
    call_id: Option<serde_json::Value>,
    /// Generation result, stringified when it was not already a string.
    /// Skipped when `None` so re-serialization does not inject
    /// `"result": null` into items that never had the key.
    #[serde(
        default,
        deserialize_with = "deserialize_optional_string_or_json",
        skip_serializing_if = "Option::is_none"
    )]
    result: Option<String>,
    /// All remaining fields, passed through untouched.
    #[serde(flatten)]
    extra: serde_json::Map<String, serde_json::Value>,
}

/// Deserialize a field that may hold either a JSON string or any other JSON
/// value, coercing non-strings to their compact JSON text. A JSON `null`
/// yields `None`.
fn deserialize_optional_string_or_json<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
    D: Deserializer<'de>,
{
    let parsed = Option::<serde_json::Value>::deserialize(deserializer)?;
    Ok(parsed.map(|value| match value {
        serde_json::Value::String(text) => text,
        other => other.to_string(),
    }))
}

/// Rewrite a single Responses API item into the canonical shape expected by
/// the generated OpenAI types.
///
/// Rewrites applied:
/// - `callId` → `call_id` (all item types),
/// - `function_call_result` → `function_call_output`,
/// - non-string `output` / `result` payloads → their JSON text.
///
/// Returns `Some(rewritten)` only when the result differs from `original`;
/// `None` means either "no change needed" or the item could not be parsed as
/// any compat shape.
fn normalize_responses_import_item_value(
    original: &serde_json::Value,
) -> Option<serde_json::Value> {
    // Peek at the `type` tag; the probe only parses for kinds we special-case,
    // so everything else falls through to the catch-all arm below.
    let item_kind = serde_json::from_value::<ResponsesImportItemKindProbe>(original.clone())
        .ok()
        .map(|probe| probe.item_type);

    let normalized = match item_kind {
        // Tool-output items: accept legacy spellings and stringify `output`.
        Some(
            ResponsesImportItemKind::FunctionCallOutput
            | ResponsesImportItemKind::FunctionCallResult
            | ResponsesImportItemKind::CustomToolCallOutput,
        ) => {
            let mut compat =
                serde_json::from_value::<ResponsesImportFunctionOutputCompatItem>(original.clone())
                    .ok()?;
            // `function_call_result` is a non-canonical spelling; fold it into
            // `function_call_output` so downstream parsing sees one tag.
            if matches!(
                compat.item_type,
                ResponsesImportFunctionOutputKind::FunctionCallResult
            ) {
                compat.item_type = ResponsesImportFunctionOutputKind::FunctionCallOutput;
            }
            serde_json::to_value(compat).ok()?
        }
        // Image generation calls: stringify the `result` payload.
        Some(ResponsesImportItemKind::ImageGenerationCall) => {
            let compat = serde_json::from_value::<ResponsesImportImageGenerationCompatItem>(
                original.clone(),
            )
            .ok()?;
            serde_json::to_value(compat).ok()?
        }
        // Every other item: only the `callId` → `call_id` rewrite applies.
        None => {
            let compat =
                serde_json::from_value::<ResponsesImportCallIdCompatItem>(original.clone()).ok()?;
            serde_json::to_value(compat).ok()?
        }
    };

    // Report a change only when serialization actually altered the item.
    (normalized != *original).then_some(normalized)
}

/// Normalize every item in a JSON array of Responses API items.
///
/// Returns `Some(array)` only when at least one item actually changed, and
/// `None` when `data` is not an array or nothing needed rewriting, so callers
/// can skip a redundant re-parse of an unchanged payload.
fn normalize_responses_import_items(data: &serde_json::Value) -> Option<serde_json::Value> {
    // Borrow the array directly instead of deserializing a full clone of
    // `data` into `Vec<Value>`; non-arrays bail out identically.
    let items = data.as_array()?;
    let mut changed = false;
    let mut normalized = Vec::with_capacity(items.len());

    for item in items {
        match normalize_responses_import_item_value(item) {
            Some(rewritten) => {
                changed = true;
                normalized.push(rewritten);
            }
            None => normalized.push(item.clone()),
        }
    }

    changed.then(|| serde_json::Value::Array(normalized))
}

/// Merge each assistant message that consists solely of reasoning parts into
/// the assistant message immediately following it, so a reasoning item and
/// its answer come out as one universal message.
///
/// Non-assistant messages, and assistant messages whose previous neighbor is
/// not reasoning-only, are passed through unchanged and in order.
fn merge_adjacent_reasoning_assistant_messages(messages: Vec<Message>) -> Vec<Message> {
    let mut merged: Vec<Message> = Vec::with_capacity(messages.len());

    for message in messages {
        // Merge only when the previously-emitted message is an assistant
        // message whose parts are all (and at least one) Reasoning, and the
        // current message is also an assistant message.
        let can_merge = match (merged.last(), &message) {
            (
                Some(Message::Assistant {
                    content: AssistantContent::Array(prev_parts),
                    ..
                }),
                Message::Assistant { .. },
            ) => {
                !prev_parts.is_empty()
                    && prev_parts
                        .iter()
                        .all(|part| matches!(part, AssistantContentPart::Reasoning { .. }))
            }
            _ => false,
        };

        if !can_merge {
            merged.push(message);
            continue;
        }

        // Take the reasoning-only predecessor back off the output; the match
        // above guarantees both destructurings below succeed.
        let previous = merged.pop().expect("previous message should exist");
        let Message::Assistant {
            content: AssistantContent::Array(reasoning_parts),
            id: reasoning_id,
        } = previous
        else {
            unreachable!("checked previous assistant reasoning message above");
        };

        let Message::Assistant {
            content: next_content,
            id: next_id,
        } = message
        else {
            unreachable!("checked current assistant message above");
        };

        // Append the follow-up content after the reasoning parts; a plain
        // string body becomes a single text part.
        let mut combined_parts = reasoning_parts;
        match next_content {
            AssistantContent::Array(parts) => combined_parts.extend(parts),
            AssistantContent::String(text) => {
                combined_parts.push(AssistantContentPart::Text(TextContentPart {
                    text,
                    encrypted_content: None,
                    provider_options: None,
                }))
            }
        }

        // Prefer the follow-up message's id, falling back to the reasoning
        // item's id when the follow-up has none.
        merged.push(Message::Assistant {
            content: AssistantContent::Array(combined_parts),
            id: next_id.or(reasoning_id),
        });
    }

    merged
}

fn try_from_responses_items_candidate(candidate: &serde_json::Value) -> Option<Vec<Message>> {
let wrapped;
let candidate = if candidate.is_object() {
wrapped = serde_json::Value::Array(vec![candidate.clone()]);
&wrapped
} else {
candidate
};

if let Ok(provider_messages) =
serde_json::from_value::<Vec<openai::InputItem>>(candidate.clone())
{
if let Ok(messages) =
<Vec<Message> as TryFromLLM<Vec<openai::InputItem>>>::try_from(provider_messages)
{
if !messages.is_empty() {
return Some(merge_adjacent_reasoning_assistant_messages(messages));
}
}
}

if let Ok(provider_messages) =
serde_json::from_value::<Vec<openai::OutputItem>>(candidate.clone())
{
if let Ok(messages) =
<Vec<Message> as TryFromLLM<Vec<openai::OutputItem>>>::try_from(provider_messages)
{
if !messages.is_empty() {
return Some(merge_adjacent_reasoning_assistant_messages(messages));
}
}
}

None
}

/// Import an OpenAI Responses API payload as universal messages.
///
/// The payload is tried as-is first; if that fails, compat spellings
/// (`callId`, `function_call_result`, structured `output`/`result` values)
/// are normalized and the conversion is retried once.
pub(crate) fn try_import_openai_responses(data: &serde_json::Value) -> Option<Vec<Message>> {
    try_from_responses_items_candidate(data).or_else(|| {
        let normalized = normalize_responses_import_items(data)?;
        try_from_responses_items_candidate(&normalized)
    })
}
1 change: 1 addition & 0 deletions crates/lingua/src/processing/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod adapters;
pub mod dedup;
pub mod import;
mod import_openai_responses;
pub mod transform;

pub use adapters::{
Expand Down
13 changes: 7 additions & 6 deletions crates/lingua/src/providers/openai/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1961,13 +1961,14 @@ impl TryFromLLM<Vec<openai::OutputItem>> for Vec<Message> {
}
};

// Only create a message if there are parts
if !parts.is_empty() {
messages.push(Message::Assistant {
content: AssistantContent::Array(parts),
id: item_id,
});
if parts.is_empty() {
continue;
}

messages.push(Message::Assistant {
content: AssistantContent::Array(parts),
id: item_id,
});
}

Ok(messages)
Expand Down
4 changes: 4 additions & 0 deletions payloads/import-cases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ Supported keys:
- `expectedRolesInOrder` (string array)
- `mustContainText` (string array)

Notes:

- Unknown keys are ignored by the current test runner. We use `_migrationNote` to document intentional expectation changes when porting from older frontend converter tests.

## Test modes

Default mode (strict):
Expand Down
6 changes: 6 additions & 0 deletions payloads/import-cases/adk-basic-input-output.assertions.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading