Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
import org.slf4j.LoggerFactory;

/**
* Merges multi-agent conversation messages for OpenAI HTTP API.
* Consolidates multiple agent messages into single user messages with history tags.
* Merges multi-agent conversation messages for OpenAI HTTP API. Consolidates multiple agent
* messages into single user messages with history tags.
*
* <p>This class combines all agent messages into a single user message with conversation
* history wrapped in special tags. Images and audio are preserved as separate ContentParts.
* <p>This class combines all agent messages into a single user message with conversation history
* wrapped in special tags. Images and audio are preserved as separate ContentParts.
*/
public class OpenAIConversationMerger {

Expand Down Expand Up @@ -132,10 +132,6 @@ private void processMessage(
List<OpenAIContentPart> allParts,
boolean includePrefix) {
String agentName = msg.getName();
String roleLabel = roleFormatter.apply(msg);
if (roleLabel == null) {
roleLabel = "Unknown";
}

// Process all blocks
List<ContentBlock> blocks = msg.getContent();
Expand All @@ -145,7 +141,7 @@ private void processMessage(
for (ContentBlock block : blocks) {
if (block instanceof TextBlock tb) {
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append(tb.getText()).append("\n");

Expand All @@ -162,7 +158,7 @@ private void processMessage(
if (source == null) {
log.warn("ImageBlock has null source, skipping");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Image - null source]\n");
} else {
Expand All @@ -174,7 +170,7 @@ private void processMessage(
e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
log.warn("Failed to process ImageBlock: {}", errorMsg);
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer
.append("[Image - processing failed: ")
Expand All @@ -195,7 +191,7 @@ private void processMessage(
if (source == null) {
log.warn("VideoBlock has null source, skipping");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Video - null source]\n");
} else {
Expand All @@ -207,7 +203,7 @@ private void processMessage(
e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
log.warn("Failed to process VideoBlock: {}", errorMsg);
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer
.append("[Video - processing failed: ")
Expand All @@ -228,15 +224,15 @@ private void processMessage(
if (source == null) {
log.warn("AudioBlock has null source, skipping");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Audio - null source]\n");
} else if (source instanceof Base64Source b64) {
String audioData = b64.getData();
if (audioData == null || audioData.isEmpty()) {
log.warn("Base64Source has null or empty data, skipping");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Audio - null or empty data]\n");
} else {
Expand All @@ -248,30 +244,30 @@ private void processMessage(
if (url == null || url.isEmpty()) {
log.warn("URLSource has null or empty URL, skipping");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Audio - null or empty URL]\n");
} else {
log.warn(
"URL-based audio not directly supported, using text"
+ " reference");
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Audio URL: ").append(url).append("]\n");
}
} else {
log.warn("Unknown audio source type: {}", source.getClass());
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
}
textBuffer.append("[Audio - unsupported source type]\n");
}
} catch (Exception e) {
String errorMsg =
e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
log.warn("Failed to process AudioBlock: {}", errorMsg);
appendRoleAndName(textBuffer, roleLabel, agentName);
appendNamePrefix(textBuffer, agentName);
textBuffer
.append("[Audio - processing failed: ")
.append(errorMsg)
Expand All @@ -281,11 +277,16 @@ private void processMessage(
} else if (block instanceof ThinkingBlock thinkingBlock) {
// Include ThinkingBlock in conversation history for models that support reasoning
if (includePrefix) {
appendRoleAndName(textBuffer, roleLabel, agentName);
}
String thinking = thinkingBlock.getThinking();
if (thinking != null && !thinking.isEmpty()) {
textBuffer.append("[Thinking]: ").append(thinking).append("\n");
appendNamePrefix(textBuffer, agentName);
String thinking = thinkingBlock.getThinking();
if (thinking != null && !thinking.isEmpty()) {
textBuffer.append("[Thinking]: ").append(thinking).append("\n");
}
} else {
String thinking = thinkingBlock.getThinking();
if (thinking != null && !thinking.isEmpty()) {
textBuffer.append("[Thinking]: ").append(thinking).append("\n");
}
}
} else if (block instanceof ToolResultBlock toolResult) {
// Use provided converter to handle multimodal content in tool results
Expand All @@ -295,13 +296,8 @@ private void processMessage(
? resultText
: "[Empty tool result]";

// For tool results, we format slightly differently to include tool name
textBuffer.append(roleLabel);
if (agentName != null
&& !agentName.equals(roleLabel)
&& !agentName.equals("Unknown")) {
textBuffer.append(" ").append(agentName);
}
// For tool results, format as: name (tool_name): result
textBuffer.append(agentName);
textBuffer
.append(" (")
.append(toolResult.getName())
Expand All @@ -312,31 +308,23 @@ private void processMessage(
}
}

private void appendRoleAndName(StringBuilder buffer, String roleLabel, String agentName) {
buffer.append(roleLabel);
if (agentName != null && !agentName.equals(roleLabel) && !agentName.equals("Unknown")) {
buffer.append(" ").append(agentName);
private void appendNamePrefix(StringBuilder buffer, String agentName) {
if (agentName != null && !agentName.isEmpty()) {
buffer.append(agentName).append(": ");
}
buffer.append(": ");
}

/**
* Convert image Source to URL string for OpenAI API.
*/
/**
 * Converts an image {@link Source} into the URL string used for the OpenAI API.
 *
 * <p>This is a thin delegate: all conversion work is done by
 * {@code OpenAIConverterUtils.convertImageSourceToUrl}.
 *
 * @param source the image source to convert
 * @return the string produced by the shared converter utility
 */
private String convertImageSourceToUrl(Source source) {
    final String imageUrl = OpenAIConverterUtils.convertImageSourceToUrl(source);
    return imageUrl;
}

/**
* Convert video Source to URL string for OpenAI API.
*/
/**
 * Converts a video {@link Source} into the URL string used for the OpenAI API.
 *
 * <p>This is a thin delegate: all conversion work is done by
 * {@code OpenAIConverterUtils.convertVideoSourceToUrl}.
 *
 * @param source the video source to convert
 * @return the string produced by the shared converter utility
 */
private String convertVideoSourceToUrl(Source source) {
    final String videoUrl = OpenAIConverterUtils.convertVideoSourceToUrl(source);
    return videoUrl;
}

/**
* Detect audio format from media type.
*/
/**
 * Detects the audio format that corresponds to a media type.
 *
 * <p>This is a thin delegate: the lookup is done by
 * {@code OpenAIConverterUtils.detectAudioFormat}.
 *
 * @param mediaType the media type string to inspect
 * @return the format string reported by the shared converter utility
 */
private String detectAudioFormat(String mediaType) {
    final String audioFormat = OpenAIConverterUtils.detectAudioFormat(mediaType);
    return audioFormat;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,4 +332,187 @@ void testMixedContentWithNullHandling() {
assertTrue(content.contains("Message 1"), "Should contain first message");
assertTrue(content.contains("Message 2"), "Should contain second message");
}

@Test
@DisplayName("Should format history with only name prefix without roleLabel")
void testHistoryFormatWithNameOnly() {
    // Two named speakers with different roles, each carrying a single text block.
    Msg aliceMsg =
            Msg.builder()
                    .role(MsgRole.USER)
                    .name("Alice")
                    .content(List.of(TextBlock.builder().text("Hello").build()))
                    .build();
    Msg bobMsg =
            Msg.builder()
                    .role(MsgRole.ASSISTANT)
                    .name("Bob")
                    .content(List.of(TextBlock.builder().text("Hi there").build()))
                    .build();

    List<Msg> conversation = new ArrayList<>();
    conversation.add(aliceMsg);
    conversation.add(bobMsg);

    // The role formatter returns the role name so that a leaked label would be detectable.
    OpenAIMessage merged =
            merger.mergeToUserMessage(
                    conversation, m -> m.getRole().toString(), ignored -> "Tool result");

    assertNotNull(merged);
    String mergedText = merged.getContentAsString();
    assertNotNull(mergedText);

    // Every history line must read "name: text".
    boolean hasAliceLine = mergedText.contains("Alice: Hello");
    assertTrue(hasAliceLine, "Should format as 'Alice: Hello'");
    boolean hasBobLine = mergedText.contains("Bob: Hi there");
    assertTrue(hasBobLine, "Should format as 'Bob: Hi there'");

    // A role label, if it occurs at all, may only occur after the speaker's own line.
    int aliceLineAt = mergedText.indexOf("Alice: Hello");
    int bobLineAt = mergedText.indexOf("Bob: Hi there");
    int userAt = mergedText.indexOf("USER");
    int assistantAt = mergedText.indexOf("ASSISTANT");

    boolean userLabelNotAfterAlice = userAt != -1 && userAt <= aliceLineAt;
    assertTrue(
            !userLabelNotAfterAlice,
            "Should not contain USER roleLabel before Alice's message");

    boolean assistantLabelNotAfterBob = assistantAt != -1 && assistantAt <= bobLineAt;
    assertTrue(
            !assistantLabelNotAfterBob,
            "Should not contain ASSISTANT roleLabel before Bob's message");
}

@Test
@DisplayName("Should format ToolResultBlock with name only")
void testToolResultFormatWithNameOnly() {
    // A single tool message whose only content is a tool result with one text output.
    io.agentscope.core.message.ToolResultBlock searchResult =
            io.agentscope.core.message.ToolResultBlock.builder()
                    .name("search_tool")
                    .output(List.of(TextBlock.builder().text("Search completed").build()))
                    .build();

    Msg toolMsg =
            Msg.builder()
                    .role(MsgRole.TOOL)
                    .name("ToolAgent")
                    .content(List.of(searchResult))
                    .build();

    List<Msg> conversation = new ArrayList<>();
    conversation.add(toolMsg);

    OpenAIMessage merged =
            merger.mergeToUserMessage(
                    conversation,
                    m -> m.getRole().toString(),
                    blocks -> {
                        // Concatenate the text of every TextBlock; other block types are
                        // ignored.
                        StringBuilder joined = new StringBuilder();
                        for (var block : blocks) {
                            if (!(block instanceof TextBlock textBlock)) {
                                continue;
                            }
                            joined.append(textBlock.getText());
                        }
                        return joined.toString();
                    });

    assertNotNull(merged);
    String mergedText = merged.getContentAsString();
    assertNotNull(mergedText);

    // Expected line shape: "name (tool_name): result".
    boolean hasToolLine = mergedText.contains("ToolAgent (search_tool): Search completed");
    assertTrue(hasToolLine, "Should format as 'ToolAgent (search_tool): Search completed'");

    // The role label must not be prepended to the agent name.
    boolean hasRolePrefixedName = mergedText.contains("TOOL ToolAgent");
    assertTrue(!hasRolePrefixedName, "Should not contain 'TOOL ToolAgent' format");
}

@Test
@DisplayName("Should format multimodal content with name prefix only")
void testMultimodalFormatWithNameOnly() {
    // Bob's message mixes a text block with a URL-backed image block.
    URLSource pictureSource = URLSource.builder().url("http://example.com/pic.jpg").build();
    ImageBlock picture = ImageBlock.builder().source(pictureSource).build();

    Msg aliceMsg =
            Msg.builder()
                    .role(MsgRole.USER)
                    .name("Alice")
                    .content(List.of(TextBlock.builder().text("Look at this").build()))
                    .build();

    Msg bobMsg =
            Msg.builder()
                    .role(MsgRole.ASSISTANT)
                    .name("Bob")
                    .content(List.of(TextBlock.builder().text("Interesting").build(), picture))
                    .build();

    List<Msg> conversation = new ArrayList<>();
    conversation.add(aliceMsg);
    conversation.add(bobMsg);

    OpenAIMessage merged =
            merger.mergeToUserMessage(
                    conversation, m -> m.getRole().toString(), ignored -> "Tool result");

    assertNotNull(merged);
    assertTrue(merged.isMultimodal() || merged.getContentAsString() != null);

    // NOTE(review): the name-prefix checks below only run when the merged message is NOT
    // multimodal. If an image block always yields a multimodal message (TODO confirm), this
    // test never verifies the prefix format and should inspect the text parts instead.
    if (merged.isMultimodal()) {
        return;
    }

    String mergedText = merged.getContentAsString();
    boolean hasAliceLine = mergedText.contains("Alice: Look at this");
    assertTrue(hasAliceLine, "Should format as 'Alice: Look at this'");
    boolean hasBobLine = mergedText.contains("Bob: Interesting");
    assertTrue(hasBobLine, "Should format as 'Bob: Interesting'");
}

@Test
@DisplayName("Should handle ThinkingBlock with name prefix only")
void testThinkingBlockFormatWithNameOnly() {
    // A leading user message, so the thinking message is merged as part of a history.
    Msg questionMsg =
            Msg.builder()
                    .role(MsgRole.USER)
                    .name("User")
                    .content(List.of(TextBlock.builder().text("Question").build()))
                    .build();

    io.agentscope.core.message.ThinkingBlock reasoning =
            io.agentscope.core.message.ThinkingBlock.builder()
                    .thinking("Let me analyze this...")
                    .build();

    // The assistant message carries the thinking block first, then its text conclusion.
    Msg answerMsg =
            Msg.builder()
                    .role(MsgRole.ASSISTANT)
                    .name("Thinker")
                    .content(
                            List.of(
                                    reasoning,
                                    TextBlock.builder().text("My conclusion").build()))
                    .build();

    List<Msg> conversation = new ArrayList<>();
    conversation.add(questionMsg);
    conversation.add(answerMsg);

    OpenAIMessage merged =
            merger.mergeToUserMessage(
                    conversation, m -> m.getRole().toString(), ignored -> "Tool result");

    assertNotNull(merged);
    String mergedText = merged.getContentAsString();
    assertNotNull(mergedText);

    // Both the thinking line and the text line must carry the "Thinker: " prefix.
    boolean hasThinkingLine =
            mergedText.contains("Thinker: [Thinking]: Let me analyze this...");
    assertTrue(hasThinkingLine, "Should include thinking with name prefix");

    boolean hasConclusionLine = mergedText.contains("Thinker: My conclusion");
    assertTrue(hasConclusionLine, "Should include text with name prefix");
}
}
Loading