fix: improve streaming responses and tool message handling

fbzhong · fbzhong · commit e59957a30565 · 2025-07-24T01:45:19.000+08:00
- Add is_error field support for tool messages in Anthropic integration
- Fix OpenAI content serialization for assistant and tool messages by flattening content blocks into a plain string
- Add StreamOptions to model parameters for better streaming control
- Correct Anthropic streaming response format: return typed stream events (rendered via ToSSE) and report input, output, and cached token usage in message_delta
- Fix tool result handling to support error states and multiple tool results
- Add stream index parameter to streaming response converters
- Ensure proper content block handling across both providers
diff --git a/core/providers/anthropic.go b/core/providers/anthropic.go
@@ -515,6 +515,10 @@ func prepareAnthropicChatRequest(messages []schemas.BifrostMessage, params *sche
 					"tool_use_id": *msg.ToolMessage.ToolCallID,
 				}
 
+				if msg.ToolMessage.IsError != nil {
+					toolCallResult["is_error"] = *msg.ToolMessage.IsError
+				}
+
 				var toolCallResultContent []map[string]interface{}
 
 				if msg.Content.ContentStr != nil {
diff --git a/core/providers/openai.go b/core/providers/openai.go
@@ -222,12 +222,23 @@ func prepareOpenAIChatRequest(messages []schemas.BifrostMessage, params *schemas
 	for _, msg := range messages {
 		if msg.Role == schemas.ModelChatMessageRoleAssistant {
 			assistantMessage := map[string]interface{}{
-				"role":    msg.Role,
-				"content": msg.Content,
+				"role": msg.Role,
 			}
 			if msg.AssistantMessage != nil && msg.AssistantMessage.ToolCalls != nil {
 				assistantMessage["tool_calls"] = *msg.AssistantMessage.ToolCalls
 			}
+			if msg.Content.ContentStr != nil {
+				assistantMessage["content"] = *msg.Content.ContentStr
+			} else if msg.Content.ContentBlocks != nil && len(*msg.Content.ContentBlocks) > 0 {
+				var sb strings.Builder
+				for _, block := range *msg.Content.ContentBlocks {
+					if block.Text != nil && *block.Text != "" {
+						sb.WriteString(*block.Text)
+						sb.WriteString(" ")
+					}
+				}
+				assistantMessage["content"] = sb.String()
+			}
 			formattedMessages = append(formattedMessages, assistantMessage)
 		} else {
 			message := map[string]interface{}{
@@ -250,6 +261,24 @@ func prepareOpenAIChatRequest(messages []schemas.BifrostMessage, params *schemas
 
 			if msg.ToolMessage != nil && msg.ToolMessage.ToolCallID != nil {
 				message["tool_call_id"] = *msg.ToolMessage.ToolCallID
+				if msg.IsError != nil {
+					message["is_error"] = *msg.IsError
+				}
+
+				content := message["content"]
+				if contentBlocks, ok := content.([]schemas.ContentBlock); ok {
+					var sb strings.Builder
+					for _, block := range contentBlocks {
+						if block.Text != nil && *block.Text != "" {
+							sb.WriteString(*block.Text)
+							sb.WriteString(" ")
+						} else if block.ImageURL != nil {
+							sb.WriteString(block.ImageURL.URL)
+							sb.WriteString(" ")
+						}
+					}
+					message["content"] = sb.String()
+				}
 			}
 
 			formattedMessages = append(formattedMessages, message)
diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go
@@ -10,6 +10,11 @@ const (
 	DefaultInitialPoolSize = 100
 )
 
+// StreamOptions represents the options for streaming requests.
+type StreamOptions struct {
+	IncludeUsage bool `json:"include_usage"`
+}
+
 // BifrostConfig represents the configuration for initializing a Bifrost instance.
 // It contains the necessary components for setting up the system including account details,
 // plugins, logging, and initial pool size.
@@ -161,19 +166,20 @@ type Fallback struct {
 // your request to the model. Bifrost follows a standard set of parameters which
 // mapped to the provider's parameters.
 type ModelParameters struct {
-	ToolChoice        *ToolChoice `json:"tool_choice,omitempty"`         // Whether to call a tool
-	Tools             *[]Tool     `json:"tools,omitempty"`               // Tools to use
-	Temperature       *float64    `json:"temperature,omitempty"`         // Controls randomness in the output
-	TopP              *float64    `json:"top_p,omitempty"`               // Controls diversity via nucleus sampling
-	TopK              *int        `json:"top_k,omitempty"`               // Controls diversity via top-k sampling
-	MaxTokens         *int        `json:"max_tokens,omitempty"`          // Maximum number of tokens to generate
-	StopSequences     *[]string   `json:"stop_sequences,omitempty"`      // Sequences that stop generation
-	PresencePenalty   *float64    `json:"presence_penalty,omitempty"`    // Penalizes repeated tokens
-	FrequencyPenalty  *float64    `json:"frequency_penalty,omitempty"`   // Penalizes frequent tokens
-	ParallelToolCalls *bool       `json:"parallel_tool_calls,omitempty"` // Enables parallel tool calls
-	EncodingFormat    *string     `json:"encoding_format,omitempty"`     // Format for embedding output (e.g., "float", "base64")
-	Dimensions        *int        `json:"dimensions,omitempty"`          // Number of dimensions for embedding output
-	User              *string     `json:"user,omitempty"`                // User identifier for tracking
+	ToolChoice        *ToolChoice    `json:"tool_choice,omitempty"`         // Whether to call a tool
+	Tools             *[]Tool        `json:"tools,omitempty"`               // Tools to use
+	Temperature       *float64       `json:"temperature,omitempty"`         // Controls randomness in the output
+	TopP              *float64       `json:"top_p,omitempty"`               // Controls diversity via nucleus sampling
+	TopK              *int           `json:"top_k,omitempty"`               // Controls diversity via top-k sampling
+	MaxTokens         *int           `json:"max_tokens,omitempty"`          // Maximum number of tokens to generate
+	StopSequences     *[]string      `json:"stop_sequences,omitempty"`      // Sequences that stop generation
+	PresencePenalty   *float64       `json:"presence_penalty,omitempty"`    // Penalizes repeated tokens
+	FrequencyPenalty  *float64       `json:"frequency_penalty,omitempty"`   // Penalizes frequent tokens
+	ParallelToolCalls *bool          `json:"parallel_tool_calls,omitempty"` // Enables parallel tool calls
+	EncodingFormat    *string        `json:"encoding_format,omitempty"`     // Format for embedding output (e.g., "float", "base64")
+	Dimensions        *int           `json:"dimensions,omitempty"`          // Number of dimensions for embedding output
+	User              *string        `json:"user,omitempty"`                // User identifier for tracking
+	StreamOptions     *StreamOptions `json:"stream_options,omitempty"`      // Stream options for streaming requests
 	// Dynamic parameters that can be provider-specific, they are directly
 	// added to the request as is.
 	ExtraParams map[string]interface{} `json:"-"`
@@ -351,6 +357,7 @@ type ContentBlock struct {
 // ToolMessage represents a message from a tool
 type ToolMessage struct {
 	ToolCallID *string `json:"tool_call_id,omitempty"`
+	IsError    *bool   `json:"is_error,omitempty"`
 }
 
 // AssistantMessage represents a message from an assistant
diff --git a/transports/bifrost-http/integrations/anthropic/types.go b/transports/bifrost-http/integrations/anthropic/types.go
@@ -3,7 +3,6 @@ package anthropic
 import (
 	"encoding/json"
 	"fmt"
-	"strings"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
@@ -21,6 +20,7 @@ type AnthropicContentBlock struct {
 	Name      *string               `json:"name,omitempty"`        // For tool_use content
 	Input     interface{}           `json:"input,omitempty"`       // For tool_use content
 	Content   AnthropicContent      `json:"content,omitempty"`     // For tool_result content
+	IsError   *bool                 `json:"is_error,omitempty"`    // For tool_result content
 	Source    *AnthropicImageSource `json:"source,omitempty"`      // For image content
 }
 
@@ -63,17 +63,18 @@ type AnthropicToolChoice struct {
 
 // AnthropicMessageRequest represents an Anthropic messages API request
 type AnthropicMessageRequest struct {
-	Model         string               `json:"model"`
-	MaxTokens     int                  `json:"max_tokens"`
-	Messages      []AnthropicMessage   `json:"messages"`
-	System        *AnthropicContent    `json:"system,omitempty"`
-	Temperature   *float64             `json:"temperature,omitempty"`
-	TopP          *float64             `json:"top_p,omitempty"`
-	TopK          *int                 `json:"top_k,omitempty"`
-	StopSequences *[]string            `json:"stop_sequences,omitempty"`
-	Stream        *bool                `json:"stream,omitempty"`
-	Tools         *[]AnthropicTool     `json:"tools,omitempty"`
-	ToolChoice    *AnthropicToolChoice `json:"tool_choice,omitempty"`
+	Model         string                 `json:"model"`
+	MaxTokens     int                    `json:"max_tokens"`
+	Messages      []AnthropicMessage     `json:"messages"`
+	System        *AnthropicContent      `json:"system,omitempty"`
+	Temperature   *float64               `json:"temperature,omitempty"`
+	TopP          *float64               `json:"top_p,omitempty"`
+	TopK          *int                   `json:"top_k,omitempty"`
+	StopSequences *[]string              `json:"stop_sequences,omitempty"`
+	Stream        *bool                  `json:"stream,omitempty"`
+	StreamOptions *schemas.StreamOptions `json:"stream_options,omitempty"`
+	Tools         *[]AnthropicTool       `json:"tools,omitempty"`
+	ToolChoice    *AnthropicToolChoice   `json:"tool_choice,omitempty"`
 }
 
 // IsStreamingRequested implements the StreamingRequest interface
@@ -126,6 +127,23 @@ type AnthropicStreamResponse struct {
 	Usage        *AnthropicUsage         `json:"usage,omitempty"`
 }
 
+func (s *AnthropicStreamResponse) ToSSE() string {
+	jsonData, err := json.Marshal(s)
+	if err != nil {
+		return "event: error\ndata: {\"type\": \"error\", \"error\": {\"type\": \"internal_error\", \"message\": \"Failed to marshal stream response\"}}\n\n"
+	}
+	return fmt.Sprintf("event: %s\ndata: %s\n\n", s.Type, string(jsonData))
+}
+
+func (s *AnthropicStreamResponse) SetModel(model string) {
+	if s.Model != nil {
+		*s.Model = model
+	}
+	if s.Message != nil && s.Message.Model != "" {
+		s.Message.Model = model
+	}
+}
+
 // AnthropicStreamMessage represents the message structure in streaming events
 type AnthropicStreamMessage struct {
 	ID           string                  `json:"id"`
@@ -238,6 +256,8 @@ func (r *AnthropicMessageRequest) ConvertToBifrostRequest() *schemas.BifrostRequ
 			var toolCalls []schemas.ToolCall
 			var contentBlocks []schemas.ContentBlock
 
+			skipAppendMessage := false
+
 			for _, content := range *msg.Content.ContentBlocks {
 				switch content.Type {
 				case "text":
@@ -281,48 +301,69 @@ func (r *AnthropicMessageRequest) ConvertToBifrostRequest() *schemas.BifrostRequ
 						toolCalls = append(toolCalls, tc)
 					}
 				case "tool_result":
-					if content.ToolUseID != nil {
-						bifrostMsg.ToolMessage = &schemas.ToolMessage{
-							ToolCallID: content.ToolUseID,
-						}
-						if content.Content.ContentStr != nil {
-							contentBlocks = append(contentBlocks, schemas.ContentBlock{
-								Type: schemas.ContentBlockTypeText,
-								Text: content.Content.ContentStr,
-							})
-						} else if content.Content.ContentBlocks != nil {
-							for _, block := range *content.Content.ContentBlocks {
-								if block.Text != nil {
-									contentBlocks = append(contentBlocks, schemas.ContentBlock{
-										Type: schemas.ContentBlockTypeText,
-										Text: block.Text,
-									})
-								} else if block.Source != nil {
-									contentBlocks = append(contentBlocks, schemas.ContentBlock{
-										Type: schemas.ContentBlockTypeImage,
-										ImageURL: &schemas.ImageURLStruct{
-											URL: func() string {
-												if block.Source.Data != nil {
-													mime := "image/png"
-													if block.Source.MediaType != nil && *block.Source.MediaType != "" {
-														mime = *block.Source.MediaType
-													}
-													return "data:" + mime + ";base64," + *block.Source.Data
-												}
-												if block.Source.URL != nil {
-													return *block.Source.URL
+					if content.ToolUseID == nil || *content.ToolUseID == "" {
+						continue
+					}
+
+					skipAppendMessage = true
+
+					bifrostMsg.Role = schemas.ModelChatMessageRoleTool
+					bifrostMsg.ToolMessage = &schemas.ToolMessage{
+						ToolCallID: content.ToolUseID,
+						IsError:    content.IsError,
+					}
+					if content.Content.ContentStr != nil {
+						contentBlocks = append(contentBlocks, schemas.ContentBlock{
+							Type: schemas.ContentBlockTypeText,
+							Text: content.Content.ContentStr,
+						})
+					} else if content.Content.ContentBlocks != nil {
+						for _, block := range *content.Content.ContentBlocks {
+							if block.Text != nil {
+								contentBlocks = append(contentBlocks, schemas.ContentBlock{
+									Type: schemas.ContentBlockTypeText,
+									Text: block.Text,
+								})
+							} else if block.Source != nil {
+								contentBlocks = append(contentBlocks, schemas.ContentBlock{
+									Type: schemas.ContentBlockTypeImage,
+									ImageURL: &schemas.ImageURLStruct{
+										URL: func() string {
+											if block.Source.Data != nil {
+												mime := "image/png"
+												if block.Source.MediaType != nil && *block.Source.MediaType != "" {
+													mime = *block.Source.MediaType
 												}
-												return ""
-											}()},
-									})
-								}
+												return "data:" + mime + ";base64," + *block.Source.Data
+											}
+											if block.Source.URL != nil {
+												return *block.Source.URL
+											}
+											return ""
+										}()},
+								})
 							}
 						}
-						bifrostMsg.Role = schemas.ModelChatMessageRoleTool
 					}
+
+					if len(contentBlocks) > 0 {
+						blocks := make([]schemas.ContentBlock, len(contentBlocks))
+						copy(blocks, contentBlocks)
+						bifrostMsg.Content = schemas.MessageContent{
+							ContentBlocks: &blocks,
+						}
+						messages = append(messages, bifrostMsg)
+						bifrostMsg = schemas.BifrostMessage{}
+						contentBlocks = contentBlocks[:0]
+					}
+					continue
 				}
 			}
 
+			if skipAppendMessage {
+				continue
+			}
+
 			// Concatenate all text contents
 			if len(contentBlocks) > 0 {
 				bifrostMsg.Content = schemas.MessageContent{
@@ -360,6 +401,9 @@ func (r *AnthropicMessageRequest) ConvertToBifrostRequest() *schemas.BifrostRequ
 		if r.StopSequences != nil {
 			params.StopSequences = r.StopSequences
 		}
+		if r.StreamOptions != nil {
+			params.StreamOptions = r.StreamOptions
+		}
 
 		bifrostReq.Params = params
 	}
@@ -450,6 +494,9 @@ func DeriveAnthropicFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *A
 			InputTokens:  bifrostResp.Usage.PromptTokens,
 			OutputTokens: bifrostResp.Usage.CompletionTokens,
 		}
+		if bifrostResp.Usage.TokenDetails != nil {
+			anthropicResp.Usage.CacheReadInputTokens = bifrostResp.Usage.TokenDetails.CachedTokens
+		}
 	}
 
 	// Convert choices to content
@@ -521,9 +568,9 @@ func DeriveAnthropicFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *A
 }
 
 // DeriveAnthropicStreamFromBifrostResponse converts a Bifrost streaming response to Anthropic SSE string format
-func DeriveAnthropicStreamFromBifrostResponse(bifrostResp *schemas.BifrostResponse, streamIndex int) string {
+func DeriveAnthropicStreamFromBifrostResponse(bifrostResp *schemas.BifrostResponse, streamIndex int) []*AnthropicStreamResponse {
 	if bifrostResp == nil {
-		return ""
+		return nil
 	}
 
 	var streamRespList []*AnthropicStreamResponse
@@ -550,6 +597,13 @@ func DeriveAnthropicStreamFromBifrostResponse(bifrostResp *schemas.BifrostRespon
 					if bifrostResp.Usage.TokenDetails != nil {
 						usage.CacheReadInputTokens = bifrostResp.Usage.TokenDetails.CachedTokens
 					}
+				} else {
+					// Default to 1 token for input and output, e.g. for DeepSeek api.
+					// Return the actual usage in the final message delta.
+					usage = &AnthropicUsage{
+						InputTokens:  1,
+						OutputTokens: 1,
+					}
 				}
 				streamResp.Type = "message_start"
 				streamResp.Message = &AnthropicStreamMessage{
@@ -625,14 +679,19 @@ func DeriveAnthropicStreamFromBifrostResponse(bifrostResp *schemas.BifrostRespon
 				})
 
 				// Handle message delta
+				usage := &AnthropicUsage{
+					OutputTokens: bifrostResp.Usage.CompletionTokens,
+					InputTokens:  bifrostResp.Usage.PromptTokens,
+				}
+				if bifrostResp.Usage.TokenDetails != nil {
+					usage.CacheReadInputTokens = bifrostResp.Usage.TokenDetails.CachedTokens
+				}
 				streamResp = &AnthropicStreamResponse{
 					Type: "message_delta",
 					Delta: &AnthropicStreamDelta{
 						StopReason: choice.FinishReason,
 					},
-					Usage: &AnthropicUsage{
-						OutputTokens: bifrostResp.Usage.CompletionTokens,
-					},
+					Usage: usage,
 				}
 			}
 
@@ -668,24 +727,17 @@ func DeriveAnthropicStreamFromBifrostResponse(bifrostResp *schemas.BifrostRespon
 
 	}
 
-	var sb strings.Builder
+	result := make([]*AnthropicStreamResponse, 0, len(streamRespList))
 	for _, streamResp := range streamRespList {
 		// Ignore empty stream responses
 		if streamResp.Type == "" {
 			continue
 		}
 
-		// Marshal to JSON and format as SSE
-		jsonData, err := json.Marshal(streamResp)
-		if err != nil {
-			return ""
-		}
-
-		// Format as Anthropic SSE
-		sb.WriteString(fmt.Sprintf("event: %s\ndata: %s\n\n", streamResp.Type, jsonData))
+		result = append(result, streamResp)
 	}
 
-	return sb.String()
+	return result
 }
 
 // DeriveAnthropicErrorFromBifrostError derives a AnthropicMessageError from a BifrostError
diff --git a/transports/bifrost-http/integrations/openai/types.go b/transports/bifrost-http/integrations/openai/types.go
diff --git a/transports/go.mod b/transports/go.mod

Original file line number	Diff line number	Diff line change
`@@ -515,6 +515,10 @@ func prepareAnthropicChatRequest(messages []schemas.BifrostMessage, params *sche`
`515`	`515`	`"tool_use_id": *msg.ToolMessage.ToolCallID,`
`516`	`516`	`}`
`517`	`517`
	`518`	`+ if msg.ToolMessage.IsError != nil {`
	`519`	`+ toolCallResult["is_error"] = *msg.ToolMessage.IsError`
	`520`	`+ }`
	`521`	`+`
`518`	`522`	`var toolCallResultContent []map[string]interface{}`
`519`	`523`
`520`	`524`	`if msg.Content.ContentStr != nil {`