Skip to content

Commit f15544e

Browse files
committed
feat: enhance Azure OpenAI compatibility and streaming reliability
- Add Azure OpenAI content filtering support (PromptFilterResults, ContentFilterResults)
- Improve OpenAI streaming flow with consistent response handling
- Add LLMUsage.Clone() method for safe usage object duplication
- Remove unused SetModel method from AnthropicStreamResponse
- Fix streaming termination issues with usage-only chunks
- Enhance response structure compatibility across providers
1 parent e59957a commit f15544e

File tree

5 files changed

+112
-49
lines changed

5 files changed

+112
-49
lines changed

core/providers/anthropic.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1171,7 +1171,6 @@ func handleAnthropicStreaming(
11711171
// Handle delta changes to the top-level message
11721172
if event.Usage != nil && usage != nil {
11731173
usage.OutputTokens = event.Usage.OutputTokens
1174-
usage.CacheCreationInputTokens = event.Usage.CacheCreationInputTokens
11751174
}
11761175

11771176
// Send usage information immediately if present

core/providers/openai.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,9 @@ func handleOpenAIStreaming(
594594

595595
// Handle usage-only chunks (when stream_options include_usage is true)
596596
if len(response.Choices) == 0 && response.Usage != nil {
597+
// Empty choices array.
598+
response.Choices = []schemas.BifrostResponseChoice{}
599+
597600
// This is a usage information chunk at the end of stream
598601
if params != nil {
599602
response.ExtraFields.Params = *params
@@ -619,9 +622,7 @@ func handleOpenAIStreaming(
619622
response.ExtraFields.Provider = providerType
620623

621624
processAndSendResponse(ctx, postHookRunner, &response, responseChan)
622-
623-
// End stream processing after finish reason
624-
break
625+
continue
625626
}
626627

627628
// Handle regular content chunks
@@ -632,6 +633,7 @@ func handleOpenAIStreaming(
632633
response.ExtraFields.Provider = providerType
633634

634635
processAndSendResponse(ctx, postHookRunner, &response, responseChan)
636+
continue
635637
}
636638
}
637639

core/schemas/bifrost.go

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -378,18 +378,41 @@ type ImageURLStruct struct {
378378

379379
// BifrostResponse represents the complete result from any bifrost request.
380380
type BifrostResponse struct {
381-
ID string `json:"id,omitempty"`
382-
Object string `json:"object,omitempty"` // text.completion, chat.completion, or embedding
383-
Choices []BifrostResponseChoice `json:"choices,omitempty"`
384-
Embedding [][]float32 `json:"data,omitempty"` // Maps to "data" field in provider responses (e.g., OpenAI embedding format)
385-
Speech *BifrostSpeech `json:"speech,omitempty"` // Maps to "speech" field in provider responses (e.g., OpenAI speech format)
386-
Transcribe *BifrostTranscribe `json:"transcribe,omitempty"` // Maps to "transcribe" field in provider responses (e.g., OpenAI transcription format)
387-
Model string `json:"model,omitempty"`
388-
Created int `json:"created,omitempty"` // The Unix timestamp (in seconds).
389-
ServiceTier *string `json:"service_tier,omitempty"`
390-
SystemFingerprint *string `json:"system_fingerprint,omitempty"`
391-
Usage *LLMUsage `json:"usage,omitempty"`
392-
ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
381+
ID string `json:"id,omitempty"`
382+
Object string `json:"object,omitempty"` // text.completion, chat.completion, or embedding
383+
Choices []BifrostResponseChoice `json:"choices,omitempty"`
384+
Embedding [][]float32 `json:"data,omitempty"` // Maps to "data" field in provider responses (e.g., OpenAI embedding format)
385+
Speech *BifrostSpeech `json:"speech,omitempty"` // Maps to "speech" field in provider responses (e.g., OpenAI speech format)
386+
Transcribe *BifrostTranscribe `json:"transcribe,omitempty"` // Maps to "transcribe" field in provider responses (e.g., OpenAI transcription format)
387+
Model string `json:"model,omitempty"`
388+
Created int `json:"created,omitempty"` // The Unix timestamp (in seconds).
389+
ServiceTier *string `json:"service_tier,omitempty"`
390+
SystemFingerprint *string `json:"system_fingerprint,omitempty"`
391+
Usage *LLMUsage `json:"usage,omitempty"`
392+
PromptFilterResults *[]PromptFilterResult `json:"prompt_filter_results,omitempty"` // Azure OpenAI Service
393+
ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
394+
}
395+
396+
// FilterResult represents the outcome of a single content filter category.
397+
type FilterResult struct {
398+
Filtered bool `json:"filtered"`
399+
Severity bool `json:"severity"`
400+
}
401+
402+
// ContentFilterResult groups the per-category content filter outcomes (hate speech, self-harm, sexual, violence, jailbreak, profanity).
403+
type ContentFilterResult struct {
404+
HateSpeech FilterResult `json:"hate_speech,omitempty"`
405+
SelfHarm FilterResult `json:"self_harm,omitempty"`
406+
Sexual FilterResult `json:"sexual,omitempty"`
407+
Violence FilterResult `json:"violence,omitempty"`
408+
Jailbreak FilterResult `json:"jailbreak,omitempty"`
409+
Profanity FilterResult `json:"profanity,omitempty"`
410+
}
411+
412+
// PromptFilterResult represents the result of a prompt filter.
413+
type PromptFilterResult struct {
414+
PromptIndex int `json:"prompt_index"`
415+
ContentFilterResults *ContentFilterResult `json:"content_filter_results"`
393416
}
394417

395418
// LLMUsage represents token usage information
@@ -401,6 +424,36 @@ type LLMUsage struct {
401424
CompletionTokensDetails *CompletionTokensDetails `json:"completion_tokens_details,omitempty"`
402425
}
403426

427+
func (u *LLMUsage) Clone() *LLMUsage {
428+
if u == nil {
429+
return nil
430+
}
431+
432+
ret := &LLMUsage{
433+
PromptTokens: u.PromptTokens,
434+
CompletionTokens: u.CompletionTokens,
435+
TotalTokens: u.TotalTokens,
436+
}
437+
438+
if u.TokenDetails != nil {
439+
ret.TokenDetails = &TokenDetails{
440+
CachedTokens: u.TokenDetails.CachedTokens,
441+
AudioTokens: u.TokenDetails.AudioTokens,
442+
}
443+
}
444+
445+
if u.CompletionTokensDetails != nil {
446+
ret.CompletionTokensDetails = &CompletionTokensDetails{
447+
ReasoningTokens: u.CompletionTokensDetails.ReasoningTokens,
448+
AudioTokens: u.CompletionTokensDetails.AudioTokens,
449+
AcceptedPredictionTokens: u.CompletionTokensDetails.AcceptedPredictionTokens,
450+
RejectedPredictionTokens: u.CompletionTokensDetails.RejectedPredictionTokens,
451+
}
452+
}
453+
454+
return ret
455+
}
456+
404457
type AudioLLMUsage struct {
405458
InputTokens int `json:"input_tokens"`
406459
InputTokensDetails *AudioTokenDetails `json:"input_tokens_details,omitempty"`
@@ -501,8 +554,9 @@ type Annotation struct {
501554
// IMPORTANT: Only one of BifrostNonStreamResponseChoice or BifrostStreamResponseChoice
502555
// should be non-nil at a time.
503556
type BifrostResponseChoice struct {
504-
Index int `json:"index"`
505-
FinishReason *string `json:"finish_reason,omitempty"`
557+
Index int `json:"index"`
558+
FinishReason *string `json:"finish_reason,omitempty"`
559+
ContentFilterResults *ContentFilterResult `json:"content_filter_results,omitempty"` // Azure OpenAI Service or DeepSeek
506560

507561
*BifrostNonStreamResponseChoice
508562
*BifrostStreamResponseChoice

transports/bifrost-http/integrations/anthropic/types.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,6 @@ func (s *AnthropicStreamResponse) ToSSE() string {
135135
return fmt.Sprintf("event: %s\ndata: %s\n\n", s.Type, string(jsonData))
136136
}
137137

138-
func (s *AnthropicStreamResponse) SetModel(model string) {
139-
if s.Model != nil {
140-
*s.Model = model
141-
}
142-
if s.Message != nil && s.Message.Model != "" {
143-
s.Message.Model = model
144-
}
145-
}
146-
147138
// AnthropicStreamMessage represents the message structure in streaming events
148139
type AnthropicStreamMessage struct {
149140
ID string `json:"id"`

transports/bifrost-http/integrations/openai/types.go

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package openai
22

33
import (
4+
"encoding/json"
5+
"fmt"
6+
47
"github.com/maximhq/bifrost/core/schemas"
58
"github.com/maximhq/bifrost/transports/bifrost-http/integrations"
69
)
@@ -70,14 +73,15 @@ func (r *OpenAITranscriptionRequest) IsStreamingRequested() bool {
7073

7174
// OpenAIChatResponse represents an OpenAI chat completion response
7275
type OpenAIChatResponse struct {
73-
ID string `json:"id"`
74-
Object string `json:"object"`
75-
Created int `json:"created"`
76-
Model string `json:"model"`
77-
Choices []schemas.BifrostResponseChoice `json:"choices"`
78-
Usage *schemas.LLMUsage `json:"usage,omitempty"` // Reuse schema type
79-
ServiceTier *string `json:"service_tier,omitempty"`
80-
SystemFingerprint *string `json:"system_fingerprint,omitempty"`
76+
ID string `json:"id"`
77+
Object string `json:"object"`
78+
Created int `json:"created"`
79+
Model string `json:"model"`
80+
Choices []schemas.BifrostResponseChoice `json:"choices"`
81+
Usage *schemas.LLMUsage `json:"usage,omitempty"` // Reuse schema type
82+
ServiceTier *string `json:"service_tier,omitempty"`
83+
SystemFingerprint *string `json:"system_fingerprint,omitempty"`
84+
PromptFilterResults *[]schemas.PromptFilterResult `json:"prompt_filter_results,omitempty"`
8185
}
8286

8387
// OpenAIChatError represents an OpenAI chat completion error response
@@ -93,6 +97,11 @@ type OpenAIChatError struct {
9397
} `json:"error"`
9498
}
9599

100+
func (e *OpenAIChatError) ToSSE() string {
101+
data, _ := json.Marshal(e)
102+
return fmt.Sprintf("data: %s\n\n", data)
103+
}
104+
96105
// OpenAIChatErrorStruct represents the error structure of an OpenAI chat completion error response
97106
type OpenAIChatErrorStruct struct {
98107
Type string `json:"type"` // Error type
@@ -104,10 +113,11 @@ type OpenAIChatErrorStruct struct {
104113

105114
// OpenAIStreamChoice represents a choice in a streaming response chunk
106115
type OpenAIStreamChoice struct {
107-
Index int `json:"index"`
108-
Delta *OpenAIStreamDelta `json:"delta,omitempty"`
109-
FinishReason *string `json:"finish_reason,omitempty"`
110-
LogProbs *schemas.LogProbs `json:"logprobs,omitempty"`
116+
Index int `json:"index"`
117+
Delta *OpenAIStreamDelta `json:"delta,omitempty"`
118+
FinishReason *string `json:"finish_reason,omitempty"`
119+
LogProbs *schemas.LogProbs `json:"logprobs,omitempty"`
120+
ContentFilterResults *schemas.ContentFilterResult `json:"content_filter_results,omitempty"`
111121
}
112122

113123
// OpenAIStreamDelta represents the incremental content in a streaming chunk
@@ -128,6 +138,11 @@ type OpenAIStreamResponse struct {
128138
Usage *schemas.LLMUsage `json:"usage,omitempty"`
129139
}
130140

141+
func (r *OpenAIStreamResponse) ToSSE() string {
142+
data, _ := json.Marshal(r)
143+
return fmt.Sprintf("data: %s\n\n", data)
144+
}
145+
131146
// ConvertToBifrostRequest converts an OpenAI chat request to Bifrost format
132147
func (r *OpenAIChatRequest) ConvertToBifrostRequest() *schemas.BifrostRequest {
133148
provider, model := integrations.ParseModelString(r.Model, schemas.OpenAI)
@@ -314,14 +329,15 @@ func DeriveOpenAIFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *Open
314329
}
315330

316331
openaiResp := &OpenAIChatResponse{
317-
ID: bifrostResp.ID,
318-
Object: bifrostResp.Object,
319-
Created: bifrostResp.Created,
320-
Model: bifrostResp.Model,
321-
Choices: bifrostResp.Choices,
322-
Usage: bifrostResp.Usage,
323-
ServiceTier: bifrostResp.ServiceTier,
324-
SystemFingerprint: bifrostResp.SystemFingerprint,
332+
ID: bifrostResp.ID,
333+
Object: bifrostResp.Object,
334+
Created: bifrostResp.Created,
335+
Model: bifrostResp.Model,
336+
Choices: bifrostResp.Choices,
337+
Usage: bifrostResp.Usage,
338+
ServiceTier: bifrostResp.ServiceTier,
339+
SystemFingerprint: bifrostResp.SystemFingerprint,
340+
PromptFilterResults: bifrostResp.PromptFilterResults,
325341
}
326342

327343
return openaiResp
@@ -413,8 +429,9 @@ func DeriveOpenAIStreamFromBifrostResponse(bifrostResp *schemas.BifrostResponse)
413429
// Convert choices to streaming format
414430
for _, choice := range bifrostResp.Choices {
415431
streamChoice := OpenAIStreamChoice{
416-
Index: choice.Index,
417-
FinishReason: choice.FinishReason,
432+
Index: choice.Index,
433+
FinishReason: choice.FinishReason,
434+
ContentFilterResults: choice.ContentFilterResults,
418435
}
419436

420437
var delta *OpenAIStreamDelta

0 commit comments

Comments
 (0)