@@ -588,24 +588,36 @@ export async function executeResponsesProviderRequest(
588588 }
589589
590590 // For Azure with deferred format: make a final call with the response format applied
591- // This only happens if we had tool calls and a deferred format
592- if ( deferredTextFormat && iterationCount > 0 ) {
591+ // This happens whenever we have a deferred format, even if no tools were called
592+ // (the initial call was made without the format, so we need to apply it now)
593+ let appliedDeferredFormat = false
594+ if ( deferredTextFormat ) {
593595 logger . info (
594- `Applying deferred JSON schema response format for ${ config . providerLabel } after tool calls completed `
596+ `Applying deferred JSON schema response format for ${ config . providerLabel } (iterationCount: ${ iterationCount } ) `
595597 )
596598
597599 const finalFormatStartTime = Date . now ( )
598600
599- // Add the output items from the last response to the input
600- const lastOutputItems = convertResponseOutputToInputItems ( currentResponse . output )
601- if ( lastOutputItems . length ) {
602- currentInput . push ( ...lastOutputItems )
601+ // Determine what input to use for the formatted call
602+ let formattedInput : ResponsesInputItem [ ]
603+
604+ if ( iterationCount > 0 ) {
605+ // Tools were called - include the conversation history with tool results
606+ const lastOutputItems = convertResponseOutputToInputItems ( currentResponse . output )
607+ if ( lastOutputItems . length ) {
608+ currentInput . push ( ...lastOutputItems )
609+ }
610+ formattedInput = currentInput
611+ } else {
612+ // No tools were called - just retry the initial call with format applied
613+ // Don't include the model's previous unformatted response
614+ formattedInput = initialInput
603615 }
604616
605617 // Make final call with the response format - build payload without tools
606618 const finalPayload : Record < string , any > = {
607619 model : config . modelName ,
608- input : currentInput ,
620+ input : formattedInput ,
609621 text : {
610622 ...( basePayload . text ?? { } ) ,
611623 format : deferredTextFormat ,
@@ -643,19 +655,30 @@ export async function executeResponsesProviderRequest(
643655 if ( formattedText ) {
644656 content = formattedText
645657 }
658+
659+ appliedDeferredFormat = true
646660 }
647661
648- if ( request . stream ) {
662+ // Skip streaming if we already applied deferred format - we have the formatted content
663+ // Making another streaming call would lose the formatted response
664+ if ( request . stream && ! appliedDeferredFormat ) {
649665 logger . info ( 'Using streaming for final response after tool processing' )
650666
651667 const accumulatedCost = calculateCost ( request . model , tokens . input , tokens . output )
652668
669+ // This branch only runs when the deferred format was not already applied above; if a deferred format exists, include it in the streaming call
670+ const streamOverrides : Record < string , any > = { stream : true , tool_choice : 'auto' }
671+ if ( deferredTextFormat ) {
672+ streamOverrides . text = {
673+ ...( basePayload . text ?? { } ) ,
674+ format : deferredTextFormat ,
675+ }
676+ }
677+
653678 const streamResponse = await fetch ( config . endpoint , {
654679 method : 'POST' ,
655680 headers : config . headers ,
656- body : JSON . stringify (
657- createRequestBody ( currentInput , { stream : true , tool_choice : 'auto' } )
658- ) ,
681+ body : JSON . stringify ( createRequestBody ( currentInput , streamOverrides ) ) ,
659682 } )
660683
661684 if ( ! streamResponse . ok ) {
0 commit comments