@@ -144,23 +144,38 @@ export async function executeResponsesProviderRequest(
144144 }
145145 }
146146
147+ // Store response format config - for Azure with tools, we defer applying it until after tool calls complete
148+ let deferredTextFormat : { type : string ; name : string ; schema : any ; strict : boolean } | undefined
149+ const hasTools = ! ! request . tools ?. length
150+ const isAzure = config . providerId === 'azure-openai'
151+
147152 if ( request . responseFormat ) {
148153 const isStrict = request . responseFormat . strict !== false
149154 const rawSchema = request . responseFormat . schema || request . responseFormat
150155 // OpenAI strict mode requires additionalProperties: false on ALL nested objects
151156 const cleanedSchema = isStrict ? enforceStrictSchema ( rawSchema ) : rawSchema
152157
153- basePayload . text = {
154- ...( basePayload . text ?? { } ) ,
155- format : {
156- type : 'json_schema' ,
157- name : request . responseFormat . name || 'response_schema' ,
158- schema : cleanedSchema ,
159- strict : isStrict ,
160- } ,
158+ const textFormat = {
159+ type : 'json_schema' as const ,
160+ name : request . responseFormat . name || 'response_schema' ,
161+ schema : cleanedSchema ,
162+ strict : isStrict ,
161163 }
162164
163- logger . info ( `Added JSON schema response format to ${ config . providerLabel } request` )
165+ // Azure OpenAI has issues combining tools + response_format in the same request
166+ // Defer the format until after tool calls complete for Azure
167+ if ( isAzure && hasTools ) {
168+ deferredTextFormat = textFormat
169+ logger . info (
170+ `Deferring JSON schema response format for ${ config . providerLabel } (will apply after tool calls complete)`
171+ )
172+ } else {
173+ basePayload . text = {
174+ ...( basePayload . text ?? { } ) ,
175+ format : textFormat ,
176+ }
177+ logger . info ( `Added JSON schema response format to ${ config . providerLabel } request` )
178+ }
164179 }
165180
166181 const tools = request . tools ?. length
@@ -572,6 +587,64 @@ export async function executeResponsesProviderRequest(
572587 iterationCount ++
573588 }
574589
590+ // For Azure with deferred format: make a final call with the response format applied
591+ // This only happens if we had tool calls and a deferred format
592+ if ( deferredTextFormat && iterationCount > 0 ) {
593+ logger . info (
594+ `Applying deferred JSON schema response format for ${ config . providerLabel } after tool calls completed`
595+ )
596+
597+ const finalFormatStartTime = Date . now ( )
598+
599+ // Add the output items from the last response to the input
600+ const lastOutputItems = convertResponseOutputToInputItems ( currentResponse . output )
601+ if ( lastOutputItems . length ) {
602+ currentInput . push ( ...lastOutputItems )
603+ }
604+
605+ // Make final call with the response format - build payload without tools
606+ const finalPayload : Record < string , any > = {
607+ model : config . modelName ,
608+ input : currentInput ,
609+ text : {
610+ ...( basePayload . text ?? { } ) ,
611+ format : deferredTextFormat ,
612+ } ,
613+ }
614+
615+ // Copy over non-tool related settings
616+ if ( request . temperature !== undefined ) finalPayload . temperature = request . temperature
617+ if ( request . maxTokens != null ) finalPayload . max_output_tokens = request . maxTokens
618+
619+ currentResponse = await postResponses ( finalPayload )
620+
621+ const finalFormatEndTime = Date . now ( )
622+ const finalFormatDuration = finalFormatEndTime - finalFormatStartTime
623+
624+ timeSegments . push ( {
625+ type : 'model' ,
626+ name : 'Final formatted response' ,
627+ startTime : finalFormatStartTime ,
628+ endTime : finalFormatEndTime ,
629+ duration : finalFormatDuration ,
630+ } )
631+
632+ modelTime += finalFormatDuration
633+
634+ const finalUsage = parseResponsesUsage ( currentResponse . usage )
635+ if ( finalUsage ) {
636+ tokens . input += finalUsage . promptTokens
637+ tokens . output += finalUsage . completionTokens
638+ tokens . total += finalUsage . totalTokens
639+ }
640+
641+ // Update content with the formatted response
642+ const formattedText = extractResponseText ( currentResponse . output )
643+ if ( formattedText ) {
644+ content = formattedText
645+ }
646+ }
647+
575648 if ( request . stream ) {
576649 logger . info ( 'Using streaming for final response after tool processing' )
577650
0 commit comments