Commit 1933e1a

improvement(openai): migrate to responses api (#3135)
* Migrate openai to use responses api
* Consolidate azure
* Fix streaming
* Bug fixes
* Bug fixes
* Fix responseformat
* Refactor
* Fix bugs
* Fix
* Fix azure openai response format with tool calls
* Fixes
* Fixes
* Fix temp
1 parent 793adda commit 1933e1a
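
The heart of the migration is the endpoint and payload shape: the wand route now POSTs to the Responses API (https://api.openai.com/v1/responses, or {endpoint}/openai/v1/responses on Azure) with `input` and `max_output_tokens` in place of the Chat Completions `messages` and `max_tokens`, and pulls text out of the response's `output` array instead of `choices[0].message.content`. A minimal sketch of the non-streaming call, mirroring the payload used in apps/sim/app/api/wand/route.ts below (the prompt and key handling here are illustrative, not the route's exact values):

// Minimal sketch of a non-streaming Responses API call; assumes OPENAI_API_KEY is set.
// The prompt is illustrative; the wand route builds its own system/user messages.
const messages = [{ role: 'user', content: 'Write a short description of this workflow.' }]

const response = await fetch('https://api.openai.com/v1/responses', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'OpenAI-Beta': 'responses=v1',
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
  },
  body: JSON.stringify({
    model: 'gpt-4o',
    input: messages,          // Chat Completions used `messages`
    temperature: 0.2,
    max_output_tokens: 10000, // Chat Completions used `max_tokens`
  }),
})

const completion = await response.json()
// Generated text now lives in the `output` array of items, not choices[0].message.content.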

File tree

7 files changed: +1511 -1249 lines changed

apps/sim/app/api/wand/route.ts

Lines changed: 161 additions & 87 deletions
@@ -3,7 +3,6 @@ import { userStats, workflow } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { eq, sql } from 'drizzle-orm'
 import { type NextRequest, NextResponse } from 'next/server'
-import OpenAI, { AzureOpenAI } from 'openai'
 import { getBYOKKey } from '@/lib/api-key/byok'
 import { getSession } from '@/lib/auth'
 import { logModelUsage } from '@/lib/billing/core/usage-log'
@@ -12,6 +11,7 @@ import { env } from '@/lib/core/config/env'
 import { getCostMultiplier, isBillingEnabled } from '@/lib/core/config/feature-flags'
 import { generateRequestId } from '@/lib/core/utils/request'
 import { verifyWorkspaceMembership } from '@/app/api/workflows/utils'
+import { extractResponseText, parseResponsesUsage } from '@/providers/openai/utils'
 import { getModelPricing } from '@/providers/utils'

 export const dynamic = 'force-dynamic'
@@ -28,18 +28,6 @@ const openaiApiKey = env.OPENAI_API_KEY

 const useWandAzure = azureApiKey && azureEndpoint && azureApiVersion

-const client = useWandAzure
-  ? new AzureOpenAI({
-      apiKey: azureApiKey,
-      apiVersion: azureApiVersion,
-      endpoint: azureEndpoint,
-    })
-  : openaiApiKey
-    ? new OpenAI({
-        apiKey: openaiApiKey,
-      })
-    : null
-
 if (!useWandAzure && !openaiApiKey) {
   logger.warn(
     'Neither Azure OpenAI nor OpenAI API key found. Wand generation API will not function.'
@@ -202,20 +190,18 @@ export async function POST(req: NextRequest) {
     }

     let isBYOK = false
-    let activeClient = client
-    let byokApiKey: string | null = null
+    let activeOpenAIKey = openaiApiKey

     if (workspaceId && !useWandAzure) {
       const byokResult = await getBYOKKey(workspaceId, 'openai')
       if (byokResult) {
         isBYOK = true
-        byokApiKey = byokResult.apiKey
-        activeClient = new OpenAI({ apiKey: byokResult.apiKey })
+        activeOpenAIKey = byokResult.apiKey
         logger.info(`[${requestId}] Using BYOK OpenAI key for wand generation`)
       }
     }

-    if (!activeClient) {
+    if (!useWandAzure && !activeOpenAIKey) {
       logger.error(`[${requestId}] AI client not initialized. Missing API key.`)
       return NextResponse.json(
         { success: false, error: 'Wand generation service is not configured.' },
@@ -276,17 +262,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       )

       const apiUrl = useWandAzure
-        ? `${azureEndpoint}/openai/deployments/${wandModelName}/chat/completions?api-version=${azureApiVersion}`
-        : 'https://api.openai.com/v1/chat/completions'
+        ? `${azureEndpoint?.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
+        : 'https://api.openai.com/v1/responses'

       const headers: Record<string, string> = {
         'Content-Type': 'application/json',
+        'OpenAI-Beta': 'responses=v1',
       }

       if (useWandAzure) {
         headers['api-key'] = azureApiKey!
       } else {
-        headers.Authorization = `Bearer ${byokApiKey || openaiApiKey}`
+        headers.Authorization = `Bearer ${activeOpenAIKey}`
       }

       logger.debug(`[${requestId}] Making streaming request to: ${apiUrl}`)
@@ -296,11 +283,10 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
         headers,
         body: JSON.stringify({
           model: useWandAzure ? wandModelName : 'gpt-4o',
-          messages: messages,
+          input: messages,
           temperature: 0.2,
-          max_tokens: 10000,
+          max_output_tokens: 10000,
           stream: true,
-          stream_options: { include_usage: true },
         }),
       })

@@ -327,16 +313,29 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
             return
           }

+          let finalUsage: any = null
+          let usageRecorded = false
+
+          const recordUsage = async () => {
+            if (usageRecorded || !finalUsage) {
+              return
+            }
+
+            usageRecorded = true
+            await updateUserStatsForWand(session.user.id, finalUsage, requestId, isBYOK)
+          }
+
           try {
             let buffer = ''
             let chunkCount = 0
-            let finalUsage: any = null
+            let activeEventType: string | undefined

             while (true) {
               const { done, value } = await reader.read()

               if (done) {
                 logger.info(`[${requestId}] Stream completed. Total chunks: ${chunkCount}`)
+                await recordUsage()
                 controller.enqueue(encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`))
                 controller.close()
                 break
@@ -348,47 +347,90 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
               buffer = lines.pop() || ''

               for (const line of lines) {
-                if (line.startsWith('data: ')) {
-                  const data = line.slice(6).trim()
+                const trimmed = line.trim()
+                if (!trimmed) {
+                  continue
+                }

-                  if (data === '[DONE]') {
-                    logger.info(`[${requestId}] Received [DONE] signal`)
+                if (trimmed.startsWith('event:')) {
+                  activeEventType = trimmed.slice(6).trim()
+                  continue
+                }

-                    if (finalUsage) {
-                      await updateUserStatsForWand(session.user.id, finalUsage, requestId, isBYOK)
-                    }
+                if (!trimmed.startsWith('data:')) {
+                  continue
+                }

-                    controller.enqueue(
-                      encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`)
-                    )
-                    controller.close()
-                    return
-                  }
+                const data = trimmed.slice(5).trim()
+                if (data === '[DONE]') {
+                  logger.info(`[${requestId}] Received [DONE] signal`)

-                  try {
-                    const parsed = JSON.parse(data)
-                    const content = parsed.choices?.[0]?.delta?.content
+                  await recordUsage()

-                    if (content) {
-                      chunkCount++
-                      if (chunkCount === 1) {
-                        logger.info(`[${requestId}] Received first content chunk`)
-                      }
+                  controller.enqueue(
+                    encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`)
+                  )
+                  controller.close()
+                  return
+                }
+
+                let parsed: any
+                try {
+                  parsed = JSON.parse(data)
+                } catch (parseError) {
+                  logger.debug(`[${requestId}] Skipped non-JSON line: ${data.substring(0, 100)}`)
+                  continue
+                }
+
+                const eventType = parsed?.type ?? activeEventType
+
+                if (
+                  eventType === 'response.error' ||
+                  eventType === 'error' ||
+                  eventType === 'response.failed'
+                ) {
+                  throw new Error(parsed?.error?.message || 'Responses stream error')
+                }
+
+                if (
+                  eventType === 'response.output_text.delta' ||
+                  eventType === 'response.output_json.delta'
+                ) {
+                  let content = ''
+                  if (typeof parsed.delta === 'string') {
+                    content = parsed.delta
+                  } else if (parsed.delta && typeof parsed.delta.text === 'string') {
+                    content = parsed.delta.text
+                  } else if (parsed.delta && parsed.delta.json !== undefined) {
+                    content = JSON.stringify(parsed.delta.json)
+                  } else if (parsed.json !== undefined) {
+                    content = JSON.stringify(parsed.json)
+                  } else if (typeof parsed.text === 'string') {
+                    content = parsed.text
+                  }

-                      controller.enqueue(
-                        encoder.encode(`data: ${JSON.stringify({ chunk: content })}\n\n`)
-                      )
+                  if (content) {
+                    chunkCount++
+                    if (chunkCount === 1) {
+                      logger.info(`[${requestId}] Received first content chunk`)
                     }

-                    if (parsed.usage) {
-                      finalUsage = parsed.usage
-                      logger.info(
-                        `[${requestId}] Received usage data: ${JSON.stringify(parsed.usage)}`
-                      )
+                    controller.enqueue(
+                      encoder.encode(`data: ${JSON.stringify({ chunk: content })}\n\n`)
+                    )
+                  }
+                }
+
+                if (eventType === 'response.completed') {
+                  const usage = parseResponsesUsage(parsed?.response?.usage ?? parsed?.usage)
+                  if (usage) {
+                    finalUsage = {
+                      prompt_tokens: usage.promptTokens,
+                      completion_tokens: usage.completionTokens,
+                      total_tokens: usage.totalTokens,
                     }
-                  } catch (parseError) {
-                    logger.debug(
-                      `[${requestId}] Skipped non-JSON line: ${data.substring(0, 100)}`
+                    logger.info(
+                      `[${requestId}] Received usage data: ${JSON.stringify(finalUsage)}`
                     )
                   }
                 }
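
A note on the stream format handled in the hunk above: the upstream Responses API delivers server-sent events, where an optional event: line names the event type and the following data: line carries a JSON payload. Roughly, and with illustrative token counts, the two events the new parser acts on look like this on the wire:

event: response.output_text.delta
data: {"type":"response.output_text.delta","delta":"Hello"}

event: response.completed
data: {"type":"response.completed","response":{"usage":{"input_tokens":12,"output_tokens":5,"total_tokens":17}}}

The route re-emits each delta to its own client as data: {"chunk":"..."} and closes with data: {"done":true}, the same shape it emitted before the migration, so downstream consumers of this endpoint are unaffected.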
@@ -401,6 +443,12 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
               stack: streamError?.stack,
             })

+            try {
+              await recordUsage()
+            } catch (usageError) {
+              logger.warn(`[${requestId}] Failed to record usage after stream error`, usageError)
+            }
+
             const errorData = `data: ${JSON.stringify({ error: 'Streaming failed', done: true })}\n\n`
             controller.enqueue(encoder.encode(errorData))
             controller.close()
@@ -424,8 +472,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
         message: error?.message || 'Unknown error',
         code: error?.code,
         status: error?.status,
-        responseStatus: error?.response?.status,
-        responseData: error?.response?.data ? safeStringify(error.response.data) : undefined,
         stack: error?.stack,
         useWandAzure,
         model: useWandAzure ? wandModelName : 'gpt-4o',
@@ -440,14 +486,43 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       }
     }

-    const completion = await activeClient.chat.completions.create({
-      model: useWandAzure ? wandModelName : 'gpt-4o',
-      messages: messages,
-      temperature: 0.3,
-      max_tokens: 10000,
+    const apiUrl = useWandAzure
+      ? `${azureEndpoint?.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
+      : 'https://api.openai.com/v1/responses'
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+      'OpenAI-Beta': 'responses=v1',
+    }
+
+    if (useWandAzure) {
+      headers['api-key'] = azureApiKey!
+    } else {
+      headers.Authorization = `Bearer ${activeOpenAIKey}`
+    }
+
+    const response = await fetch(apiUrl, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        model: useWandAzure ? wandModelName : 'gpt-4o',
+        input: messages,
+        temperature: 0.2,
+        max_output_tokens: 10000,
+      }),
     })

-    const generatedContent = completion.choices[0]?.message?.content?.trim()
+    if (!response.ok) {
+      const errorText = await response.text()
+      const apiError = new Error(
+        `API request failed: ${response.status} ${response.statusText} - ${errorText}`
+      )
+      ;(apiError as any).status = response.status
+      throw apiError
+    }
+
+    const completion = await response.json()
+    const generatedContent = extractResponseText(completion.output)?.trim()

     if (!generatedContent) {
       logger.error(
@@ -461,8 +536,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg

     logger.info(`[${requestId}] Wand generation successful`)

-    if (completion.usage) {
-      await updateUserStatsForWand(session.user.id, completion.usage, requestId, isBYOK)
+    const usage = parseResponsesUsage(completion.usage)
+    if (usage) {
+      await updateUserStatsForWand(
+        session.user.id,
+        {
+          prompt_tokens: usage.promptTokens,
+          completion_tokens: usage.completionTokens,
+          total_tokens: usage.totalTokens,
+        },
+        requestId,
+        isBYOK
+      )
     }

     return NextResponse.json({ success: true, content: generatedContent })
@@ -472,10 +557,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       message: error?.message || 'Unknown error',
       code: error?.code,
       status: error?.status,
-      responseStatus: error instanceof OpenAI.APIError ? error.status : error?.response?.status,
-      responseData: (error as any)?.response?.data
-        ? safeStringify((error as any).response.data)
-        : undefined,
      stack: error?.stack,
      useWandAzure,
      model: useWandAzure ? wandModelName : 'gpt-4o',
@@ -484,26 +565,19 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
     })

     let clientErrorMessage = 'Wand generation failed. Please try again later.'
-    let status = 500
+    let status = typeof (error as any)?.status === 'number' ? (error as any).status : 500

-    if (error instanceof OpenAI.APIError) {
-      status = error.status || 500
-      logger.error(
-        `[${requestId}] ${useWandAzure ? 'Azure OpenAI' : 'OpenAI'} API Error: ${status} - ${error.message}`
-      )
-
-      if (status === 401) {
-        clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
-      } else if (status === 429) {
-        clientErrorMessage = 'Rate limit exceeded. Please try again later.'
-      } else if (status >= 500) {
-        clientErrorMessage =
-          'The wand generation service is currently unavailable. Please try again later.'
-      }
-    } else if (useWandAzure && error.message?.includes('DeploymentNotFound')) {
+    if (useWandAzure && error?.message?.includes('DeploymentNotFound')) {
       clientErrorMessage =
         'Azure OpenAI deployment not found. Please check your model deployment configuration.'
       status = 404
+    } else if (status === 401) {
+      clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
+    } else if (status === 429) {
+      clientErrorMessage = 'Rate limit exceeded. Please try again later.'
+    } else if (status >= 500) {
+      clientErrorMessage =
+        'The wand generation service is currently unavailable. Please try again later.'
     }

     return NextResponse.json(

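The helpers imported at the top of the route, extractResponseText and parseResponsesUsage, come from @/providers/openai/utils and are modified elsewhere in this commit, so their bodies are not part of this file's diff. A rough sketch of what they are expected to do, inferred from how the route consumes them (the signatures and field handling below are assumptions, not the committed implementations):

// Hypothetical sketches of the '@/providers/openai/utils' helpers used by the wand route.

// extractResponseText: concatenate the text parts of a Responses API `output` array.
export function extractResponseText(output: any[] | undefined): string | undefined {
  if (!Array.isArray(output)) return undefined
  const parts: string[] = []
  for (const item of output) {
    if (item?.type === 'message' && Array.isArray(item.content)) {
      for (const part of item.content) {
        if (part?.type === 'output_text' && typeof part.text === 'string') {
          parts.push(part.text)
        }
      }
    }
  }
  return parts.length > 0 ? parts.join('') : undefined
}

// parseResponsesUsage: normalize Responses usage (input_tokens/output_tokens) into the
// promptTokens/completionTokens/totalTokens shape the route maps onto its billing fields.
export function parseResponsesUsage(
  usage: any
): { promptTokens: number; completionTokens: number; totalTokens: number } | null {
  if (!usage) return null
  const promptTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
  const completionTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
  return {
    promptTokens,
    completionTokens,
    totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
  }
}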