@@ -3,7 +3,6 @@ import { userStats, workflow } from '@sim/db/schema'
33import { createLogger } from '@sim/logger'
44import { eq , sql } from 'drizzle-orm'
55import { type NextRequest , NextResponse } from 'next/server'
6- import OpenAI , { AzureOpenAI } from 'openai'
76import { getBYOKKey } from '@/lib/api-key/byok'
87import { getSession } from '@/lib/auth'
98import { logModelUsage } from '@/lib/billing/core/usage-log'
@@ -12,6 +11,7 @@ import { env } from '@/lib/core/config/env'
1211import { getCostMultiplier , isBillingEnabled } from '@/lib/core/config/feature-flags'
1312import { generateRequestId } from '@/lib/core/utils/request'
1413import { verifyWorkspaceMembership } from '@/app/api/workflows/utils'
14+ import { extractResponseText , parseResponsesUsage } from '@/providers/openai/utils'
1515import { getModelPricing } from '@/providers/utils'
1616
1717export const dynamic = 'force-dynamic'
@@ -28,18 +28,6 @@ const openaiApiKey = env.OPENAI_API_KEY
2828
2929const useWandAzure = azureApiKey && azureEndpoint && azureApiVersion
3030
31- const client = useWandAzure
32- ? new AzureOpenAI ( {
33- apiKey : azureApiKey ,
34- apiVersion : azureApiVersion ,
35- endpoint : azureEndpoint ,
36- } )
37- : openaiApiKey
38- ? new OpenAI ( {
39- apiKey : openaiApiKey ,
40- } )
41- : null
42-
4331if ( ! useWandAzure && ! openaiApiKey ) {
4432 logger . warn (
4533 'Neither Azure OpenAI nor OpenAI API key found. Wand generation API will not function.'
@@ -202,20 +190,18 @@ export async function POST(req: NextRequest) {
202190 }
203191
204192 let isBYOK = false
205- let activeClient = client
206- let byokApiKey : string | null = null
193+ let activeOpenAIKey = openaiApiKey
207194
208195 if ( workspaceId && ! useWandAzure ) {
209196 const byokResult = await getBYOKKey ( workspaceId , 'openai' )
210197 if ( byokResult ) {
211198 isBYOK = true
212- byokApiKey = byokResult . apiKey
213- activeClient = new OpenAI ( { apiKey : byokResult . apiKey } )
199+ activeOpenAIKey = byokResult . apiKey
214200 logger . info ( `[${ requestId } ] Using BYOK OpenAI key for wand generation` )
215201 }
216202 }
217203
218- if ( ! activeClient ) {
204+ if ( ! useWandAzure && ! activeOpenAIKey ) {
219205 logger . error ( `[${ requestId } ] AI client not initialized. Missing API key.` )
220206 return NextResponse . json (
221207 { success : false , error : 'Wand generation service is not configured.' } ,
@@ -276,17 +262,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
276262 )
277263
278264 const apiUrl = useWandAzure
279- ? `${ azureEndpoint } /openai/deployments/ ${ wandModelName } /chat/completions ?api-version=${ azureApiVersion } `
280- : 'https://api.openai.com/v1/chat/completions '
265+ ? `${ azureEndpoint ?. replace ( / \/ $ / , '' ) } /openai/v1/responses ?api-version=${ azureApiVersion } `
266+ : 'https://api.openai.com/v1/responses '
281267
282268 const headers : Record < string , string > = {
283269 'Content-Type' : 'application/json' ,
270+ 'OpenAI-Beta' : 'responses=v1' ,
284271 }
285272
286273 if ( useWandAzure ) {
287274 headers [ 'api-key' ] = azureApiKey !
288275 } else {
289- headers . Authorization = `Bearer ${ byokApiKey || openaiApiKey } `
276+ headers . Authorization = `Bearer ${ activeOpenAIKey } `
290277 }
291278
292279 logger . debug ( `[${ requestId } ] Making streaming request to: ${ apiUrl } ` )
@@ -296,11 +283,10 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
296283 headers,
297284 body : JSON . stringify ( {
298285 model : useWandAzure ? wandModelName : 'gpt-4o' ,
299- messages : messages ,
286+ input : messages ,
300287 temperature : 0.2 ,
301- max_tokens : 10000 ,
288+ max_output_tokens : 10000 ,
302289 stream : true ,
303- stream_options : { include_usage : true } ,
304290 } ) ,
305291 } )
306292
@@ -327,16 +313,29 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
327313 return
328314 }
329315
316+ let finalUsage : any = null
317+ let usageRecorded = false
318+
319+ const recordUsage = async ( ) => {
320+ if ( usageRecorded || ! finalUsage ) {
321+ return
322+ }
323+
324+ usageRecorded = true
325+ await updateUserStatsForWand ( session . user . id , finalUsage , requestId , isBYOK )
326+ }
327+
330328 try {
331329 let buffer = ''
332330 let chunkCount = 0
333- let finalUsage : any = null
331+ let activeEventType : string | undefined
334332
335333 while ( true ) {
336334 const { done, value } = await reader . read ( )
337335
338336 if ( done ) {
339337 logger . info ( `[${ requestId } ] Stream completed. Total chunks: ${ chunkCount } ` )
338+ await recordUsage ( )
340339 controller . enqueue ( encoder . encode ( `data: ${ JSON . stringify ( { done : true } ) } \n\n` ) )
341340 controller . close ( )
342341 break
@@ -348,47 +347,90 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
348347 buffer = lines . pop ( ) || ''
349348
350349 for ( const line of lines ) {
351- if ( line . startsWith ( 'data: ' ) ) {
352- const data = line . slice ( 6 ) . trim ( )
350+ const trimmed = line . trim ( )
351+ if ( ! trimmed ) {
352+ continue
353+ }
353354
354- if ( data === '[DONE]' ) {
355- logger . info ( `[${ requestId } ] Received [DONE] signal` )
355+ if ( trimmed . startsWith ( 'event:' ) ) {
356+ activeEventType = trimmed . slice ( 6 ) . trim ( )
357+ continue
358+ }
356359
357- if ( finalUsage ) {
358- await updateUserStatsForWand ( session . user . id , finalUsage , requestId , isBYOK )
359- }
360+ if ( ! trimmed . startsWith ( 'data:' ) ) {
361+ continue
362+ }
360363
361- controller . enqueue (
362- encoder . encode ( `data: ${ JSON . stringify ( { done : true } ) } \n\n` )
363- )
364- controller . close ( )
365- return
366- }
364+ const data = trimmed . slice ( 5 ) . trim ( )
365+ if ( data === '[DONE]' ) {
366+ logger . info ( `[${ requestId } ] Received [DONE] signal` )
367367
368- try {
369- const parsed = JSON . parse ( data )
370- const content = parsed . choices ?. [ 0 ] ?. delta ?. content
368+ await recordUsage ( )
371369
372- if ( content ) {
373- chunkCount ++
374- if ( chunkCount === 1 ) {
375- logger . info ( `[${ requestId } ] Received first content chunk` )
376- }
370+ controller . enqueue (
371+ encoder . encode ( `data: ${ JSON . stringify ( { done : true } ) } \n\n` )
372+ )
373+ controller . close ( )
374+ return
375+ }
376+
377+ let parsed : any
378+ try {
379+ parsed = JSON . parse ( data )
380+ } catch ( parseError ) {
381+ logger . debug ( `[${ requestId } ] Skipped non-JSON line: ${ data . substring ( 0 , 100 ) } ` )
382+ continue
383+ }
384+
385+ const eventType = parsed ?. type ?? activeEventType
386+
387+ if (
388+ eventType === 'response.error' ||
389+ eventType === 'error' ||
390+ eventType === 'response.failed'
391+ ) {
392+ throw new Error ( parsed ?. error ?. message || 'Responses stream error' )
393+ }
394+
395+ if (
396+ eventType === 'response.output_text.delta' ||
397+ eventType === 'response.output_json.delta'
398+ ) {
399+ let content = ''
400+ if ( typeof parsed . delta === 'string' ) {
401+ content = parsed . delta
402+ } else if ( parsed . delta && typeof parsed . delta . text === 'string' ) {
403+ content = parsed . delta . text
404+ } else if ( parsed . delta && parsed . delta . json !== undefined ) {
405+ content = JSON . stringify ( parsed . delta . json )
406+ } else if ( parsed . json !== undefined ) {
407+ content = JSON . stringify ( parsed . json )
408+ } else if ( typeof parsed . text === 'string' ) {
409+ content = parsed . text
410+ }
377411
378- controller . enqueue (
379- encoder . encode ( `data: ${ JSON . stringify ( { chunk : content } ) } \n\n` )
380- )
412+ if ( content ) {
413+ chunkCount ++
414+ if ( chunkCount === 1 ) {
415+ logger . info ( `[${ requestId } ] Received first content chunk` )
381416 }
382417
383- if ( parsed . usage ) {
384- finalUsage = parsed . usage
385- logger . info (
386- `[${ requestId } ] Received usage data: ${ JSON . stringify ( parsed . usage ) } `
387- )
418+ controller . enqueue (
419+ encoder . encode ( `data: ${ JSON . stringify ( { chunk : content } ) } \n\n` )
420+ )
421+ }
422+ }
423+
424+ if ( eventType === 'response.completed' ) {
425+ const usage = parseResponsesUsage ( parsed ?. response ?. usage ?? parsed ?. usage )
426+ if ( usage ) {
427+ finalUsage = {
428+ prompt_tokens : usage . promptTokens ,
429+ completion_tokens : usage . completionTokens ,
430+ total_tokens : usage . totalTokens ,
388431 }
389- } catch ( parseError ) {
390- logger . debug (
391- `[${ requestId } ] Skipped non-JSON line: ${ data . substring ( 0 , 100 ) } `
432+ logger . info (
433+ `[${ requestId } ] Received usage data: ${ JSON . stringify ( finalUsage ) } `
392434 )
393435 }
394436 }
@@ -401,6 +443,12 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
401443 stack : streamError ?. stack ,
402444 } )
403445
446+ try {
447+ await recordUsage ( )
448+ } catch ( usageError ) {
449+ logger . warn ( `[${ requestId } ] Failed to record usage after stream error` , usageError )
450+ }
451+
404452 const errorData = `data: ${ JSON . stringify ( { error : 'Streaming failed' , done : true } ) } \n\n`
405453 controller . enqueue ( encoder . encode ( errorData ) )
406454 controller . close ( )
@@ -424,8 +472,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
424472 message : error ?. message || 'Unknown error' ,
425473 code : error ?. code ,
426474 status : error ?. status ,
427- responseStatus : error ?. response ?. status ,
428- responseData : error ?. response ?. data ? safeStringify ( error . response . data ) : undefined ,
429475 stack : error ?. stack ,
430476 useWandAzure,
431477 model : useWandAzure ? wandModelName : 'gpt-4o' ,
@@ -440,14 +486,43 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
440486 }
441487 }
442488
443- const completion = await activeClient . chat . completions . create ( {
444- model : useWandAzure ? wandModelName : 'gpt-4o' ,
445- messages : messages ,
446- temperature : 0.3 ,
447- max_tokens : 10000 ,
489+ const apiUrl = useWandAzure
490+ ? `${ azureEndpoint ?. replace ( / \/ $ / , '' ) } /openai/v1/responses?api-version=${ azureApiVersion } `
491+ : 'https://api.openai.com/v1/responses'
492+
493+ const headers : Record < string , string > = {
494+ 'Content-Type' : 'application/json' ,
495+ 'OpenAI-Beta' : 'responses=v1' ,
496+ }
497+
498+ if ( useWandAzure ) {
499+ headers [ 'api-key' ] = azureApiKey !
500+ } else {
501+ headers . Authorization = `Bearer ${ activeOpenAIKey } `
502+ }
503+
504+ const response = await fetch ( apiUrl , {
505+ method : 'POST' ,
506+ headers,
507+ body : JSON . stringify ( {
508+ model : useWandAzure ? wandModelName : 'gpt-4o' ,
509+ input : messages ,
510+ temperature : 0.2 ,
511+ max_output_tokens : 10000 ,
512+ } ) ,
448513 } )
449514
450- const generatedContent = completion . choices [ 0 ] ?. message ?. content ?. trim ( )
515+ if ( ! response . ok ) {
516+ const errorText = await response . text ( )
517+ const apiError = new Error (
518+ `API request failed: ${ response . status } ${ response . statusText } - ${ errorText } `
519+ )
520+ ; ( apiError as any ) . status = response . status
521+ throw apiError
522+ }
523+
524+ const completion = await response . json ( )
525+ const generatedContent = extractResponseText ( completion . output ) ?. trim ( )
451526
452527 if ( ! generatedContent ) {
453528 logger . error (
@@ -461,8 +536,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
461536
462537 logger . info ( `[${ requestId } ] Wand generation successful` )
463538
464- if ( completion . usage ) {
465- await updateUserStatsForWand ( session . user . id , completion . usage , requestId , isBYOK )
539+ const usage = parseResponsesUsage ( completion . usage )
540+ if ( usage ) {
541+ await updateUserStatsForWand (
542+ session . user . id ,
543+ {
544+ prompt_tokens : usage . promptTokens ,
545+ completion_tokens : usage . completionTokens ,
546+ total_tokens : usage . totalTokens ,
547+ } ,
548+ requestId ,
549+ isBYOK
550+ )
466551 }
467552
468553 return NextResponse . json ( { success : true , content : generatedContent } )
@@ -472,10 +557,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
472557 message : error ?. message || 'Unknown error' ,
473558 code : error ?. code ,
474559 status : error ?. status ,
475- responseStatus : error instanceof OpenAI . APIError ? error . status : error ?. response ?. status ,
476- responseData : ( error as any ) ?. response ?. data
477- ? safeStringify ( ( error as any ) . response . data )
478- : undefined ,
479560 stack : error ?. stack ,
480561 useWandAzure,
481562 model : useWandAzure ? wandModelName : 'gpt-4o' ,
@@ -484,26 +565,19 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
484565 } )
485566
486567 let clientErrorMessage = 'Wand generation failed. Please try again later.'
487- let status = 500
568+ let status = typeof ( error as any ) ?. status === 'number' ? ( error as any ) . status : 500
488569
489- if ( error instanceof OpenAI . APIError ) {
490- status = error . status || 500
491- logger . error (
492- `[${ requestId } ] ${ useWandAzure ? 'Azure OpenAI' : 'OpenAI' } API Error: ${ status } - ${ error . message } `
493- )
494-
495- if ( status === 401 ) {
496- clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
497- } else if ( status === 429 ) {
498- clientErrorMessage = 'Rate limit exceeded. Please try again later.'
499- } else if ( status >= 500 ) {
500- clientErrorMessage =
501- 'The wand generation service is currently unavailable. Please try again later.'
502- }
503- } else if ( useWandAzure && error . message ?. includes ( 'DeploymentNotFound' ) ) {
570+ if ( useWandAzure && error ?. message ?. includes ( 'DeploymentNotFound' ) ) {
504571 clientErrorMessage =
505572 'Azure OpenAI deployment not found. Please check your model deployment configuration.'
506573 status = 404
574+ } else if ( status === 401 ) {
575+ clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
576+ } else if ( status === 429 ) {
577+ clientErrorMessage = 'Rate limit exceeded. Please try again later.'
578+ } else if ( status >= 500 ) {
579+ clientErrorMessage =
580+ 'The wand generation service is currently unavailable. Please try again later.'
507581 }
508582
509583 return NextResponse . json (
0 commit comments