Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions genai-cookbook/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,20 @@ The Agentic Cookbook is a collection of recipes demonstrating how to build modern
Build a streaming chat interface that maintains conversation context across multiple exchanges. This recipe demonstrates:

- Real-time token streaming using the Vercel AI SDK
- Auto-scrolling message display using Mantine
- Markdown rendering with syntax-highlighted code blocks using Streamdown
- Auto-scrolling message display with smart scroll detection
- Seamless compatibility with Modular MAX and OpenAI-compatible endpoints

### 2. **Image Captioning**

Create an intelligent image captioning system that generates natural language descriptions for uploaded images. Features include:
Create an intelligent image captioning system that generates natural language descriptions for uploaded images with progressive streaming and performance tracking. Features include:

- **NDJSON streaming**: Custom useNDJSON hook for progressive results—captions appear as they're generated
- **Parallel processing**: Multiple images processed simultaneously for maximum speed
- **Performance metrics**: TTFT (time to first token) and duration tracking with human-readable formatting via pretty-ms
- Drag-and-drop image upload with Mantine Dropzone
- Base64 image encoding for API transport
- Customizable prompt for caption generation
- Gallery view with loading states and progress indicators
- Gallery view with loading states and real-time updates

## Architecture

Expand Down
2 changes: 1 addition & 1 deletion genai-cookbook/apps/cookbook/components/CodeToggle.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export function CodeToggle() {
icon?: React.ReactNode
}) => {
return (
<Button size="compact-sm" radius="xl">
<Button variant="light" size="compact-sm" radius="xl">
<>
{icon}
<Text size="sm" fw="500" p="6px">
Expand Down
12 changes: 10 additions & 2 deletions genai-cookbook/apps/cookbook/components/ThemeToggle.tsx
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
'use client'

import { useEffect, useState } from 'react'
import { ActionIcon, Tooltip, useMantineColorScheme } from '@mantine/core'
import { IconMoon, IconSun } from '@tabler/icons-react'

export function ThemeToggle({ stroke }: { stroke: number }): JSX.Element {
const { setColorScheme, colorScheme } = useMantineColorScheme()
const [mounted, setMounted] = useState(false)

useEffect(() => {
setMounted(true)
}, [])

function toggleColorScheme() {
const result = colorScheme === 'dark' ? 'light' : 'dark'
return setColorScheme(result)
}

const label = colorScheme === 'dark' ? 'Switch to light' : 'Switch to dark'
const resolvedScheme = mounted ? colorScheme : 'light'
const label =
resolvedScheme === 'dark' ? 'Switch to light' : 'Switch to dark'

return (
<Tooltip label={label}>
Expand All @@ -20,7 +28,7 @@ export function ThemeToggle({ stroke }: { stroke: number }): JSX.Element {
aria-label={label}
variant="transparent"
>
{colorScheme === 'dark' ? (
{resolvedScheme === 'dark' ? (
<IconMoon stroke={stroke} />
) : (
<IconSun stroke={stroke} />
Expand Down
31 changes: 16 additions & 15 deletions genai-cookbook/apps/cookbook/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,34 @@
"format:check": "prettier --check ."
},
"dependencies": {
"@ai-sdk/openai": "^2.0.23",
"@ai-sdk/react": "^2.0.30",
"@mantine/core": "^7.17.8",
"@mantine/dropzone": "^7.17.8",
"@mantine/hooks": "^7.17.8",
"@ai-sdk/openai": "catalog:",
"@ai-sdk/react": "catalog:",
"@mantine/core": "catalog:",
"@mantine/dropzone": "catalog:",
"@mantine/hooks": "catalog:",
"@modular/recipes": "workspace:*",
"@tabler/icons-react": "^3.34.1",
"ai": "^5.0.28",
"nanoid": "^5.1.5",
"@tabler/icons-react": "catalog:",
"ai": "catalog:",
"nanoid": "catalog:",
"next": "^14",
"openai": "^5.20.2",
"openai": "catalog:",
"pretty-ms": "catalog:",
"react": "^18",
"react-dom": "^18",
"react-syntax-highlighter": "^15.6.6",
"streamdown": "^1.3.0",
"sass": "^1.93.2"
"sass": "^1.93.2",
"streamdown": "catalog:"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@types/node": "catalog:",
"@types/react": "catalog:",
"@types/react-dom": "catalog:",
"@types/react-syntax-highlighter": "^15.5.13",
"eslint": "^8",
"eslint-config-next": "14.2.31",
"postcss": "^8",
"tailwindcss": "^3.4.1",
"typescript": "^5"
"typescript": "catalog:"
},
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
}
29 changes: 15 additions & 14 deletions genai-cookbook/packages/recipes/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,26 @@
"./*": "./src/*"
},
"dependencies": {
"@ai-sdk/openai": "^2.0.23",
"@ai-sdk/react": "^2.0.30",
"@mantine/core": "^7.17.8",
"@mantine/dropzone": "^7.17.8",
"@mantine/hooks": "^7.17.8",
"@tabler/icons-react": "^3.34.1",
"ai": "^5.0.28",
"nanoid": "^5.1.5",
"openai": "^5.20.2",
"streamdown": "^1.3.0"
"@ai-sdk/openai": "catalog:",
"@ai-sdk/react": "catalog:",
"@mantine/core": "catalog:",
"@mantine/dropzone": "catalog:",
"@mantine/hooks": "catalog:",
"@tabler/icons-react": "catalog:",
"ai": "catalog:",
"nanoid": "catalog:",
"openai": "catalog:",
"pretty-ms": "catalog:",
"streamdown": "catalog:"
},
"peerDependencies": {
"react": "^18",
"react-dom": "^18"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"typescript": "^5"
"@types/node": "catalog:",
"@types/react": "catalog:",
"@types/react-dom": "catalog:",
"typescript": "catalog:"
}
}
129 changes: 105 additions & 24 deletions genai-cookbook/packages/recipes/src/image-captioning/api.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,123 @@
import { generateText } from 'ai'
import { streamText } from 'ai'
import { RecipeContext } from '../types'
import { createOpenAI } from '@ai-sdk/openai'

/*
* The captioning API mirrors our multi-turn chat route but returns a single
* string instead of a streaming response. Because Modular MAX speaks the
 * OpenAI-compatible protocol, the Vercel AI SDK works with Modular MAX
* out of the box.
* Image Captioning API with NDJSON Streaming and Performance Metrics
*
* This API demonstrates progressive response streaming using NDJSON (newline-delimited JSON).
* Instead of waiting for all captions to complete, we stream each result as it's generated,
* providing immediate feedback to users along with detailed performance metrics.
*
* Key concepts:
* - NDJSON format: One JSON object per line, easy to parse progressively
* - Parallel processing: All images caption simultaneously for speed
* - Stream-as-you-go: Results appear in the UI the moment they're ready
* - Performance tracking: TTFT (time to first token) and duration (generation time) per image
* - OpenAI-compatible: Works with Modular MAX or any OpenAI-compatible server
*
* Timing metrics explained:
* - TTFT: Time from request start to first token (measures latency)
* - Duration: Time from first token to completion (measures generation speed)
*/

// ============================================================================
// POST /api — generates an image caption
// ============================================================================
export default async function POST(req: Request, context: RecipeContext) {
const { apiKey, baseUrl, modelName } = context
const { messages } = await req.json()
if (!messages) {
return new Response('Client did not provide messages', { status: 400 })
const body = await req.json()

const isBatch = Array.isArray(body.batch)

if (!isBatch && !body.messages) {
return new Response('Client did not provide messages or batch', { status: 400 })
}

// Use the Vercel AI SDK to connect to the MAX endpoint
try {
// createOpenAI returns an OpenAI-compatible client
// The Vercel AI SDK's createOpenAI works with any OpenAI-compatible endpoint
const client = createOpenAI({ baseURL: baseUrl, apiKey })

// chat(modelName) works with LLM servers like MAX that
// implement the chat-completions format
const model = client.chat(modelName)

// Finally, we call generateText to get a caption for our images
const { text } = await generateText({
// The recipe UI creates messages in the ModelMessage format,
// so converting from UIMessage to ModelMessage is unnecessary
model: model,
messages: messages,
})
if (isBatch) {
// NDJSON streaming: send results progressively as they complete
const encoder = new TextEncoder()
const stream = new ReadableStream({
async start(controller) {
try {
// Process all images in parallel using Promise.all
// As each caption completes, we immediately stream it to the client
await Promise.all(
body.batch.map(async (item: { imageId: string; messages: any }) => {
try {
const startTime = Date.now()
let firstTokenTime: number | null = null
let ttft: number | null = null
let textChunks: string[] = []

// Use streamText (not generateText) to capture timing metrics
const result = streamText({
model: model,
messages: item.messages,
})

// Consume the stream chunk-by-chunk to collect text and timing
for await (const chunk of result.textStream) {
// Capture TTFT: time from request start to first token
if (ttft === null) {
firstTokenTime = Date.now()
ttft = firstTokenTime - startTime
}
textChunks.push(chunk)
}

// Duration: time from first token to completion (not total time)
const duration = firstTokenTime ? Date.now() - firstTokenTime : null
const text = textChunks.join('')

// Stream result as NDJSON: one JSON object per line with metrics
const line = JSON.stringify({
imageId: item.imageId,
text,
ttft,
duration
}) + '\n'
controller.enqueue(encoder.encode(line))
} catch (error) {
// Send errors per-image so UI can show partial results
const errorMessage = error instanceof Error ? error.message : 'Unknown error'
const line = JSON.stringify({
imageId: item.imageId,
error: errorMessage
}) + '\n'
controller.enqueue(encoder.encode(line))
}
})
)

controller.close()
} catch (error) {
controller.error(error)
}
},
})

return new Response(stream, {
headers: {
'Content-Type': 'application/x-ndjson',
},
})
} else {
// Single caption request: stream and collect text
const result = streamText({
model: model,
messages: body.messages,
})

let textChunks: string[] = []
for await (const chunk of result.textStream) {
textChunks.push(chunk)
}

return Response.json({ text })
return Response.json({ text: textChunks.join('') })
}
} catch (error) {
const errorMessage = error instanceof Error ? `(${error.message})` : ''
return new Response(`Failed to generate caption ${errorMessage}`, {
Expand Down
Loading