Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions genai-cookbook/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,20 @@ The Agentic Cookbook is a collection of recipes demonstrating how to build modern
Build a streaming chat interface that maintains conversation context across multiple exchanges. This recipe demonstrates:

- Real-time token streaming using the Vercel AI SDK
- Auto-scrolling message display using Mantine
- Markdown rendering with syntax-highlighted code blocks using Streamdown
- Auto-scrolling message display with smart scroll detection
- Seamless compatibility with Modular MAX and OpenAI-compatible endpoints

### 2. **Image Captioning**

Create an intelligent image captioning system that generates natural language descriptions for uploaded images. Features include:
Create an intelligent image captioning system that generates natural language descriptions for uploaded images with progressive streaming and performance tracking. Features include:

- **NDJSON streaming**: Custom useNDJSON hook for progressive results—captions appear as they're generated
- **Parallel processing**: Multiple images processed simultaneously for maximum speed
- **Performance metrics**: TTFT (time to first token) and duration tracking with human-readable formatting via pretty-ms
- Drag-and-drop image upload with Mantine Dropzone
- Base64 image encoding for API transport
- Customizable prompt for caption generation
- Gallery view with loading states and progress indicators
- Gallery view with loading states and real-time updates

## Architecture

Expand Down
2 changes: 1 addition & 1 deletion genai-cookbook/apps/cookbook/components/CodeToggle.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export function CodeToggle() {
icon?: React.ReactNode
}) => {
return (
<Button size="compact-sm" radius="xl">
<Button variant="light" size="compact-sm" radius="xl">
<>
{icon}
<Text size="sm" fw="500" p="6px">
Expand Down
12 changes: 10 additions & 2 deletions genai-cookbook/apps/cookbook/components/ThemeToggle.tsx
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
'use client'

import { useEffect, useState } from 'react'
import { ActionIcon, Tooltip, useMantineColorScheme } from '@mantine/core'
import { IconMoon, IconSun } from '@tabler/icons-react'

export function ThemeToggle({ stroke }: { stroke: number }): JSX.Element {
const { setColorScheme, colorScheme } = useMantineColorScheme()
const [mounted, setMounted] = useState(false)

useEffect(() => {
setMounted(true)
}, [])

function toggleColorScheme() {
const result = colorScheme === 'dark' ? 'light' : 'dark'
return setColorScheme(result)
}

const label = colorScheme === 'dark' ? 'Switch to light' : 'Switch to dark'
const resolvedScheme = mounted ? colorScheme : 'light'
const label =
resolvedScheme === 'dark' ? 'Switch to light' : 'Switch to dark'

return (
<Tooltip label={label}>
Expand All @@ -20,7 +28,7 @@ export function ThemeToggle({ stroke }: { stroke: number }): JSX.Element {
aria-label={label}
variant="transparent"
>
{colorScheme === 'dark' ? (
{resolvedScheme === 'dark' ? (
<IconMoon stroke={stroke} />
) : (
<IconSun stroke={stroke} />
Expand Down
31 changes: 16 additions & 15 deletions genai-cookbook/apps/cookbook/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,34 @@
"format:check": "prettier --check ."
},
"dependencies": {
"@ai-sdk/openai": "^2.0.23",
"@ai-sdk/react": "^2.0.30",
"@mantine/core": "^7.17.8",
"@mantine/dropzone": "^7.17.8",
"@mantine/hooks": "^7.17.8",
"@ai-sdk/openai": "catalog:",
"@ai-sdk/react": "catalog:",
"@mantine/core": "catalog:",
"@mantine/dropzone": "catalog:",
"@mantine/hooks": "catalog:",
"@modular/recipes": "workspace:*",
"@tabler/icons-react": "^3.34.1",
"ai": "^5.0.28",
"nanoid": "^5.1.5",
"@tabler/icons-react": "catalog:",
"ai": "catalog:",
"nanoid": "catalog:",
"next": "^14",
"openai": "^5.20.2",
"openai": "catalog:",
"pretty-ms": "catalog:",
"react": "^18",
"react-dom": "^18",
"react-syntax-highlighter": "^15.6.6",
"streamdown": "^1.3.0",
"sass": "^1.93.2"
"sass": "^1.93.2",
"streamdown": "catalog:"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@types/node": "catalog:",
"@types/react": "catalog:",
"@types/react-dom": "catalog:",
"@types/react-syntax-highlighter": "^15.5.13",
"eslint": "^8",
"eslint-config-next": "14.2.31",
"postcss": "^8",
"tailwindcss": "^3.4.1",
"typescript": "^5"
"typescript": "catalog:"
},
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
}
29 changes: 15 additions & 14 deletions genai-cookbook/packages/recipes/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,26 @@
"./*": "./src/*"
},
"dependencies": {
"@ai-sdk/openai": "^2.0.23",
"@ai-sdk/react": "^2.0.30",
"@mantine/core": "^7.17.8",
"@mantine/dropzone": "^7.17.8",
"@mantine/hooks": "^7.17.8",
"@tabler/icons-react": "^3.34.1",
"ai": "^5.0.28",
"nanoid": "^5.1.5",
"openai": "^5.20.2",
"streamdown": "^1.3.0"
"@ai-sdk/openai": "catalog:",
"@ai-sdk/react": "catalog:",
"@mantine/core": "catalog:",
"@mantine/dropzone": "catalog:",
"@mantine/hooks": "catalog:",
"@tabler/icons-react": "catalog:",
"ai": "catalog:",
"nanoid": "catalog:",
"openai": "catalog:",
"pretty-ms": "catalog:",
"streamdown": "catalog:"
},
"peerDependencies": {
"react": "^18",
"react-dom": "^18"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"typescript": "^5"
"@types/node": "catalog:",
"@types/react": "catalog:",
"@types/react-dom": "catalog:",
"typescript": "catalog:"
}
}
129 changes: 105 additions & 24 deletions genai-cookbook/packages/recipes/src/image-captioning/api.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,123 @@
import { generateText } from 'ai'
import { streamText } from 'ai'
import { RecipeContext } from '../types'
import { createOpenAI } from '@ai-sdk/openai'

/*
* The captioning API mirrors our multi-turn chat route but returns a single
* string instead of a streaming response. Because Modular MAX speaks the
 * OpenAI-compatible protocol, the Vercel AI SDK works with Modular MAX
* out of the box.
* Image Captioning API with NDJSON Streaming and Performance Metrics
*
* This API demonstrates progressive response streaming using NDJSON (newline-delimited JSON).
* Instead of waiting for all captions to complete, we stream each result as it's generated,
* providing immediate feedback to users along with detailed performance metrics.
*
* Key concepts:
* - NDJSON format: One JSON object per line, easy to parse progressively
* - Parallel processing: All images caption simultaneously for speed
* - Stream-as-you-go: Results appear in the UI the moment they're ready
* - Performance tracking: TTFT (time to first token) and duration (generation time) per image
* - OpenAI-compatible: Works with Modular MAX or any OpenAI-compatible server
*
* Timing metrics explained:
* - TTFT: Time from request start to first token (measures latency)
* - Duration: Time from first token to completion (measures generation speed)
*/

// ============================================================================
// POST /api — generates an image caption
// ============================================================================
export default async function POST(req: Request, context: RecipeContext) {
const { apiKey, baseUrl, modelName } = context
const { messages } = await req.json()
if (!messages) {
return new Response('Client did not provide messages', { status: 400 })
const body = await req.json()

const isBatch = Array.isArray(body.batch)

if (!isBatch && !body.messages) {
return new Response('Client did not provide messages or batch', { status: 400 })
}

// Use the Vercel AI SDK to connect to the MAX endpoint
try {
// createOpenAI returns an OpenAI-compatible client
// The Vercel AI SDK's createOpenAI works with any OpenAI-compatible endpoint
const client = createOpenAI({ baseURL: baseUrl, apiKey })

// chat(modelName) works with LLM servers like MAX that
// implement the chat-completions format
const model = client.chat(modelName)

// Finally, we call generateText to get a caption for our images
const { text } = await generateText({
// The recipe UI creates messages in the ModelMessage format,
// so converting from UIMessage to ModelMessage is unnecessary
model: model,
messages: messages,
})
if (isBatch) {
// NDJSON streaming: send results progressively as they complete
const encoder = new TextEncoder()
const stream = new ReadableStream({
async start(controller) {
try {
// Process all images in parallel using Promise.all
// As each caption completes, we immediately stream it to the client
await Promise.all(
body.batch.map(async (item: { imageId: string; messages: any }) => {
try {
const startTime = Date.now()
let firstTokenTime: number | null = null
let ttft: number | null = null
let textChunks: string[] = []

// Use streamText (not generateText) to capture timing metrics
const result = streamText({
model: model,
messages: item.messages,
})

// Consume the stream chunk-by-chunk to collect text and timing
for await (const chunk of result.textStream) {
// Capture TTFT: time from request start to first token
if (ttft === null) {
firstTokenTime = Date.now()
ttft = firstTokenTime - startTime
}
textChunks.push(chunk)
}

// Duration: time from first token to completion (not total time)
const duration = firstTokenTime ? Date.now() - firstTokenTime : null
const text = textChunks.join('')

// Stream result as NDJSON: one JSON object per line with metrics
const line = JSON.stringify({
imageId: item.imageId,
text,
ttft,
duration
}) + '\n'
controller.enqueue(encoder.encode(line))
} catch (error) {
// Send errors per-image so UI can show partial results
const errorMessage = error instanceof Error ? error.message : 'Unknown error'
const line = JSON.stringify({
imageId: item.imageId,
error: errorMessage
}) + '\n'
controller.enqueue(encoder.encode(line))
}
})
)

controller.close()
} catch (error) {
controller.error(error)
}
},
})

return new Response(stream, {
headers: {
'Content-Type': 'application/x-ndjson',
},
})
} else {
// Single caption request: stream and collect text
const result = streamText({
model: model,
messages: body.messages,
})

let textChunks: string[] = []
for await (const chunk of result.textStream) {
textChunks.push(chunk)
}

return Response.json({ text })
return Response.json({ text: textChunks.join('') })
}
} catch (error) {
const errorMessage = error instanceof Error ? `(${error.message})` : ''
return new Response(`Failed to generate caption ${errorMessage}`, {
Expand Down
Loading