Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/voice-agent-noisy-tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ This guide documents the runtime tuning path used by `src/lib/agents/realtime/vo
- Config source: `src/lib/agents/realtime/voice-agent/config.ts`
- Runtime guards: `src/lib/agents/realtime/voice-agent/runtime-guards.ts`
- Realtime model wiring:
- `modelTransport` (`realtime` | `responses_ws`)
- `resolvedRealtimeModel` (adaptive profile aware)
- `resolvedResponsesModel`
- `inputAudioTranscription`
- `inputAudioNoiseReduction`
- `turnDetection`
Expand All @@ -17,6 +20,12 @@ This guide documents the runtime tuning path used by `src/lib/agents/realtime/vo
## Primary Knobs

- `VOICE_AGENT_TRANSCRIPTION_ENABLED`
- `VOICE_AGENT_MODEL_TRANSPORT` (`realtime` | `responses_ws`)
- `VOICE_AGENT_RESPONSES_MODEL` (defaults to `gpt-audio-1.5`)
- `VOICE_AGENT_REALTIME_MODEL_STRATEGY` (`fixed` | `adaptive_profile`)
- `VOICE_AGENT_REALTIME_MODEL_PRIMARY`
- `VOICE_AGENT_REALTIME_MODEL_SECONDARY`
- `VOICE_AGENT_REALTIME_MODEL` (explicit override)
- `VOICE_AGENT_INPUT_TRANSCRIPTION_MODEL`
- `VOICE_AGENT_TRANSCRIPTION_LANGUAGE`
- `VOICE_AGENT_TURN_DETECTION`
Expand Down
6 changes: 6 additions & 0 deletions example.env.local
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ AGENT_LLM_MODEL=gpt-4.1 # (Optional) LLM
AGENT_STT_MODEL=your-stt-model-name # (Optional) STT model for agent (e.g., whisper-large-v3-turbo)
AGENT_TTS_MODEL=your-tts-model-name # (Optional) TTS model for agent (e.g., tts-1)
AGENT_STT_LANGUAGE=en # (Optional) STT language (e.g., en)
VOICE_AGENT_MODEL_TRANSPORT=responses_ws # (Optional) realtime | responses_ws
VOICE_AGENT_RESPONSES_MODEL=gpt-audio-1.5 # (Optional) Responses WS model for tool-calling turns
VOICE_AGENT_REALTIME_MODEL_STRATEGY=adaptive_profile # (Optional) fixed | adaptive_profile
VOICE_AGENT_REALTIME_MODEL_PRIMARY=gpt-realtime-1.5 # (Optional) Primary realtime model for adaptive profile
VOICE_AGENT_REALTIME_MODEL_SECONDARY=gpt-realtime-mini # (Optional) Secondary realtime model for adaptive profile
VOICE_AGENT_REALTIME_MODEL= # (Optional) Explicit realtime model override (wins over strategy/primary/secondary)
VOICE_AGENT_INPUT_TRANSCRIPTION_MODEL=gpt-4o-mini-transcribe # (Optional) Voice agent STT model; falls back to AGENT_STT_MODEL
VOICE_AGENT_TRANSCRIPTION_ENABLED=true # (Optional) Enable realtime STT (defaults to true when model provided and multi-participant mode is off)
VOICE_AGENT_TRANSCRIPTION_LANGUAGE=en # (Optional) Language hint for realtime transcription (defaults to AGENT_STT_LANGUAGE)
Expand Down
37 changes: 36 additions & 1 deletion src/app/settings/models/_lib/guided-config.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import type { GuidedField, GuidedSection, ResolvedFieldSource } from './types';

const MODEL_SUGGESTIONS = {
openai: ['gpt-realtime', 'gpt-5-mini', 'gpt-4o-mini-transcription', 'whisper-1'],
openai: [
'gpt-realtime-1.5',
'gpt-realtime-mini',
'gpt-audio-1.5',
'gpt-audio-mini',
'gpt-5-mini',
'gpt-4o-mini-transcription',
'whisper-1',
],
anthropic: ['claude-haiku-4-5', 'claude-sonnet-4-5'],
google: ['gemini-2.5-pro', 'gemini-2.5-flash', 'gemini-3-pro-image-preview'],
cerebras: ['llama3.3-70b', 'gpt-oss-120b', 'qwen3-32b', 'llama3.1-8b'],
Expand All @@ -28,6 +36,20 @@ export const GUIDED_SECTIONS: GuidedSection[] = [
help: 'Used by realtime voice AgentSession model constructor.',
suggestions: MODEL_SUGGESTIONS.openai,
},
{
path: 'models.voiceRealtimePrimary',
label: 'Voice Realtime Primary Model',
kind: 'string',
help: 'Primary voice realtime model (used for full profile in adaptive mode).',
suggestions: MODEL_SUGGESTIONS.openai,
},
{
path: 'models.voiceRealtimeSecondary',
label: 'Voice Realtime Secondary Model',
kind: 'string',
help: 'Secondary voice realtime model (used for lite profile in adaptive mode).',
suggestions: MODEL_SUGGESTIONS.openai,
},
{
path: 'models.voiceRouter',
label: 'Voice Router Model',
Expand Down Expand Up @@ -204,6 +226,16 @@ export const GUIDED_SECTIONS: GuidedSection[] = [
],
help: 'Realtime turn detection mode.',
},
{
path: 'knobs.voice.realtimeModelStrategy',
label: 'Realtime Model Strategy',
kind: 'enum',
options: [
{ label: 'Adaptive Profile', value: 'adaptive_profile' },
{ label: 'Fixed', value: 'fixed' },
],
help: 'Choose model by capability profile (`adaptive_profile`) or use fixed realtime model.',
},
{
path: 'knobs.voice.inputNoiseReduction',
label: 'Input Noise Reduction',
Expand Down Expand Up @@ -325,8 +357,11 @@ const RESTART_REQUIRED_PREFIXES = [
const NEXT_SESSION_PREFIXES = [
'knobs.conductor.roomConcurrency',
'models.voiceRealtime',
'models.voiceRealtimePrimary',
'models.voiceRealtimeSecondary',
'models.voiceRouter',
'models.voiceStt',
'knobs.voice.realtimeModelStrategy',
];

export const ALL_GUIDED_FIELDS = GUIDED_SECTIONS.flatMap((section) => section.fields);
Expand Down
23 changes: 21 additions & 2 deletions src/app/settings/models/reference/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ const modelRows: Row[] = [
rangeOrOptions: 'provider/model id',
notes: 'Realtime voice conversation model.',
},
{
path: 'models.voiceRealtimePrimary',
type: 'string',
rangeOrOptions: 'provider/model id',
notes: 'Primary realtime model (adaptive full-profile target).',
},
{
path: 'models.voiceRealtimeSecondary',
type: 'string',
rangeOrOptions: 'provider/model id',
notes: 'Secondary realtime model (adaptive lite-profile target).',
},
{
path: 'models.voiceRouter',
type: 'string',
Expand Down Expand Up @@ -143,6 +155,12 @@ const knobRows: Row[] = [
rangeOrOptions: 'provider/model id',
notes: 'Knob alias for realtime model.',
},
{
path: 'knobs.voice.realtimeModelStrategy',
type: 'enum',
rangeOrOptions: 'fixed | adaptive_profile',
notes: 'Realtime model selection strategy.',
},
{
path: 'knobs.voice.routerModel',
type: 'string',
Expand Down Expand Up @@ -274,8 +292,9 @@ const knobRows: Row[] = [
const coverageRows = [
{
runtime: 'Realtime voice conversation',
coverage: 'covered',
keys: 'models.voiceRealtime, knobs.voice.*',
coverage: 'partial (transport/model env-only)',
keys:
'models.voiceRealtime, models.voiceRealtimePrimary, models.voiceRealtimeSecondary, knobs.voice.*, VOICE_AGENT_MODEL_TRANSPORT env, VOICE_AGENT_RESPONSES_MODEL env',
},
{
runtime: 'Voice transcription',
Expand Down
8 changes: 4 additions & 4 deletions src/components/tool-dispatcher/hooks/useToolRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -932,14 +932,14 @@ export function useToolRunner(options: UseToolRunnerOptions): ToolRunnerApi {
typeof params?.intentId === 'string' && params.intentId.trim().length > 0
? params.intentId.trim()
: requestId;
const idempotencyKey =
typeof params?.idempotency_key === 'string' && params.idempotency_key.trim().length > 0
? params.idempotency_key.trim()
: `quick-${routeType}-${stableQuickHash(`${targetRoom}|${requestId}|${messageText}`)}`;
const participantId =
typeof params?.participant_id === 'string' && params.participant_id.trim().length > 0
? params.participant_id.trim()
: undefined;
const idempotencyKey =
typeof params?.idempotency_key === 'string' && params.idempotency_key.trim().length > 0
? params.idempotency_key.trim()
: `quick-${routeType}-${stableQuickHash(`${targetRoom}|${participantId || 'unknown'}|${requestId}|${messageText}`)}`;
const participantViewport =
participantId && participantViewportRef.current.has(participantId)
? participantViewportRef.current.get(participantId)?.viewport
Expand Down
65 changes: 65 additions & 0 deletions src/lib/agents/control-plane/resolver.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Stub the profile store so the resolver sees only env-derived defaults;
// these tests exercise the env fallback chain in isolation.
jest.mock('./profiles', () => ({
  getModelControlProfilesForResolution: jest.fn(async () => []),
}));

import { clearModelControlResolverCache, resolveModelControl } from './resolver';

// Every env var mutated below. The snapshot/restore in afterEach is driven by
// this single list so added keys cannot drift out of the restore path.
const ENV_KEYS = [
  'VOICE_AGENT_REALTIME_MODEL',
  'VOICE_AGENT_REALTIME_MODEL_PRIMARY',
  'VOICE_AGENT_REALTIME_MODEL_SECONDARY',
] as const;

type EnvKey = (typeof ENV_KEYS)[number];

describe('model control resolver env defaults', () => {
  // Snapshot derived from ENV_KEYS (not hand-listed) so the keys we save and
  // the keys we restore are always the same set.
  const originalEnv = Object.fromEntries(
    ENV_KEYS.map((key) => [key, process.env[key]]),
  ) as Record<EnvKey, string | undefined>;

  afterEach(() => {
    // Drop memoized resolution state, then restore the ambient environment
    // exactly (deleting keys that were originally unset).
    clearModelControlResolverCache();
    for (const key of ENV_KEYS) {
      const value = originalEnv[key];
      if (typeof value === 'undefined') {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  });

  it('does not force an explicit voiceRealtime model when only primary/secondary defaults apply', async () => {
    delete process.env.VOICE_AGENT_REALTIME_MODEL;
    process.env.VOICE_AGENT_REALTIME_MODEL_PRIMARY = 'gpt-realtime-1.5';
    process.env.VOICE_AGENT_REALTIME_MODEL_SECONDARY = 'gpt-realtime-mini';

    const resolved = await resolveModelControl(
      {
        task: 'voice.realtime',
        room: 'room-a',
        includeUserScope: false,
      },
      { skipCache: true },
    );

    // Without an explicit override, voiceRealtime stays unset so adaptive
    // primary/secondary selection remains effective downstream.
    expect(resolved.effective.models?.voiceRealtime).toBeUndefined();
    expect(resolved.effective.models?.voiceRealtimePrimary).toBe('gpt-realtime-1.5');
    expect(resolved.effective.models?.voiceRealtimeSecondary).toBe('gpt-realtime-mini');
  });

  it('preserves explicit voiceRealtime override when configured', async () => {
    // Pin primary/secondary to different ids so this test actually proves the
    // explicit override wins rather than coinciding with a default.
    process.env.VOICE_AGENT_REALTIME_MODEL = 'gpt-realtime-1.5';
    process.env.VOICE_AGENT_REALTIME_MODEL_PRIMARY = 'gpt-realtime-mini';
    process.env.VOICE_AGENT_REALTIME_MODEL_SECONDARY = 'gpt-realtime-mini';

    const resolved = await resolveModelControl(
      {
        task: 'voice.realtime',
        room: 'room-a',
        includeUserScope: false,
      },
      { skipCache: true },
    );

    expect(resolved.effective.models?.voiceRealtime).toBe('gpt-realtime-1.5');
  });
});
11 changes: 11 additions & 0 deletions src/lib/agents/control-plane/resolver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ const APPLY_MODE_BY_PATH: Array<{ prefix: string; mode: ApplyMode }> = [
{ prefix: 'knobs.conductor.taskRetryJitterRatio', mode: 'restart_required' },
{ prefix: 'knobs.conductor.roomConcurrency', mode: 'next_session' },
{ prefix: 'models.voiceRealtime', mode: 'next_session' },
{ prefix: 'models.voiceRealtimePrimary', mode: 'next_session' },
{ prefix: 'models.voiceRealtimeSecondary', mode: 'next_session' },
{ prefix: 'models.voiceRouter', mode: 'next_session' },
{ prefix: 'models.voiceStt', mode: 'next_session' },
{ prefix: 'knobs.voice.realtimeModelStrategy', mode: 'next_session' },
];

const deepMerge = <T extends Record<string, unknown>>(target: T, patch: Record<string, unknown>): T => {
Expand Down Expand Up @@ -131,7 +134,10 @@ const envDefaults = (): ModelControlPatch => {
models: {
canvasSteward: process.env.CANVAS_STEWARD_MODEL,
voiceRouter: process.env.VOICE_AGENT_ROUTER_MODEL,
// Keep explicit override separate so adaptive primary/secondary selection remains effective by default.
voiceRealtime: process.env.VOICE_AGENT_REALTIME_MODEL,
voiceRealtimePrimary: process.env.VOICE_AGENT_REALTIME_MODEL_PRIMARY || 'gpt-realtime-1.5',
voiceRealtimeSecondary: process.env.VOICE_AGENT_REALTIME_MODEL_SECONDARY || 'gpt-realtime-mini',
voiceStt:
process.env.VOICE_AGENT_STT_MODEL ||
process.env.VOICE_AGENT_INPUT_TRANSCRIPTION_MODEL ||
Expand Down Expand Up @@ -166,6 +172,11 @@ const envDefaults = (): ModelControlPatch => {
: process.env.VOICE_AGENT_TRANSCRIPTION_ENABLED === 'false'
? false
: undefined,
realtimeModelStrategy:
process.env.VOICE_AGENT_REALTIME_MODEL_STRATEGY === 'fixed' ||
process.env.VOICE_AGENT_REALTIME_MODEL_STRATEGY === 'adaptive_profile'
? process.env.VOICE_AGENT_REALTIME_MODEL_STRATEGY
: 'adaptive_profile',
turnDetection:
process.env.VOICE_AGENT_TURN_DETECTION === 'none' ||
process.env.VOICE_AGENT_TURN_DETECTION === 'server_vad' ||
Expand Down
3 changes: 3 additions & 0 deletions src/lib/agents/control-plane/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const voiceKnobPatchSchema = z
transcriptionEnabled: z.boolean().optional(),
sttModel: z.string().trim().min(1).max(120).optional(),
realtimeModel: z.string().trim().min(1).max(120).optional(),
realtimeModelStrategy: z.enum(['fixed', 'adaptive_profile']).optional(),
routerModel: z.string().trim().min(1).max(120).optional(),
turnDetection: z.enum(['none', 'server_vad', 'semantic_vad']).optional(),
inputNoiseReduction: z.enum(['none', 'near_field', 'far_field']).optional(),
Expand Down Expand Up @@ -79,6 +80,8 @@ export const modelControlModelsSchema = z
canvasSteward: z.string().trim().min(1).max(120).optional(),
voiceRouter: z.string().trim().min(1).max(120).optional(),
voiceRealtime: z.string().trim().min(1).max(120).optional(),
voiceRealtimePrimary: z.string().trim().min(1).max(120).optional(),
voiceRealtimeSecondary: z.string().trim().min(1).max(120).optional(),
voiceStt: z.string().trim().min(1).max(120).optional(),
searchModel: z.string().trim().min(1).max(120).optional(),
fastDefault: z.string().trim().min(1).max(120).optional(),
Expand Down
3 changes: 3 additions & 0 deletions src/lib/agents/control-plane/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export type VoiceKnobPatch = {
transcriptionEnabled?: boolean;
sttModel?: string;
realtimeModel?: string;
realtimeModelStrategy?: 'fixed' | 'adaptive_profile';
routerModel?: string;
turnDetection?: 'none' | 'server_vad' | 'semantic_vad';
inputNoiseReduction?: 'none' | 'near_field' | 'far_field';
Expand Down Expand Up @@ -62,6 +63,8 @@ export type ModelControlPatch = {
canvasSteward?: string;
voiceRouter?: string;
voiceRealtime?: string;
voiceRealtimePrimary?: string;
voiceRealtimeSecondary?: string;
voiceStt?: string;
searchModel?: string;
fastDefault?: string;
Expand Down
4 changes: 4 additions & 0 deletions src/lib/agents/realtime/multi-participant-transcription.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export type LiveTranscriptionPayload = {
export type MultiParticipantTranscriptionOptions = {
room: Room;
maxParticipants: number;
realtimeModel?: string;
model: string;
language?: string;
inputAudioNoiseReduction?: RealtimeNoiseReductionOption | null;
Expand Down Expand Up @@ -61,6 +62,7 @@ const getSpeakerLabel = (participant: any): string => {
export class MultiParticipantTranscriptionManager {
private room: Room;
private maxParticipants: number;
private realtimeModel?: string;
private model: string;
private language?: string;
private inputAudioNoiseReduction?: RealtimeNoiseReductionOption | null;
Expand All @@ -76,6 +78,7 @@ export class MultiParticipantTranscriptionManager {
constructor(options: MultiParticipantTranscriptionOptions) {
this.room = options.room;
this.maxParticipants = Math.max(1, Math.floor(options.maxParticipants));
this.realtimeModel = options.realtimeModel;
this.model = options.model;
this.language = options.language;
this.inputAudioNoiseReduction = options.inputAudioNoiseReduction;
Expand Down Expand Up @@ -225,6 +228,7 @@ export class MultiParticipantTranscriptionManager {
});

const llm = new openaiRealtime.RealtimeModel({
...(this.realtimeModel ? { model: this.realtimeModel } : {}),
inputAudioTranscription: {
model: this.model,
...(this.language ? { language: this.language } : {}),
Expand Down
Loading
Loading