bug-ops · bug-ops · Feb 27, 2026 · Feb 26, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/crates/zeph-acp/src/agent.rs b/crates/zeph-acp/src/agent.rs
@@ -528,7 +528,10 @@ impl acp::Agent for ZephAcpAgent {
         }
 
         let trimmed_text = text.trim_start();
-        if trimmed_text.starts_with('/') && trimmed_text != "/compact" {
+        if trimmed_text.starts_with('/')
+            && trimmed_text != "/compact"
+            && trimmed_text != "/model refresh"
+        {
             return self
                 .handle_slash_command(&args.session_id, trimmed_text)
                 .await;

diff --git a/crates/zeph-core/src/agent/mod.rs b/crates/zeph-core/src/agent/mod.rs
@@ -458,6 +458,7 @@ impl<C: Channel> Agent<C> {
                 self.channel
                     .send(&format!("Cleared {n} queued messages."))
                     .await?;
+                let _ = self.channel.flush_chunks().await;
                 continue;
             }
 
@@ -474,16 +475,19 @@ impl<C: Channel> Agent<C> {
                 } else {
                     let _ = self.channel.send("Nothing to compact.").await;
                 }
+                let _ = self.channel.flush_chunks().await;
                 continue;
             }
 
             if trimmed == "/clear" {
                 self.clear_history();
+                let _ = self.channel.flush_chunks().await;
                 continue;
             }
 
             if trimmed == "/model" || trimmed.starts_with("/model ") {
                 self.handle_model_command(trimmed).await;
+                let _ = self.channel.flush_chunks().await;
                 continue;
             }
 

diff --git a/crates/zeph-llm/src/any.rs b/crates/zeph-llm/src/any.rs
@@ -105,20 +105,7 @@ impl AnyProvider {
                     })
                     .collect())
             }
-            AnyProvider::Orchestrator(p) => {
-                tracing::debug!(
-                    "list_models_remote: Orchestrator falling back to sync list_models (config-time data)"
-                );
-                Ok(p.list_models()
-                    .into_iter()
-                    .map(|id| crate::model_cache::RemoteModelInfo {
-                        display_name: id.clone(),
-                        id,
-                        context_window: None,
-                        created_at: None,
-                    })
-                    .collect())
-            }
+            AnyProvider::Orchestrator(p) => p.list_models_remote().await,
             #[cfg(feature = "candle")]
             AnyProvider::Candle(_) => Ok(vec![]),
             #[cfg(feature = "mock")]

diff --git a/src/acp.rs b/src/acp.rs
@@ -329,33 +329,133 @@ async fn spawn_acp_agent(
 
 /// Collect model keys from config when `acp.available_models` is not set.
 ///
-/// Each key uses `"{provider_name}:{model}"` format matching the provider factory.
+/// For each configured provider the disk cache is consulted first (24 h TTL).
+/// When the cache is warm the full remote model list is returned; otherwise the
+/// single model from config is used as the fallback so startup is never blocked
+/// on network I/O.  Call `/model refresh` at runtime to populate the caches.
+///
+/// Each key uses `"{provider_name}:{model_id}"` format matching the provider factory.
 #[cfg(feature = "acp")]
 fn discover_models_from_config(config: &zeph_core::config::Config) -> Vec<String> {
+    use zeph_llm::model_cache::ModelCache;
+
+    /// Expand a provider slug using its on-disk cache, or fall back to `fallback`.
+    fn expand_from_cache(slug: &str, fallback: &str) -> Vec<String> {
+        let cache = ModelCache::for_slug(slug);
+        if !cache.is_stale()
+            && let Ok(Some(entries)) = cache.load()
+            && !entries.is_empty()
+        {
+            return entries
+                .into_iter()
+                .map(|m| format!("{slug}:{}", m.id))
+                .collect();
+        }
+        vec![format!("{slug}:{fallback}")]
+    }
+
     let mut models: Vec<String> = Vec::new();
-    models.push(format!("ollama:{}", config.llm.model));
-    if config.secrets.claude_api_key.is_some() {
-        let model = config
+
+    if config.llm.provider == zeph_core::config::ProviderKind::Orchestrator {
+        // Orchestrator: enumerate sub-providers and use their own cache/fallback.
+        if let Some(ref orch) = config.llm.orchestrator {
+            for sub in orch.providers.values() {
+                let slug = sub.provider_type.as_str();
+                let fallback = sub.model.as_deref().unwrap_or("unknown");
+                models.extend(expand_from_cache(slug, fallback));
+            }
+        }
+    } else {
+        // Single provider — use top-level llm section.
+        models.extend(expand_from_cache("ollama", &config.llm.model));
+    }
+
+    // Claude — always add when API key present, even under orchestrator.
+    if config.secrets.claude_api_key.is_some()
+        && config.llm.provider != zeph_core::config::ProviderKind::Orchestrator
+    {
+        let fallback = config
             .llm
             .cloud
             .as_ref()
             .map_or("claude-sonnet-4-5", |c| c.model.as_str());
-        models.push(format!("claude:{model}"));
+        models.extend(expand_from_cache("claude", fallback));
     }
-    if let (Some(_), Some(openai_cfg)) = (&config.secrets.openai_api_key, &config.llm.openai) {
-        models.push(format!("openai:{}", openai_cfg.model));
+
+    // OpenAI — only when API key and config section are present (non-orchestrator).
+    if config.llm.provider != zeph_core::config::ProviderKind::Orchestrator
+        && let (Some(_), Some(openai_cfg)) = (&config.secrets.openai_api_key, &config.llm.openai)
+    {
+        models.extend(expand_from_cache("openai", &openai_cfg.model));
     }
+
+    // Compatible providers.
     if let Some(ref entries) = config.llm.compatible {
         for entry in entries {
             if config.secrets.compatible_api_keys.contains_key(&entry.name) {
-                models.push(format!("{}:{}", entry.name, entry.model));
+                models.extend(expand_from_cache(&entry.name, &entry.model));
             }
         }
     }
+
     models.dedup();
     models
 }
 
+/// Populate model caches for all providers before the ACP server starts.
+///
+/// Uses a 5-second timeout so that a slow or unavailable provider does not block startup.
+/// After a successful fetch, each unique provider slug present in `acp_available_models`
+/// is expanded from its on-disk cache, replacing the single config-time fallback entry.
+#[cfg(feature = "acp")]
+async fn warm_model_caches(deps: &mut AgentDeps) {
+    use zeph_llm::model_cache::ModelCache;
+
+    let provider = deps.provider.clone();
+    let fetch = async move {
+        match provider.list_models_remote().await {
+            Ok(models) => tracing::debug!(count = models.len(), "model cache warmed"),
+            Err(e) => tracing::debug!(error = %e, "model cache warm-up failed"),
+        }
+    };
+
+    if tokio::time::timeout(std::time::Duration::from_secs(5), fetch)
+        .await
+        .is_err()
+    {
+        tracing::debug!("model cache warm-up timed out; using config fallback");
+        return;
+    }
+
+    // Collect unique provider slugs from the current available_models list.
+    let slugs: Vec<String> = deps
+        .acp_available_models
+        .iter()
+        .filter_map(|k| k.split_once(':').map(|(s, _)| s.to_owned()))
+        .collect::<std::collections::HashSet<_>>()
+        .into_iter()
+        .collect();
+
+    for slug in slugs {
+        let cache = ModelCache::for_slug(&slug);
+        if cache.is_stale() {
+            continue;
+        }
+        if let Ok(Some(entries)) = cache.load()
+            && !entries.is_empty()
+        {
+            let new_keys: Vec<String> = entries
+                .into_iter()
+                .map(|m| format!("{slug}:{}", m.id))
+                .collect();
+            deps.acp_available_models
+                .retain(|k| !k.starts_with(&format!("{slug}:")));
+            deps.acp_available_models.extend(new_keys);
+        }
+    }
+    deps.acp_available_models.dedup();
+}
+
 /// Build a `ProviderFactory` from the known named providers in config.
 ///
 /// Each available model key is `"{provider_name}:{model}"`.
@@ -524,6 +624,10 @@ pub(crate) async fn run_acp_server(
     let (mut deps, _keepalive) =
         build_acp_deps(config_path, vault_backend, vault_key, vault_path).await?;
 
+    // Warm model caches before advertising available_models to the ACP client.
+    // A 5-second budget is given; on timeout the fallback list from config is used.
+    warm_model_caches(&mut deps).await;
+
     let mcp_manager_for_acp = Arc::clone(&deps.mcp_manager);
     let server_config = zeph_acp::AcpServerConfig {
         agent_name: deps.acp_agent_name.clone(),
@@ -579,6 +683,8 @@ pub(crate) async fn run_acp_http_server(
     let (mut deps, _keepalive) =
         build_acp_deps(config_path, vault_backend, vault_key, vault_path).await?;
 
+    warm_model_caches(&mut deps).await;
+
     let bind_addr = bind_override.map_or_else(|| "127.0.0.1:9800".to_owned(), str::to_owned);
 
     // CLI flag overrides config/env values for auth token.