Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased]

### Added
- `ModelInfo` struct (`id`, `display_name`, `context_window`, `created_at`) in `zeph-llm` for dynamic model discovery (#992)
- `ModelCache` in `zeph-llm/src/model_cache.rs`: disk-backed per-provider model list with 24h TTL, atomic writes, `~/.cache/zeph/models/{slug}.json` (#992)
- `LlmProvider::list_models_remote()` async trait method with default fallback to `list_models()` (#992)
- `OllamaProvider::list_models_remote()` via `ollama_rs::list_local_models`; maps parameter size and quantization into `display_name` (#993)
- `ClaudeProvider::list_models_remote()` via paginated `GET /v1/models`; 401/403 errors do not overwrite valid cache (#994)
- `OpenAiProvider::list_models_remote()` via `GET {base_url}/v1/models` with Bearer auth; cache slug derived from sanitized hostname (#995)
- `CompatibleProvider::list_models_remote()` delegates to inner `OpenAiProvider` (#995)
- `AnyProvider::list_models_remote()` dispatches to active inner variant (#996)
- `RouterProvider::list_models_remote()` aggregates models from all fallback providers, deduplicating by `id` (#996)
- `ModelOrchestrator::list_models_remote()` aggregates across all registered sub-providers (#996)
- `Agent::set_model(model_id)` validates input (non-empty, max 256 ASCII printable chars) and hot-swaps provider model (#997)
- `/model` command lists all discovered models with display names and cache age indicator (#997)
- `/model <id>` switches the active model and confirms in chat (#997)
- `/model refresh` clears all provider caches in `~/.cache/zeph/models/` and re-fetches (#997)
- ACP `AvailableCommandsUpdate` populated with model list on session start (#997)

### Fixed
- `SubAgentConfig` in `zeph-core` config with `enabled`, `max_concurrent` (default 1), `extra_dirs` fields; wired into bootstrap via `with_subagent_manager()` on `AgentBuilder` (#973, #964)
- Sub-agent definition discovery from `.zeph/agents/` (project scope) and `~/.config/zeph/agents/` (user scope) with priority-based deduplication (#964)
- Skill injection into sub-agent system prompt: filtered skills prepended as fenced `skills` block at spawn time (#967)
Expand Down
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions crates/zeph-acp/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1573,9 +1573,10 @@ fn loopback_event_to_updates(event: LoopbackEvent) -> Vec<acp::SessionUpdate> {
.map_or_else(
|| tool_name.clone(),
|s| {
const MAX: usize = 120;
if s.len() > MAX {
format!("{}…", &s[..MAX])
const MAX_CHARS: usize = 120;
if s.chars().count() > MAX_CHARS {
let truncated: String = s.chars().take(MAX_CHARS).collect();
format!("{truncated}…")
} else {
s.to_owned()
}
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ notify.workspace = true
notify-debouncer-mini.workspace = true
regex.workspace = true
schemars.workspace = true
dirs.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror.workspace = true
Expand Down
194 changes: 194 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,126 @@ impl<C: Channel> Agent<C> {
continue;
}

if trimmed == "/model" || trimmed.starts_with("/model ") {
self.handle_model_command(trimmed).await;
continue;
}

self.process_user_message(text, image_parts).await?;
}

Ok(())
}

/// Validate `model_id` and hot-swap it into the agent runtime.
///
/// Performs only local validation (non-empty, at most 256 bytes, printable
/// ASCII). It does NOT verify that the provider actually serves this model —
/// callers wanting that guarantee must consult the provider's model list
/// (e.g. `list_models_remote()`) first.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when the id is empty, longer
/// than 256 bytes, or contains non-printable or non-ASCII characters.
pub fn set_model(&mut self, model_id: &str) -> Result<(), String> {
    if model_id.is_empty() {
        return Err("model id must not be empty".to_string());
    }
    // `len()` counts bytes, but the ASCII check below guarantees that any
    // accepted id has bytes == chars, so the limit is effectively 256 chars.
    if model_id.len() > 256 {
        return Err("model id exceeds maximum length of 256 characters".to_string());
    }
    if !model_id
        .chars()
        .all(|c| c.is_ascii() && !c.is_ascii_control())
    {
        return Err("model id must contain only printable ASCII characters".to_string());
    }
    // No provider round-trip here: the swap is purely local runtime state.
    self.runtime.model_name = model_id.to_string();
    tracing::info!(model = model_id, "set_model called");
    Ok(())
}

/// Handle `/model`, `/model <id>`, and `/model refresh` commands.
///
/// All output (listings, confirmations, errors) goes to the channel; send
/// failures are deliberately ignored so a broken channel cannot wedge the
/// agent's command loop.
async fn handle_model_command(&mut self, trimmed: &str) {
    // Text after the "/model" prefix, trimmed. Empty string => list models.
    let arg = trimmed.strip_prefix("/model").map_or("", str::trim);

    if arg == "refresh" {
        // Invalidate all model cache files in the cache directory.
        // NOTE(review): this clears caches for ALL providers under
        // ~/.cache/zeph/models/, not only the active one — confirm intended.
        if let Some(cache_dir) = dirs::cache_dir() {
            let models_dir = cache_dir.join("zeph").join("models");
            if let Ok(entries) = std::fs::read_dir(&models_dir) {
                for entry in entries.flatten() {
                    let path = entry.path();
                    // Cache files are per-provider `{slug}.json`; skip others.
                    if path.extension().and_then(|e| e.to_str()) == Some("json") {
                        // Best-effort delete: failure just leaves a stale file.
                        let _ = std::fs::remove_file(&path);
                    }
                }
            }
        }
        // Re-fetch immediately so the cache is repopulated.
        match self.provider.list_models_remote().await {
            Ok(models) => {
                let _ = self
                    .channel
                    .send(&format!("Fetched {} models.", models.len()))
                    .await;
            }
            Err(e) => {
                let _ = self
                    .channel
                    .send(&format!("Error fetching models: {e}"))
                    .await;
            }
        }
        return;
    }

    if arg.is_empty() {
        // List models: try cache first, then remote.
        let cache = zeph_llm::model_cache::ModelCache::for_slug(self.provider.name());
        // A stale cache is treated as absent; load() errors also degrade to a
        // remote fetch rather than surfacing to the user.
        let models = if cache.is_stale() {
            None
        } else {
            cache.load().unwrap_or(None)
        };
        let models = if let Some(m) = models {
            m
        } else {
            match self.provider.list_models_remote().await {
                Ok(m) => m,
                Err(e) => {
                    let _ = self
                        .channel
                        .send(&format!("Error fetching models: {e}"))
                        .await;
                    return;
                }
            }
        };

        if models.is_empty() {
            let _ = self.channel.send("No models available.").await;
            return;
        }
        // One numbered line per model: "  1. Display Name (model-id)".
        let mut lines = vec!["Available models:".to_string()];
        for (i, m) in models.iter().enumerate() {
            lines.push(format!("  {}. {} ({})", i + 1, m.display_name, m.id));
        }
        let _ = self.channel.send(&lines.join("\n")).await;
        return;
    }

    // `/model <id>` — switch model
    let model_id = arg;
    match self.set_model(model_id) {
        Ok(()) => {
            let _ = self
                .channel
                .send(&format!("Switched to model: {model_id}"))
                .await;
        }
        Err(e) => {
            let _ = self.channel.send(&format!("Error: {e}")).await;
        }
    }
}

async fn resolve_message(
&self,
msg: crate::channel::ChannelMessage,
Expand Down Expand Up @@ -2665,6 +2779,86 @@ pub(super) mod agent_tests {
.unwrap();
assert!(resp.contains("No pending secret request"));
}

#[test]
fn set_model_updates_model_name() {
    // A valid model id must be accepted and reflected in runtime state.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.set_model("claude-opus-4-6").expect("valid model id");
    assert_eq!(agent.runtime.model_name, "claude-opus-4-6");
}

#[test]
fn set_model_overwrites_previous_value() {
    // A second switch must replace the first id, not keep or append to it.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    for id in ["model-a", "model-b"] {
        agent.set_model(id).unwrap();
    }
    assert_eq!(agent.runtime.model_name, "model-b");
}

#[tokio::test]
async fn model_command_switch_sends_confirmation() {
    // Switching via `/model <id>` should echo a confirmation naming the id.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.handle_model_command("/model my-new-model").await;

    let messages = sent.lock().unwrap();
    let confirmed = messages.iter().any(|m| m.contains("my-new-model"));
    assert!(confirmed, "expected switch confirmation, got: {messages:?}");
}

#[tokio::test]
async fn model_command_list_no_cache_fetches_remote() {
    // The mock provider's list_models_remote returns an empty vec, so the
    // agent should fall through the (invalidated) cache and report that no
    // models are available.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    // Force a cache miss for the mock provider slug so the remote path runs.
    zeph_llm::model_cache::ModelCache::for_slug("mock").invalidate();
    agent.handle_model_command("/model").await;

    let messages = sent.lock().unwrap();
    let reported_empty = messages.iter().any(|m| m.contains("No models"));
    assert!(
        reported_empty,
        "expected empty model list message, got: {messages:?}"
    );
}

#[tokio::test]
async fn model_command_refresh_sends_result() {
    // `/model refresh` should clear caches, re-fetch, and report the count.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.handle_model_command("/model refresh").await;

    let messages = sent.lock().unwrap();
    let fetched = messages.iter().any(|m| m.contains("Fetched"));
    assert!(fetched, "expected fetch confirmation, got: {messages:?}");
}
}

/// End-to-end tests for M30 resilient compaction: error detection → compact → retry → success.
Expand Down
2 changes: 2 additions & 0 deletions crates/zeph-llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ metal = ["candle", "candle-core/metal", "candle-nn/metal", "candle-transformers/

[dependencies]
base64.workspace = true
dirs.workspace = true
thiserror.workspace = true
candle-core = { workspace = true, optional = true }
candle-nn = { workspace = true, optional = true }
Expand All @@ -46,6 +47,7 @@ tracing.workspace = true
insta.workspace = true
proptest.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
wiremock.workspace = true

[lints]
workspace = true
53 changes: 53 additions & 0 deletions crates/zeph-llm/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,59 @@ impl AnyProvider {
delegate_provider!(self, |p| p.chat_typed::<T>(messages).await)
}

/// Fetch available models from this provider and update the disk cache.
///
/// Returns an empty list for providers that do not support remote model discovery
/// (Candle, Mock) without returning an error.
///
/// # Errors
///
/// Returns an error if the remote request fails.
pub async fn list_models_remote(
    &self,
) -> Result<Vec<crate::model_cache::RemoteModelInfo>, crate::LlmError> {
    /// Map plain model ids (from the sync `list_models()` API) into
    /// `RemoteModelInfo`, reusing the id as display name with no metadata.
    fn from_ids(ids: Vec<String>) -> Vec<crate::model_cache::RemoteModelInfo> {
        ids.into_iter()
            .map(|id| crate::model_cache::RemoteModelInfo {
                display_name: id.clone(),
                id,
                context_window: None,
                created_at: None,
            })
            .collect()
    }

    match self {
        AnyProvider::Ollama(p) => p.list_models_remote().await,
        AnyProvider::Claude(p) => p.list_models_remote().await,
        AnyProvider::OpenAi(p) => p.list_models_remote().await,
        AnyProvider::Compatible(p) => p.list_models_remote().await,
        // Router and Orchestrator use synchronous list_models() to avoid recursive async cycles.
        // Results reflect config-time model lists (potentially stale vs. live remote data).
        AnyProvider::Router(p) => {
            tracing::debug!(
                "list_models_remote: Router falling back to sync list_models (config-time data)"
            );
            Ok(from_ids(p.list_models()))
        }
        AnyProvider::Orchestrator(p) => {
            tracing::debug!(
                "list_models_remote: Orchestrator falling back to sync list_models (config-time data)"
            );
            Ok(from_ids(p.list_models()))
        }
        #[cfg(feature = "candle")]
        AnyProvider::Candle(_) => Ok(vec![]),
        #[cfg(feature = "mock")]
        AnyProvider::Mock(_) => Ok(vec![]),
    }
}

/// Propagate a status sender to the inner provider (where supported).
pub fn set_status_tx(&mut self, tx: StatusTx) {
match self {
Expand Down
Loading
Loading