Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased]

### Added
- `ModelInfo` struct (`id`, `display_name`, `context_window`, `created_at`) in `zeph-llm` for dynamic model discovery (#992)
- `ModelCache` in `zeph-llm/src/model_cache.rs`: disk-backed per-provider model list with 24h TTL, atomic writes, `~/.cache/zeph/models/{slug}.json` (#992)
- `LlmProvider::list_models_remote()` async trait method with default fallback to `list_models()` (#992)
- `OllamaProvider::list_models_remote()` via `ollama_rs::list_local_models`; maps parameter size and quantization into `display_name` (#993)
- `ClaudeProvider::list_models_remote()` via paginated `GET /v1/models`; 401/403 errors do not overwrite valid cache (#994)
- `OpenAiProvider::list_models_remote()` via `GET {base_url}/v1/models` with Bearer auth; cache slug derived from sanitized hostname (#995)
- `CompatibleProvider::list_models_remote()` delegates to inner `OpenAiProvider` (#995)
- `AnyProvider::list_models_remote()` dispatches to active inner variant (#996)
- `RouterProvider::list_models_remote()` aggregates models from all fallback providers, deduplicating by `id` (#996)
- `ModelOrchestrator::list_models_remote()` aggregates across all registered sub-providers (#996)
- `Agent::set_model(model_id)` validates input (non-empty, max 256 ASCII printable chars) and hot-swaps provider model (#997)
- `/model` command lists all discovered models with display names and cache age indicator (#997)
- `/model <id>` switches the active model and confirms in chat (#997)
- `/model refresh` clears all provider caches in `~/.cache/zeph/models/` and re-fetches (#997)
- ACP `AvailableCommandsUpdate` populated with model list on session start (#997)

### Fixed
- `SubAgentConfig` in `zeph-core` config with `enabled`, `max_concurrent` (default 1), `extra_dirs` fields; wired into bootstrap via `with_subagent_manager()` on `AgentBuilder` (#973, #964)
- Sub-agent definition discovery from `.zeph/agents/` (project scope) and `~/.config/zeph/agents/` (user scope) with priority-based deduplication (#964)
- Skill injection into sub-agent system prompt: filtered skills prepended as fenced `skills` block at spawn time (#967)
Expand Down
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions crates/zeph-acp/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1573,9 +1573,10 @@ fn loopback_event_to_updates(event: LoopbackEvent) -> Vec<acp::SessionUpdate> {
.map_or_else(
|| tool_name.clone(),
|s| {
const MAX: usize = 120;
if s.len() > MAX {
format!("{}…", &s[..MAX])
const MAX_CHARS: usize = 120;
if s.chars().count() > MAX_CHARS {
let truncated: String = s.chars().take(MAX_CHARS).collect();
format!("{truncated}…")
} else {
s.to_owned()
}
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ notify.workspace = true
notify-debouncer-mini.workspace = true
regex.workspace = true
schemars.workspace = true
dirs.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror.workspace = true
Expand Down
194 changes: 194 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,126 @@ impl<C: Channel> Agent<C> {
continue;
}

if trimmed == "/model" || trimmed.starts_with("/model ") {
self.handle_model_command(trimmed).await;
continue;
}

self.process_user_message(text, image_parts).await?;
}

Ok(())
}

/// Validate `model_id` and hot-swap it into the agent runtime.
///
/// Performs only local validation (non-empty, at most 256 bytes, printable
/// ASCII). It does NOT verify that the provider actually serves this model —
/// callers wanting that guarantee must consult the provider's model list
/// (e.g. `list_models_remote()`) first.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when the id is empty, longer
/// than 256 bytes, or contains non-printable or non-ASCII characters.
pub fn set_model(&mut self, model_id: &str) -> Result<(), String> {
    if model_id.is_empty() {
        return Err("model id must not be empty".to_string());
    }
    // `len()` counts bytes, but the ASCII check below guarantees that any
    // accepted id has bytes == chars, so the limit is effectively 256 chars.
    if model_id.len() > 256 {
        return Err("model id exceeds maximum length of 256 characters".to_string());
    }
    if !model_id
        .chars()
        .all(|c| c.is_ascii() && !c.is_ascii_control())
    {
        return Err("model id must contain only printable ASCII characters".to_string());
    }
    // No provider round-trip here: the swap is purely local runtime state.
    self.runtime.model_name = model_id.to_string();
    tracing::info!(model = model_id, "set_model called");
    Ok(())
}

/// Handle `/model`, `/model <id>`, and `/model refresh` commands.
///
/// All output (listings, confirmations, errors) goes to the channel; send
/// failures are deliberately ignored so a broken channel cannot wedge the
/// agent's command loop.
async fn handle_model_command(&mut self, trimmed: &str) {
    // Text after the "/model" prefix, trimmed. Empty string => list models.
    let arg = trimmed.strip_prefix("/model").map_or("", str::trim);

    if arg == "refresh" {
        // Invalidate all model cache files in the cache directory.
        // NOTE(review): this clears caches for ALL providers under
        // ~/.cache/zeph/models/, not only the active one — confirm intended.
        if let Some(cache_dir) = dirs::cache_dir() {
            let models_dir = cache_dir.join("zeph").join("models");
            if let Ok(entries) = std::fs::read_dir(&models_dir) {
                for entry in entries.flatten() {
                    let path = entry.path();
                    // Cache files are per-provider `{slug}.json`; skip others.
                    if path.extension().and_then(|e| e.to_str()) == Some("json") {
                        // Best-effort delete: failure just leaves a stale file.
                        let _ = std::fs::remove_file(&path);
                    }
                }
            }
        }
        // Re-fetch immediately so the cache is repopulated.
        match self.provider.list_models_remote().await {
            Ok(models) => {
                let _ = self
                    .channel
                    .send(&format!("Fetched {} models.", models.len()))
                    .await;
            }
            Err(e) => {
                let _ = self
                    .channel
                    .send(&format!("Error fetching models: {e}"))
                    .await;
            }
        }
        return;
    }

    if arg.is_empty() {
        // List models: try cache first, then remote.
        let cache = zeph_llm::model_cache::ModelCache::for_slug(self.provider.name());
        // A stale cache is treated as absent; load() errors also degrade to a
        // remote fetch rather than surfacing to the user.
        let models = if cache.is_stale() {
            None
        } else {
            cache.load().unwrap_or(None)
        };
        let models = if let Some(m) = models {
            m
        } else {
            match self.provider.list_models_remote().await {
                Ok(m) => m,
                Err(e) => {
                    let _ = self
                        .channel
                        .send(&format!("Error fetching models: {e}"))
                        .await;
                    return;
                }
            }
        };

        if models.is_empty() {
            let _ = self.channel.send("No models available.").await;
            return;
        }
        // One numbered line per model: "  1. Display Name (model-id)".
        let mut lines = vec!["Available models:".to_string()];
        for (i, m) in models.iter().enumerate() {
            lines.push(format!("  {}. {} ({})", i + 1, m.display_name, m.id));
        }
        let _ = self.channel.send(&lines.join("\n")).await;
        return;
    }

    // `/model <id>` — switch model
    let model_id = arg;
    match self.set_model(model_id) {
        Ok(()) => {
            let _ = self
                .channel
                .send(&format!("Switched to model: {model_id}"))
                .await;
        }
        Err(e) => {
            let _ = self.channel.send(&format!("Error: {e}")).await;
        }
    }
}

async fn resolve_message(
&self,
msg: crate::channel::ChannelMessage,
Expand Down Expand Up @@ -2665,6 +2779,86 @@ pub(super) mod agent_tests {
.unwrap();
assert!(resp.contains("No pending secret request"));
}

#[test]
fn set_model_updates_model_name() {
    // A valid model id must be accepted and reflected in runtime state.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.set_model("claude-opus-4-6").expect("valid model id");
    assert_eq!(agent.runtime.model_name, "claude-opus-4-6");
}

#[test]
fn set_model_overwrites_previous_value() {
    // A second switch must replace the first id, not keep or append to it.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    for id in ["model-a", "model-b"] {
        agent.set_model(id).unwrap();
    }
    assert_eq!(agent.runtime.model_name, "model-b");
}

#[tokio::test]
async fn model_command_switch_sends_confirmation() {
    // Switching via `/model <id>` should echo a confirmation naming the id.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.handle_model_command("/model my-new-model").await;

    let messages = sent.lock().unwrap();
    let confirmed = messages.iter().any(|m| m.contains("my-new-model"));
    assert!(confirmed, "expected switch confirmation, got: {messages:?}");
}

#[tokio::test]
async fn model_command_list_no_cache_fetches_remote() {
    // The mock provider's list_models_remote returns an empty vec, so the
    // agent should fall through the (invalidated) cache and report that no
    // models are available.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    // Force a cache miss for the mock provider slug so the remote path runs.
    zeph_llm::model_cache::ModelCache::for_slug("mock").invalidate();
    agent.handle_model_command("/model").await;

    let messages = sent.lock().unwrap();
    let reported_empty = messages.iter().any(|m| m.contains("No models"));
    assert!(
        reported_empty,
        "expected empty model list message, got: {messages:?}"
    );
}

#[tokio::test]
async fn model_command_refresh_sends_result() {
    // `/model refresh` should clear caches, re-fetch, and report the count.
    let channel = MockChannel::new(vec![]);
    let sent = channel.sent.clone();
    let mut agent = Agent::new(
        mock_provider(vec![]),
        channel,
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    agent.handle_model_command("/model refresh").await;

    let messages = sent.lock().unwrap();
    let fetched = messages.iter().any(|m| m.contains("Fetched"));
    assert!(fetched, "expected fetch confirmation, got: {messages:?}");
}
}

/// End-to-end tests for M30 resilient compaction: error detection → compact → retry → success.
Expand Down
2 changes: 2 additions & 0 deletions crates/zeph-llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ metal = ["candle", "candle-core/metal", "candle-nn/metal", "candle-transformers/

[dependencies]
base64.workspace = true
dirs.workspace = true
thiserror.workspace = true
candle-core = { workspace = true, optional = true }
candle-nn = { workspace = true, optional = true }
Expand All @@ -46,6 +47,7 @@ tracing.workspace = true
insta.workspace = true
proptest.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
wiremock.workspace = true

[lints]
workspace = true
53 changes: 53 additions & 0 deletions crates/zeph-llm/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,59 @@ impl AnyProvider {
delegate_provider!(self, |p| p.chat_typed::<T>(messages).await)
}

/// Fetch available models from this provider and update the disk cache.
///
/// Returns an empty list for providers that do not support remote model discovery
/// (Candle, Mock) without returning an error.
///
/// # Errors
///
/// Returns an error if the remote request fails.
pub async fn list_models_remote(
    &self,
) -> Result<Vec<crate::model_cache::RemoteModelInfo>, crate::LlmError> {
    /// Map plain model ids (from the sync `list_models()` API) into
    /// `RemoteModelInfo`, reusing the id as display name with no metadata.
    fn from_ids(ids: Vec<String>) -> Vec<crate::model_cache::RemoteModelInfo> {
        ids.into_iter()
            .map(|id| crate::model_cache::RemoteModelInfo {
                display_name: id.clone(),
                id,
                context_window: None,
                created_at: None,
            })
            .collect()
    }

    match self {
        AnyProvider::Ollama(p) => p.list_models_remote().await,
        AnyProvider::Claude(p) => p.list_models_remote().await,
        AnyProvider::OpenAi(p) => p.list_models_remote().await,
        AnyProvider::Compatible(p) => p.list_models_remote().await,
        // Router and Orchestrator use synchronous list_models() to avoid recursive async cycles.
        // Results reflect config-time model lists (potentially stale vs. live remote data).
        AnyProvider::Router(p) => {
            tracing::debug!(
                "list_models_remote: Router falling back to sync list_models (config-time data)"
            );
            Ok(from_ids(p.list_models()))
        }
        AnyProvider::Orchestrator(p) => {
            tracing::debug!(
                "list_models_remote: Orchestrator falling back to sync list_models (config-time data)"
            );
            Ok(from_ids(p.list_models()))
        }
        #[cfg(feature = "candle")]
        AnyProvider::Candle(_) => Ok(vec![]),
        #[cfg(feature = "mock")]
        AnyProvider::Mock(_) => Ok(vec![]),
    }
}

/// Propagate a status sender to the inner provider (where supported).
pub fn set_status_tx(&mut self, tx: StatusTx) {
match self {
Expand Down
Loading
Loading