Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion crates/zeph-acp/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,10 @@ impl acp::Agent for ZephAcpAgent {
}

let trimmed_text = text.trim_start();
if trimmed_text.starts_with('/') && trimmed_text != "/compact" {
if trimmed_text.starts_with('/')
&& trimmed_text != "/compact"
&& trimmed_text != "/model refresh"
{
return self
.handle_slash_command(&args.session_id, trimmed_text)
.await;
Expand Down
4 changes: 4 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ impl<C: Channel> Agent<C> {
self.channel
.send(&format!("Cleared {n} queued messages."))
.await?;
let _ = self.channel.flush_chunks().await;
continue;
}

Expand All @@ -474,16 +475,19 @@ impl<C: Channel> Agent<C> {
} else {
let _ = self.channel.send("Nothing to compact.").await;
}
let _ = self.channel.flush_chunks().await;
continue;
}

if trimmed == "/clear" {
self.clear_history();
let _ = self.channel.flush_chunks().await;
continue;
}

if trimmed == "/model" || trimmed.starts_with("/model ") {
self.handle_model_command(trimmed).await;
let _ = self.channel.flush_chunks().await;
continue;
}

Expand Down
15 changes: 1 addition & 14 deletions crates/zeph-llm/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,20 +105,7 @@ impl AnyProvider {
})
.collect())
}
AnyProvider::Orchestrator(p) => {
tracing::debug!(
"list_models_remote: Orchestrator falling back to sync list_models (config-time data)"
);
Ok(p.list_models()
.into_iter()
.map(|id| crate::model_cache::RemoteModelInfo {
display_name: id.clone(),
id,
context_window: None,
created_at: None,
})
.collect())
}
AnyProvider::Orchestrator(p) => p.list_models_remote().await,
#[cfg(feature = "candle")]
AnyProvider::Candle(_) => Ok(vec![]),
#[cfg(feature = "mock")]
Expand Down
122 changes: 114 additions & 8 deletions src/acp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,33 +329,133 @@ async fn spawn_acp_agent(

/// Collect model keys from config when `acp.available_models` is not set.
///
/// Each key uses `"{provider_name}:{model}"` format matching the provider factory.
/// For each configured provider the disk cache is consulted first (24 h TTL).
/// When the cache is warm the full remote model list is returned; otherwise the
/// single model from config is used as the fallback so startup is never blocked
/// on network I/O. Call `/model refresh` at runtime to populate the caches.
///
/// Each key uses `"{provider_name}:{model_id}"` format matching the provider factory.
#[cfg(feature = "acp")]
fn discover_models_from_config(config: &zeph_core::config::Config) -> Vec<String> {
use zeph_llm::model_cache::ModelCache;

/// Expand a provider slug using its on-disk cache, or fall back to `fallback`.
fn expand_from_cache(slug: &str, fallback: &str) -> Vec<String> {
let cache = ModelCache::for_slug(slug);
if !cache.is_stale()
&& let Ok(Some(entries)) = cache.load()
&& !entries.is_empty()
{
return entries
.into_iter()
.map(|m| format!("{slug}:{}", m.id))
.collect();
}
vec![format!("{slug}:{fallback}")]
}

let mut models: Vec<String> = Vec::new();
models.push(format!("ollama:{}", config.llm.model));
if config.secrets.claude_api_key.is_some() {
let model = config

if config.llm.provider == zeph_core::config::ProviderKind::Orchestrator {
// Orchestrator: enumerate sub-providers and use their own cache/fallback.
if let Some(ref orch) = config.llm.orchestrator {
for sub in orch.providers.values() {
let slug = sub.provider_type.as_str();
let fallback = sub.model.as_deref().unwrap_or("unknown");
models.extend(expand_from_cache(slug, fallback));
}
}
} else {
// Single provider — use top-level llm section.
models.extend(expand_from_cache("ollama", &config.llm.model));
}

// Claude — always add when API key present, even under orchestrator.
if config.secrets.claude_api_key.is_some()
&& config.llm.provider != zeph_core::config::ProviderKind::Orchestrator
{
let fallback = config
.llm
.cloud
.as_ref()
.map_or("claude-sonnet-4-5", |c| c.model.as_str());
models.push(format!("claude:{model}"));
models.extend(expand_from_cache("claude", fallback));
}
if let (Some(_), Some(openai_cfg)) = (&config.secrets.openai_api_key, &config.llm.openai) {
models.push(format!("openai:{}", openai_cfg.model));

// OpenAI — only when API key and config section are present (non-orchestrator).
if config.llm.provider != zeph_core::config::ProviderKind::Orchestrator
&& let (Some(_), Some(openai_cfg)) = (&config.secrets.openai_api_key, &config.llm.openai)
{
models.extend(expand_from_cache("openai", &openai_cfg.model));
}

// Compatible providers.
if let Some(ref entries) = config.llm.compatible {
for entry in entries {
if config.secrets.compatible_api_keys.contains_key(&entry.name) {
models.push(format!("{}:{}", entry.name, entry.model));
models.extend(expand_from_cache(&entry.name, &entry.model));
}
}
}

models.dedup();
models
}

/// Populate model caches for all providers before the ACP server starts.
///
/// Uses a 5-second timeout so that a slow or unavailable provider does not block startup.
/// After a successful fetch, each unique provider slug present in `acp_available_models`
/// is expanded from its on-disk cache, replacing the single config-time fallback entry.
#[cfg(feature = "acp")]
async fn warm_model_caches(deps: &mut AgentDeps) {
use zeph_llm::model_cache::ModelCache;

let provider = deps.provider.clone();
let fetch = async move {
match provider.list_models_remote().await {
Ok(models) => tracing::debug!(count = models.len(), "model cache warmed"),
Err(e) => tracing::debug!(error = %e, "model cache warm-up failed"),
}
};

if tokio::time::timeout(std::time::Duration::from_secs(5), fetch)
.await
.is_err()
{
tracing::debug!("model cache warm-up timed out; using config fallback");
return;
}

// Collect unique provider slugs from the current available_models list.
let slugs: Vec<String> = deps
.acp_available_models
.iter()
.filter_map(|k| k.split_once(':').map(|(s, _)| s.to_owned()))
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();

for slug in slugs {
let cache = ModelCache::for_slug(&slug);
if cache.is_stale() {
continue;
}
if let Ok(Some(entries)) = cache.load()
&& !entries.is_empty()
{
let new_keys: Vec<String> = entries
.into_iter()
.map(|m| format!("{slug}:{}", m.id))
.collect();
deps.acp_available_models
.retain(|k| !k.starts_with(&format!("{slug}:")));
deps.acp_available_models.extend(new_keys);
}
}
deps.acp_available_models.dedup();
}

/// Build a `ProviderFactory` from the known named providers in config.
///
/// Each available model key is `"{provider_name}:{model}"`.
Expand Down Expand Up @@ -524,6 +624,10 @@ pub(crate) async fn run_acp_server(
let (mut deps, _keepalive) =
build_acp_deps(config_path, vault_backend, vault_key, vault_path).await?;

// Warm model caches before advertising available_models to the ACP client.
// A 5-second budget is given; on timeout the fallback list from config is used.
warm_model_caches(&mut deps).await;

let mcp_manager_for_acp = Arc::clone(&deps.mcp_manager);
let server_config = zeph_acp::AcpServerConfig {
agent_name: deps.acp_agent_name.clone(),
Expand Down Expand Up @@ -579,6 +683,8 @@ pub(crate) async fn run_acp_http_server(
let (mut deps, _keepalive) =
build_acp_deps(config_path, vault_backend, vault_key, vault_path).await?;

warm_model_caches(&mut deps).await;

let bind_addr = bind_override.map_or_else(|| "127.0.0.1:9800".to_owned(), str::to_owned);

// CLI flag overrides config/env values for auth token.
Expand Down
Loading