Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- `schema` feature flag in `zeph-llm` gating `schemars` dependency and typed output API (#879)

### Changed
- Extract ContextManager, ToolOrchestrator, LearningEngine from Agent god object into standalone structs with pure delegation (#830, #836, #837, #838)
- Secret type wraps inner value in `Zeroizing<String>` for memory zeroization on drop; `Clone` removed (#865)
- AgeVaultProvider secrets HashMap uses `Zeroizing<String>` values (#866)
- Age private key reads, decrypt plaintext buffer, and encrypt JSON buffer wrapped in `Zeroizing` (#874)
Expand Down
3 changes: 3 additions & 0 deletions crates/zeph-core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Core orchestration crate for the Zeph agent. Manages the main agent loop, bootst
| Module | Description |
|--------|-------------|
| `agent` | `Agent<C>` — main loop driving inference and tool execution; ToolExecutor erased via `Box<dyn ErasedToolExecutor>`; supports external cancellation via `with_cancel_signal()` |
| `agent::context_manager` | `ContextManager` — owns token budget, compaction threshold, and safety margin; `should_compact()` evaluates tiktoken-based usage against threshold |
| `agent::tool_orchestrator` | `ToolOrchestrator` — owns max iteration limit, doom-loop detection (rolling hash window), summarization flag, and overflow config |
| `agent::learning_engine` | `LearningEngine` — owns `LearningConfig`, tracks per-turn reflection state; delegates self-learning decisions to `is_enabled()` / `mark_reflection_used()` |
| `agent::tool_execution` | Tool call handling, redaction, result processing; `call_llm_with_retry()` / `call_chat_with_tools_retry()` — auto-detect `ContextLengthExceeded`, compact context, and retry (max 2 attempts) |
| `agent::message_queue` | Message queue management |
| `agent::builder` | Agent builder API |
Expand Down
18 changes: 9 additions & 9 deletions crates/zeph-core/src/agent/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<C: Channel> Agent<C> {

#[must_use]
pub fn with_max_tool_iterations(mut self, max: usize) -> Self {
self.runtime.max_tool_iterations = max;
self.tool_orchestrator.max_iterations = max;
self
}

Expand Down Expand Up @@ -139,7 +139,7 @@ impl<C: Channel> Agent<C> {

#[must_use]
pub fn with_learning(mut self, config: LearningConfig) -> Self {
self.learning_config = Some(config);
self.learning_engine.config = Some(config);
self
}

Expand Down Expand Up @@ -176,19 +176,19 @@ impl<C: Channel> Agent<C> {

#[must_use]
pub fn with_token_safety_margin(mut self, margin: f32) -> Self {
self.runtime.token_safety_margin = margin;
self.context_manager.token_safety_margin = margin;
self
}

#[must_use]
pub fn with_tool_summarization(mut self, enabled: bool) -> Self {
self.runtime.summarize_tool_output_enabled = enabled;
self.tool_orchestrator.summarize_tool_output_enabled = enabled;
self
}

#[must_use]
pub fn with_overflow_config(mut self, config: zeph_tools::OverflowConfig) -> Self {
self.runtime.overflow_config = config;
self.tool_orchestrator.overflow_config = config;
self
}

Expand Down Expand Up @@ -218,11 +218,11 @@ impl<C: Channel> Agent<C> {
prune_protect_tokens: usize,
) -> Self {
if budget_tokens > 0 {
self.context_state.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
self.context_manager.budget = Some(ContextBudget::new(budget_tokens, reserve_ratio));
}
self.context_state.compaction_threshold = compaction_threshold;
self.context_state.compaction_preserve_tail = compaction_preserve_tail;
self.context_state.prune_protect_tokens = prune_protect_tokens;
self.context_manager.compaction_threshold = compaction_threshold;
self.context_manager.compaction_preserve_tail = compaction_preserve_tail;
self.context_manager.prune_protect_tokens = prune_protect_tokens;
self
}

Expand Down
54 changes: 14 additions & 40 deletions crates/zeph-core/src/agent/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,34 +71,8 @@ fn truncate_chars(s: &str, max_chars: usize) -> String {
}

impl<C: Channel> Agent<C> {
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
pub(super) fn should_compact(&self) -> bool {
let Some(ref budget) = self.context_state.budget else {
return false;
};
let margin = self.runtime.token_safety_margin;
let total_tokens: usize = self
.messages
.iter()
.map(|m| {
(self.token_counter.count_tokens(&m.content) as f64 * f64::from(margin)) as usize
})
.sum();
let threshold =
(budget.max_tokens() as f32 * self.context_state.compaction_threshold) as usize;
let should = total_tokens > threshold;
tracing::debug!(
total_tokens,
threshold,
message_count = self.messages.len(),
should_compact = should,
"context budget check"
);
should
self.context_manager.should_compact(&self.messages)
}

fn build_chunk_prompt(messages: &[Message]) -> String {
Expand Down Expand Up @@ -418,7 +392,7 @@ impl<C: Channel> Agent<C> {
}

pub(super) async fn compact_context(&mut self) -> Result<(), super::error::AgentError> {
let preserve_tail = self.context_state.compaction_preserve_tail;
let preserve_tail = self.context_manager.compaction_preserve_tail;

if self.messages.len() <= preserve_tail + 1 {
return Ok(());
Expand Down Expand Up @@ -505,7 +479,7 @@ impl<C: Channel> Agent<C> {
/// Returns the number of tokens freed.
#[allow(clippy::cast_precision_loss)]
pub(super) fn prune_tool_outputs(&mut self, min_to_free: usize) -> usize {
let protect = self.context_state.prune_protect_tokens;
let protect = self.context_manager.prune_protect_tokens;
let mut tail_tokens = 0usize;
let mut protection_boundary = self.messages.len();
if protect > 0 {
Expand Down Expand Up @@ -747,7 +721,7 @@ impl<C: Channel> Agent<C> {
}

let budget = self
.context_state
.context_manager
.budget
.as_ref()
.map_or(0, ContextBudget::max_tokens);
Expand All @@ -756,7 +730,7 @@ impl<C: Channel> Agent<C> {
.iter()
.map(|m| self.token_counter.count_tokens(&m.content))
.sum();
let threshold = (budget as f32 * self.context_state.compaction_threshold) as usize;
let threshold = (budget as f32 * self.context_manager.compaction_threshold) as usize;
let min_to_free = total_tokens.saturating_sub(threshold);

let freed = self.prune_tool_outputs(min_to_free);
Expand Down Expand Up @@ -1082,7 +1056,7 @@ impl<C: Channel> Agent<C> {
&mut self,
query: &str,
) -> Result<(), super::error::AgentError> {
let Some(ref budget) = self.context_state.budget else {
let Some(ref budget) = self.context_manager.budget else {
return Ok(());
};
let _ = self.channel.send_status("building context...").await;
Expand Down Expand Up @@ -1343,7 +1317,7 @@ impl<C: Channel> Agent<C> {

let effective_mode = match self.skill_state.prompt_mode {
crate::config::SkillPromptMode::Auto => {
if let Some(ref budget) = self.context_state.budget
if let Some(ref budget) = self.context_manager.budget
&& budget.max_tokens() < 8192
{
crate::config::SkillPromptMode::Compact
Expand Down Expand Up @@ -1515,7 +1489,7 @@ mod tests {
// Auto mode: budget < 8192 → Compact
let effective_mode = match crate::config::SkillPromptMode::Auto {
crate::config::SkillPromptMode::Auto => {
if let Some(ref budget) = agent.context_state.budget
if let Some(ref budget) = agent.context_manager.budget
&& budget.max_tokens() < 8192
{
crate::config::SkillPromptMode::Compact
Expand All @@ -1541,7 +1515,7 @@ mod tests {
// Auto mode: budget >= 8192 → Full
let effective_mode = match crate::config::SkillPromptMode::Auto {
crate::config::SkillPromptMode::Auto => {
if let Some(ref budget) = agent.context_state.budget
if let Some(ref budget) = agent.context_manager.budget
&& budget.max_tokens() < 8192
{
crate::config::SkillPromptMode::Compact
Expand Down Expand Up @@ -1695,7 +1669,7 @@ mod tests {

let agent = Agent::new(provider, channel, registry, None, 5, executor)
.with_context_budget(0, 0.20, 0.75, 4, 0);
assert!(agent.context_state.budget.is_none());
assert!(agent.context_manager.budget.is_none());
}

#[test]
Expand All @@ -1708,13 +1682,13 @@ mod tests {
let agent = Agent::new(provider, channel, registry, None, 5, executor)
.with_context_budget(4096, 0.20, 0.80, 6, 0);

assert!(agent.context_state.budget.is_some());
assert!(agent.context_manager.budget.is_some());
assert_eq!(
agent.context_state.budget.as_ref().unwrap().max_tokens(),
agent.context_manager.budget.as_ref().unwrap().max_tokens(),
4096
);
assert!((agent.context_state.compaction_threshold - 0.80).abs() < f32::EPSILON);
assert_eq!(agent.context_state.compaction_preserve_tail, 6);
assert!((agent.context_manager.compaction_threshold - 0.80).abs() < f32::EPSILON);
assert_eq!(agent.context_manager.compaction_preserve_tail, 6);
}

#[tokio::test]
Expand Down
115 changes: 115 additions & 0 deletions crates/zeph-core/src/agent/context_manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

use std::sync::Arc;

use zeph_memory::TokenCounter;

use crate::context::ContextBudget;

/// Owns the agent's context-window bookkeeping: the optional token budget,
/// compaction thresholds, tail/prune protection sizes, and the shared token
/// counter used to estimate message sizes.
pub(crate) struct ContextManager {
    // Hard token budget for the conversation; `None` disables the
    // `should_compact()` check entirely.
    pub(super) budget: Option<ContextBudget>,
    // Fraction of `budget.max_tokens()` at which compaction is triggered.
    pub(super) compaction_threshold: f32,
    // Number of trailing messages kept verbatim when the context is compacted.
    pub(super) compaction_preserve_tail: usize,
    // Token count at the tail of the conversation shielded from tool-output
    // pruning (see `Agent::prune_tool_outputs`).
    pub(super) prune_protect_tokens: usize,
    // Shared tiktoken-based counter used to estimate per-message token usage.
    pub(crate) token_counter: Arc<TokenCounter>,
    // Multiplier applied to raw token counts as a safety margin
    // (1.0 = no margin; >1.0 over-estimates usage to compact earlier).
    pub(super) token_safety_margin: f32,
}

impl ContextManager {
    /// Creates a manager with default thresholds and no budget configured.
    ///
    /// Defaults: compaction at 80% of budget, preserve the last 6 messages,
    /// protect 40 000 tail tokens from pruning, and no safety margin (1.0).
    #[must_use]
    pub(crate) fn new(token_counter: Arc<TokenCounter>) -> Self {
        Self {
            budget: None,
            compaction_threshold: 0.80,
            compaction_preserve_tail: 6,
            prune_protect_tokens: 40_000,
            token_counter,
            token_safety_margin: 1.0,
        }
    }

    /// Returns `true` when the margin-adjusted token total of `messages`
    /// exceeds `compaction_threshold` of the configured budget.
    ///
    /// Always returns `false` when no budget is set.
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss
    )]
    pub(super) fn should_compact(&self, messages: &[zeph_llm::provider::Message]) -> bool {
        // No budget configured means compaction is disabled.
        let Some(ref budget) = self.budget else {
            return false;
        };
        // Apply the safety margin per message, truncating each adjusted
        // count back to an integer before summing (matches the original
        // per-message rounding behavior).
        let safety = f64::from(self.token_safety_margin);
        let used: usize = messages.iter().fold(0usize, |acc, message| {
            let raw = self.token_counter.count_tokens(&message.content) as f64;
            acc + (raw * safety) as usize
        });
        let limit = (budget.max_tokens() as f32 * self.compaction_threshold) as usize;
        let verdict = used > limit;
        tracing::debug!(
            total_tokens = used,
            threshold = limit,
            message_count = messages.len(),
            should_compact = verdict,
            "context budget check"
        );
        verdict
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{Message, Role};

    // Shared tokenizer fixture for all tests.
    fn make_counter() -> Arc<TokenCounter> {
        Arc::new(TokenCounter::new())
    }

    // Builds a user-role message with the given content; other fields default.
    fn msg(content: &str) -> Message {
        Message {
            role: Role::User,
            content: content.to_string(),
            ..Default::default()
        }
    }

    // Constructor yields no budget and the documented default thresholds.
    #[test]
    fn new_defaults() {
        let cm = ContextManager::new(make_counter());
        assert!(cm.budget.is_none());
        assert!((cm.compaction_threshold - 0.80).abs() < f32::EPSILON);
        assert_eq!(cm.compaction_preserve_tail, 6);
        assert_eq!(cm.prune_protect_tokens, 40_000);
    }

    // Without a budget, should_compact is always false regardless of input.
    #[test]
    fn should_compact_no_budget() {
        let cm = ContextManager::new(make_counter());
        assert!(!cm.should_compact(&[msg("hello")]));
    }

    // A tiny message against a large budget stays under the threshold.
    #[test]
    fn should_compact_below_threshold() {
        let mut cm = ContextManager::new(make_counter());
        cm.budget = Some(ContextBudget::new(100_000, 0.1));
        assert!(!cm.should_compact(&[msg("short")]));
    }

    // A small budget plus a near-zero threshold forces compaction for a
    // sufficiently large message.
    #[test]
    fn should_compact_above_threshold() {
        let mut cm = ContextManager::new(make_counter());
        cm.budget = Some(ContextBudget::new(100, 0.1));
        cm.compaction_threshold = 0.01;
        let big = msg(&"x".repeat(500));
        assert!(cm.should_compact(&[big]));
    }

    // An empty message slice sums to zero tokens and never compacts.
    #[test]
    fn should_compact_empty_messages() {
        let mut cm = ContextManager::new(make_counter());
        cm.budget = Some(ContextBudget::new(100, 0.1));
        assert!(!cm.should_compact(&[]));
    }
}
15 changes: 8 additions & 7 deletions crates/zeph-core/src/agent/learning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

use super::{Agent, Channel, LlmProvider};

use super::{LearningConfig, Message, Role, SemanticMemory};
use super::{Message, Role, SemanticMemory};
use crate::config::LearningConfig;
use zeph_llm::provider::MessageMetadata;

use std::path::PathBuf;

impl<C: Channel> Agent<C> {
pub(super) fn is_learning_enabled(&self) -> bool {
self.learning_config.as_ref().is_some_and(|c| c.enabled)
self.learning_engine.is_enabled()
}

async fn is_skill_trusted_for_learning(&self, skill_name: &str) -> bool {
Expand Down Expand Up @@ -55,10 +56,10 @@ impl<C: Channel> Agent<C> {
error_context: &str,
tool_output: &str,
) -> Result<bool, super::error::AgentError> {
if self.reflection_used || !self.is_learning_enabled() {
if self.learning_engine.was_reflection_used() || !self.is_learning_enabled() {
return Ok(false);
}
self.reflection_used = true;
self.learning_engine.mark_reflection_used();

let skill_name = self.skill_state.active_skill_names.first().cloned();

Expand Down Expand Up @@ -133,7 +134,7 @@ impl<C: Channel> Agent<C> {
let Some(memory) = &self.memory_state.memory else {
return Ok(());
};
let Some(config) = self.learning_config.as_ref() else {
let Some(config) = self.learning_engine.config.as_ref() else {
return Ok(());
};

Expand Down Expand Up @@ -372,7 +373,7 @@ impl<C: Channel> Agent<C> {
let Some(memory) = &self.memory_state.memory else {
return;
};
let Some(config) = &self.learning_config else {
let Some(config) = &self.learning_engine.config else {
return;
};
let Ok(Some(metrics)) = memory.sqlite().skill_metrics(skill_name).await else {
Expand Down Expand Up @@ -1183,7 +1184,7 @@ mod tests {
.with_learning(learning_config_enabled());

// Mark reflection as already used
agent.reflection_used = true;
agent.learning_engine.mark_reflection_used();

let result = agent.attempt_self_reflection("error", "output").await;
assert!(result.is_ok());
Expand Down
Loading
Loading