bug-ops · bug-ops · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ## [Unreleased]
 
 ### Added
+- `MemoryToolExecutor` in `zeph-core` exposes `memory_search` and `memory_save` as native tools the model can invoke explicitly
+- `memory_search` queries `SemanticMemory` recall, key facts, and session summaries; `memory_save` persists content to long-term memory
+- `MemoryToolExecutor` is registered conditionally — only when a memory backend is configured
+- `MemoryState.memory` refactored to `Option<Arc<SemanticMemory>>` for shared access
 - WebSocket connection lifecycle hardening: `AtomicUsize` slot reservation before upgrade handshake eliminates TOCTOU between capacity check and `DashMap` insertion; 30s ping / 90s pong-timeout keepalive; binary frame rejection with close code 1003; graceful disconnect with 1s write-task drain window to ensure close frame delivery per RFC 6455 (#936)
 - Bearer token authentication middleware for ACP HTTP and WebSocket transports (`auth.rs`): constant-time token comparison via `subtle::ConstantTimeEq`, configurable via `acp.auth_bearer_token` / `ZEPH_ACP_AUTH_TOKEN` env var; no-auth open mode when token is unset (#936)
 - Agent discovery manifest endpoint `GET /.well-known/acp.json`: returns agent name, version, supported transports, and authentication type; publicly accessible (exempt from bearer auth), controlled by `acp.discovery_enabled` (default `true`) / `ZEPH_ACP_DISCOVERY_ENABLED` env var (#936)

diff --git a/README.md b/README.md
@@ -65,7 +65,7 @@ zeph --tui         # run with TUI dashboard
 |---|---|
 | **Hybrid inference** | Ollama, Claude, OpenAI, Candle (GGUF), any OpenAI-compatible API. Multi-model orchestrator with fallback chains. Response cache with blake3 hashing and TTL. Ollama native tool calling via `llm.ollama.tool_use = true` |
 | **Skills-first architecture** | YAML+Markdown skill files with semantic matching, self-learning evolution, 4-tier trust model, and compact prompt mode for small-context models |
-| **Semantic memory** | SQLite + Qdrant (or embedded SQLite vector search) with MMR re-ranking, temporal decay scoring, resilient compaction (reactive retry, middle-out tool response removal, 9-section structured prompt, LLM-free fallback), durable compaction with message visibility control, tool-pair summarization (LLM-based, configurable cutoff), credential scrubbing, cross-session recall, vector retrieval, autosave assistant responses, snapshot export/import, configurable SQLite pool, and background response-cache cleanup |
+| **Semantic memory** | SQLite + Qdrant (or embedded SQLite vector search) with MMR re-ranking, temporal decay scoring, resilient compaction (reactive retry, middle-out tool response removal, 9-section structured prompt, LLM-free fallback), durable compaction with message visibility control, tool-pair summarization (LLM-based, configurable cutoff), credential scrubbing, cross-session recall, vector retrieval, autosave assistant responses, snapshot export/import, configurable SQLite pool, background response-cache cleanup, and native `memory_search`/`memory_save` tools the model can invoke explicitly |
 | **Multi-channel I/O** | CLI, Telegram, Discord, Slack, TUI — all with streaming. Vision and speech-to-text input |
 | **Protocols** | MCP client (stdio + HTTP), A2A agent-to-agent communication, ACP server for IDE integration (stdio + HTTP+SSE + WebSocket, multi-session with LRU eviction, persistence, idle reaper, permission persistence, multi-modal prompts, runtime model switching, session modes (ask/architect/code), MCP server management via `ext_method`, session export/import), sub-agent orchestration. MCP tools exposed as native `ToolDefinition`s — used via structured tool_use with Claude and OpenAI |
 | **Defense-in-depth** | Shell sandbox (blocklist + confirmation patterns for process substitution, here-strings, eval), tool permissions, secret redaction, SSRF protection (HTTPS-only, DNS validation, address pinning, redirect chain re-validation), skill trust quarantine, audit logging. Secrets held in memory as `Zeroizing<String>` — wiped on drop |

diff --git a/crates/zeph-core/Cargo.toml b/crates/zeph-core/Cargo.toml
@@ -29,6 +29,7 @@ futures.workspace = true
 notify.workspace = true
 notify-debouncer-mini.workspace = true
 regex.workspace = true
+schemars.workspace = true
 serde = { workspace = true, features = ["derive"] }
 serde_json.workspace = true
 thiserror.workspace = true

diff --git a/crates/zeph-core/src/agent/builder.rs b/crates/zeph-core/src/agent/builder.rs
@@ -62,7 +62,7 @@ impl<C: Channel> Agent<C> {
     #[must_use]
     pub fn with_memory(
         mut self,
-        memory: SemanticMemory,
+        memory: Arc<SemanticMemory>,
         conversation_id: zeph_memory::ConversationId,
         history_limit: u32,
         recall_limit: usize,

diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs
@@ -1908,8 +1908,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
         let msg_count = agent.messages.len();
 
         agent.inject_summaries(0).await.unwrap();
@@ -1925,8 +1930,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
         let msg_count = agent.messages.len();
 
         agent.inject_summaries(1000).await.unwrap();
@@ -1944,8 +1954,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
 
         agent.messages.push(Message {
             role: Role::User,
@@ -1977,8 +1992,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
 
         agent.messages.insert(
             1,
@@ -2026,8 +2046,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
 
         agent.messages.push(Message {
             role: Role::User,
@@ -2204,8 +2229,13 @@ mod tests {
         let registry = create_test_registry();
         let executor = MockToolExecutor::no_tools();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            50,
+        );
         let msg_count = agent.messages.len();
 
         agent.inject_cross_session_context("test", 0).await.unwrap();
@@ -2280,7 +2310,7 @@ mod tests {
         let (memory, cid) = create_memory_with_summaries(provider.clone(), &[]).await;
 
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 50)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50)
             .with_context_budget(10000, 0.20, 0.80, 2, 0);
 
         for i in 0..10 {

diff --git a/crates/zeph-core/src/agent/learning.rs b/crates/zeph-core/src/agent/learning.rs
@@ -897,7 +897,7 @@ mod tests {
         let config = learning_config_enabled(); // min_failures = 2
         let agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_learning(config.clone())
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let mem = agent.memory_state.memory.as_ref().unwrap();
         let allowed = agent
@@ -949,7 +949,7 @@ mod tests {
         let config = learning_config_enabled(); // improve_threshold = 0.7
         let agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_learning(config.clone())
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let mem = agent.memory_state.memory.as_ref().unwrap();
         let allowed = agent
@@ -999,7 +999,7 @@ mod tests {
         };
         let agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_learning(config.clone())
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let mem = agent.memory_state.memory.as_ref().unwrap();
         let allowed = agent
@@ -1025,7 +1025,7 @@ mod tests {
         let config = learning_config_enabled();
         let agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_learning(config.clone())
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let mem = agent.memory_state.memory.as_ref().unwrap();
         let allowed = agent
@@ -1104,7 +1104,7 @@ mod tests {
                 improve_threshold: 0.7,
                 ..learning_config_enabled()
             })
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let result = agent
             .generate_improved_skill("test-skill", "exit code 1", "response", None)
@@ -1147,7 +1147,7 @@ mod tests {
                 improve_threshold: 0.7,
                 ..learning_config_enabled()
             })
-            .with_memory(memory, cid, 50, 5, 50);
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 50);
 
         let result = agent
             .generate_improved_skill("test-skill", "exit code 1", "response", None)

diff --git a/crates/zeph-core/src/agent/mod.rs b/crates/zeph-core/src/agent/mod.rs
@@ -75,7 +75,7 @@ pub(crate) fn format_tool_output(tool_name: &str, body: &str) -> String {
 }
 
 pub(super) struct MemoryState {
-    pub(super) memory: Option<SemanticMemory>,
+    pub(super) memory: Option<Arc<SemanticMemory>>,
     pub(super) conversation_id: Option<zeph_memory::ConversationId>,
     pub(super) history_limit: u32,
     pub(super) recall_limit: usize,

diff --git a/crates/zeph-core/src/agent/persistence.rs b/crates/zeph-core/src/agent/persistence.rs
@@ -202,8 +202,13 @@ mod tests {
             .await
             .unwrap();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 100);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            100,
+        );
 
         let messages_before = agent.messages.len();
         agent.load_history().await.unwrap();
@@ -233,8 +238,13 @@ mod tests {
             .await
             .unwrap();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 100);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            100,
+        );
 
         let messages_before = agent.messages.len();
         agent.load_history().await.unwrap();
@@ -252,8 +262,13 @@ mod tests {
         let memory = test_memory(&AnyProvider::Mock(zeph_llm::mock::MockProvider::default())).await;
         let cid = memory.sqlite().create_conversation().await.unwrap();
 
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 100);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            100,
+        );
 
         let messages_before = agent.messages.len();
         agent.load_history().await.unwrap();
@@ -287,7 +302,7 @@ mod tests {
 
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_metrics(tx)
-            .with_memory(memory, cid, 50, 5, 100)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 100)
             .with_autosave_config(false, 20);
 
         agent
@@ -323,7 +338,7 @@ mod tests {
         // autosave_assistant=true but min_length=1000 — short content falls back to save_only
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_metrics(tx)
-            .with_memory(memory, cid, 50, 5, 100)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 100)
             .with_autosave_config(true, 1000);
 
         agent.persist_message(Role::Assistant, "too short").await;
@@ -357,7 +372,7 @@ mod tests {
         let min_length = 10usize;
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_metrics(tx)
-            .with_memory(memory, cid, 50, 5, 100)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 100)
             .with_autosave_config(true, min_length);
 
         // Exact boundary: len == min_length → embed path.
@@ -386,7 +401,7 @@ mod tests {
         let min_length = 10usize;
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_metrics(tx)
-            .with_memory(memory, cid, 50, 5, 100)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 100)
             .with_autosave_config(true, min_length);
 
         // One below boundary: len == min_length - 1 → save_only path, no embedding.
@@ -421,8 +436,13 @@ mod tests {
         let cid = memory.sqlite().create_conversation().await.unwrap();
 
         // threshold=100 ensures no summarization is triggered
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 100);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            100,
+        );
 
         assert_eq!(agent.memory_state.unsummarized_count, 0);
 
@@ -444,8 +464,13 @@ mod tests {
         let cid = memory.sqlite().create_conversation().await.unwrap();
 
         // threshold=1 so the second persist triggers summarization check (count > threshold)
-        let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
-            .with_memory(memory, cid, 50, 5, 1);
+        let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
+            std::sync::Arc::new(memory),
+            cid,
+            50,
+            5,
+            1,
+        );
 
         agent.persist_message(Role::User, "msg1").await;
         agent.persist_message(Role::User, "msg2").await;
@@ -484,7 +509,7 @@ mod tests {
         // autosave_assistant=false — but User role always takes embedding path
         let mut agent = Agent::new(provider, channel, registry, None, 5, executor)
             .with_metrics(tx)
-            .with_memory(memory, cid, 50, 5, 100)
+            .with_memory(std::sync::Arc::new(memory), cid, 50, 5, 100)
             .with_autosave_config(false, 20);
 
         let long_user_msg = "A".repeat(100);

diff --git a/crates/zeph-core/src/lib.rs b/crates/zeph-core/src/lib.rs
@@ -21,6 +21,7 @@ pub mod vault;
 
 pub mod hash;
 pub mod http;
+pub mod memory_tools;
 pub mod subagent;
 
 pub use agent::Agent;