Skip to content

Commit a757390

Browse files
authored
feat: add checkpoint (#65)
1 parent bc86f7c commit a757390

File tree

8 files changed

+442
-1
lines changed

8 files changed

+442
-1
lines changed

README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ go run cmd/example/codemode_utcp_workflow/main.go
134134
# Agent-to-Agent Communication via UTCP
135135
go run cmd/example/agent_as_tool/main.go
136136
go run cmd/example/agent_as_utcp_codemode/main.go
137+
138+
# Agent State Persistence (Checkpoint/Restore)
139+
go run cmd/example/checkpoint/main.go
137140
```
138141

139142
#### Example Descriptions
@@ -144,6 +147,7 @@ go run cmd/example/agent_as_utcp_codemode/main.go
144147

145148
- **`cmd/example/agent_as_tool/main.go`**: Demonstrates exposing agents as UTCP tools using `RegisterAsUTCPProvider()`, enabling agent-to-agent communication and hierarchical agent architectures.
146149
- **`cmd/example/agent_as_utcp_codemode/main.go`**: Shows an agent exposed as a UTCP tool and orchestrated via CodeMode, illustrating natural language to tool call generation.
150+
- **`cmd/example/checkpoint/main.go`**: Demonstrates how to checkpoint an agent's state to disk and restore it later, preserving conversation history and shared space memberships.
147151

148152

149153
## Project Structure
@@ -340,6 +344,38 @@ fmt.Println(result["response"])
340344
- **Standardization**: Uses the standard UTCP schema for inputs and outputs.
341345
- **Zero Overhead**: Uses an in-process transport when running within the same Go application, avoiding network latency.
342346

347+
### Agent State Persistence
348+
349+
Lattice supports **Checkpointing and Restoration**, allowing you to pause agents mid-task, persist their state to disk or a database, and resume them later (even after a crash or restart).
350+
351+
**Key Methods:**
352+
- `agent.Checkpoint()`: Serializes the agent's state (system prompt, short-term memory, shared space memberships) to a `[]byte`.
353+
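- `agent.Restore(data []byte)`: Rehydrates an agent instance from a checkpoint produced by `agent.Checkpoint()`. Shared space memberships are restored only when the agent was created with a shared session.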
354+
355+
**Example:**
356+
357+
```go
358+
// 1. Checkpoint the agent
359+
data, err := agent.Checkpoint()
360+
if err != nil {
361+
log.Fatal(err)
362+
}
363+
// Save 'data' to file/DB...
364+
365+
// 2. Restore the agent (later or after crash)
366+
// Create a fresh agent instance first
367+
newAgent, err := agent.New(opts)
368+
if err != nil {
369+
log.Fatal(err)
370+
}
371+
372+
// Restore state
373+
if err := newAgent.Restore(data); err != nil {
374+
log.Fatal(err)
375+
}
376+
// newAgent now has the same memory and context as the original
377+
```
378+
343379
## Why Use TOON?
344380

345381
**Token-Oriented Object Notation (TOON)** is integrated into Lattice to dramatically reduce token consumption when passing structured data to and from LLMs. This is especially critical for AI agent workflows where context windows are precious and API costs scale with token usage.

agent.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,46 @@ func (a *Agent) Flush(ctx context.Context, sessionID string) error {
385385
return a.memory.FlushToLongTerm(ctx, sessionID)
386386
}
387387

388+
// Checkpoint serializes the agent's current state (system prompt and short-term memory)
389+
// to a byte slice. This can be saved to disk or a database to pause the agent.
390+
func (a *Agent) Checkpoint() ([]byte, error) {
391+
a.mu.Lock()
392+
defer a.mu.Unlock()
393+
394+
state := AgentState{
395+
SystemPrompt: a.systemPrompt,
396+
ShortTerm: a.memory.ExportShortTerm(),
397+
Timestamp: time.Now(),
398+
}
399+
400+
if a.Shared != nil {
401+
state.JoinedSpaces = a.Shared.ExportJoinedSpaces()
402+
}
403+
404+
return json.Marshal(state)
405+
}
406+
407+
// Restore rehydrates the agent's state from a checkpoint.
408+
// It restores the system prompt and short-term memory.
409+
func (a *Agent) Restore(data []byte) error {
410+
a.mu.Lock()
411+
defer a.mu.Unlock()
412+
413+
var state AgentState
414+
if err := json.Unmarshal(data, &state); err != nil {
415+
return err
416+
}
417+
418+
a.systemPrompt = state.SystemPrompt
419+
a.memory.ImportShortTerm(state.ShortTerm)
420+
421+
if a.Shared != nil && len(state.JoinedSpaces) > 0 {
422+
a.Shared.ImportJoinedSpaces(state.JoinedSpaces)
423+
}
424+
425+
return nil
426+
}
427+
388428
func (a *Agent) executeTool(
389429
ctx context.Context,
390430
sessionID, toolName string,

agent_checkpoint_test.go

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
package agent
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/Protocol-Lattice/go-agent/src/memory"
8+
)
9+
10+
func TestAgentCheckpointAndRestore(t *testing.T) {
11+
ctx := context.Background()
12+
13+
// 1. Setup initial agent
14+
mem := memory.NewSessionMemory(&memory.MemoryBank{}, 10)
15+
// Use DummyEmbedder to avoid external calls and ensure speed
16+
mem = mem.WithEmbedder(memory.DummyEmbedder{})
17+
18+
agent, err := New(Options{
19+
Model: &stubModel{response: "ok"},
20+
Memory: mem,
21+
SystemPrompt: "Initial Prompt",
22+
})
23+
if err != nil {
24+
t.Fatalf("New returned error: %v", err)
25+
}
26+
27+
// 2. Add some state (memory)
28+
sessionID := "test-session"
29+
// storeMemory is unexported but accessible in the same package
30+
agent.storeMemory(sessionID, "user", "Hello world", nil)
31+
agent.storeMemory(sessionID, "assistant", "Hi there", nil)
32+
33+
// 3. Checkpoint
34+
data, err := agent.Checkpoint()
35+
if err != nil {
36+
t.Fatalf("Checkpoint failed: %v", err)
37+
}
38+
39+
if len(data) == 0 {
40+
t.Fatal("Checkpoint returned empty data")
41+
}
42+
43+
// 4. Create new agent (simulate restart)
44+
newMem := memory.NewSessionMemory(&memory.MemoryBank{}, 10)
45+
newMem = newMem.WithEmbedder(memory.DummyEmbedder{})
46+
47+
newAgent, err := New(Options{
48+
Model: &stubModel{response: "ok"},
49+
Memory: newMem,
50+
SystemPrompt: "Default Prompt", // Different from initial
51+
})
52+
if err != nil {
53+
t.Fatalf("New returned error: %v", err)
54+
}
55+
56+
// 5. Restore
57+
if err := newAgent.Restore(data); err != nil {
58+
t.Fatalf("Restore failed: %v", err)
59+
}
60+
61+
// 6. Verify
62+
if newAgent.systemPrompt != "Initial Prompt" {
63+
t.Errorf("System prompt not restored. Got %q, want %q", newAgent.systemPrompt, "Initial Prompt")
64+
}
65+
66+
// Verify memory
67+
// We can use RetrieveContext to check if memories are there.
68+
records, err := newAgent.memory.RetrieveContext(ctx, sessionID, "", 10)
69+
if err != nil {
70+
t.Fatalf("RetrieveContext failed: %v", err)
71+
}
72+
73+
if len(records) != 2 {
74+
t.Errorf("Expected 2 memory records, got %d", len(records))
75+
}
76+
77+
// Check content
78+
foundUser := false
79+
foundAssistant := false
80+
for _, r := range records {
81+
if r.Content == "Hello world" {
82+
foundUser = true
83+
}
84+
if r.Content == "Hi there" {
85+
foundAssistant = true
86+
}
87+
}
88+
89+
if !foundUser {
90+
t.Error("User memory not found")
91+
}
92+
if !foundAssistant {
93+
t.Error("Assistant memory not found")
94+
}
95+
}
96+
97+
func TestAgentCheckpointSharedSpaces(t *testing.T) {
98+
// Setup
99+
mem := memory.NewSessionMemory(&memory.MemoryBank{}, 10).WithEmbedder(memory.DummyEmbedder{})
100+
// Grant permissions in registry
101+
mem.Spaces.Grant("team:alpha", "agent-1", memory.SpaceRoleWriter, 0)
102+
mem.Spaces.Grant("team:beta", "agent-1", memory.SpaceRoleWriter, 0)
103+
104+
shared := memory.NewSharedSession(mem, "agent-1", "team:alpha")
105+
106+
agent, _ := New(Options{
107+
Model: &stubModel{response: "ok"},
108+
Memory: mem,
109+
Shared: shared,
110+
})
111+
112+
// Join another space
113+
if err := agent.Shared.Join("team:beta"); err != nil {
114+
t.Fatalf("Join failed: %v", err)
115+
}
116+
117+
// Checkpoint
118+
data, err := agent.Checkpoint()
119+
if err != nil {
120+
t.Fatalf("Checkpoint failed: %v", err)
121+
}
122+
123+
// Restore to new agent
124+
newMem := memory.NewSessionMemory(&memory.MemoryBank{}, 10).WithEmbedder(memory.DummyEmbedder{})
125+
// Simulate persistent registry: grant permissions again
126+
newMem.Spaces.Grant("team:alpha", "agent-1", memory.SpaceRoleWriter, 0)
127+
newMem.Spaces.Grant("team:beta", "agent-1", memory.SpaceRoleWriter, 0)
128+
129+
newShared := memory.NewSharedSession(newMem, "agent-1") // No initial spaces
130+
newAgent, _ := New(Options{
131+
Model: &stubModel{response: "ok"},
132+
Memory: newMem,
133+
Shared: newShared,
134+
})
135+
136+
if err := newAgent.Restore(data); err != nil {
137+
t.Fatalf("Restore failed: %v", err)
138+
}
139+
140+
// Verify spaces
141+
spaces := newAgent.Shared.Spaces()
142+
foundAlpha := false
143+
foundBeta := false
144+
for _, s := range spaces {
145+
if s == "team:alpha" {
146+
foundAlpha = true
147+
}
148+
if s == "team:beta" {
149+
foundBeta = true
150+
}
151+
}
152+
153+
if !foundAlpha {
154+
t.Error("Expected to be joined to team:alpha")
155+
}
156+
if !foundBeta {
157+
t.Error("Expected to be joined to team:beta")
158+
}
159+
}

cmd/example/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,33 @@ User Input → Manager Agent → agent.researcher (Researcher Agent) → Result
8989

9090
---
9191

92+
---
93+
94+
### 4. Agent State Persistence (Checkpoint/Restore)
95+
**File:** `cmd/example/checkpoint/main.go`
96+
97+
Demonstrates how to save an agent's state to disk and restore it later, preserving conversation history and shared space memberships.
98+
99+
**Key Concepts:**
100+
- `agent.Checkpoint()`: Serializes state to `[]byte`
101+
- `agent.Restore()`: Rehydrates state from `[]byte`
102+
- Persisting short-term memory and shared space context
103+
- Resuming conversations across process restarts
104+
105+
**Run:**
106+
```bash
107+
go run cmd/example/checkpoint/main.go
108+
```
109+
110+
**What it shows:**
111+
1. Creating an agent and having a conversation
112+
2. Checkpointing the agent to a JSON file
113+
3. Creating a fresh agent instance
114+
4. Restoring the state from the file
115+
5. Verifying the agent remembers the previous conversation
116+
117+
---
118+
92119
## CodeMode Pattern Explained
93120

94121
CodeMode is a powerful feature that allows agents to orchestrate tools through generated Go code.

0 commit comments

Comments
 (0)