
Commit 3b21c27

feat: adding cached llm (#66)
1 parent b05b535 commit 3b21c27

5 files changed: +290 −4 lines


PERFORMANCE_OPTIMIZATIONS.md

Lines changed: 1 addition & 0 deletions
@@ -67,6 +67,7 @@ New environment variables for tuning performance:
 
 - `AGENT_LLM_CACHE_SIZE` - LRU cache size for LLM responses (default: 1000)
 - `AGENT_LLM_CACHE_TTL` - Cache TTL in seconds (default: 300)
+- `AGENT_LLM_CACHE_PATH` - Path to cache file for persistence (default: .agent_cache.json)
 - `AGENT_CONCURRENT_OPS` - Max concurrent operations (default: 10)
 - `AGENT_BATCH_SIZE` - Batch size for batch operations (default: 50)
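For a quick illustration of how these variables are consumed (a minimal sketch, not part of the commit; the values shown are the documented defaults), the cache settings just need to be in the environment before the provider is constructed:

package main

import "os"

func main() {
	// Illustrative only: enable the LLM cache for this process.
	// These values mirror the documented defaults.
	os.Setenv("AGENT_LLM_CACHE_SIZE", "1000")              // LRU capacity; leaving it unset keeps caching off
	os.Setenv("AGENT_LLM_CACHE_TTL", "300")                // entry lifetime in seconds
	os.Setenv("AGENT_LLM_CACHE_PATH", ".agent_cache.json") // file used to persist the cache across runs

	// ... construct the LLM provider afterwards (see src/models/helper.go below).
}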

src/cache/lru_cache.go

Lines changed: 42 additions & 0 deletions
@@ -121,3 +121,45 @@ func HashKey(prompt string) string {
	h := sha256.Sum256([]byte(prompt))
	return hex.EncodeToString(h[:])
}

// Dump returns a copy of the cache entries, keyed by cache key, for persistence.
func (c *LRUCache) Dump() map[string]CacheEntry {
	c.mu.RLock()
	defer c.mu.RUnlock()

	dump := make(map[string]CacheEntry, len(c.items))
	for k, elem := range c.items {
		dump[k] = elem.Value.(*entry).value
	}
	return dump
}

// Restore populates the cache from a map of entries, dropping any that have
// expired and evicting down to capacity afterwards.
func (c *LRUCache) Restore(dump map[string]CacheEntry) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.lru.Init()
	c.items = make(map[string]*list.Element, c.capacity)

	for k, v := range dump {
		// Skip entries whose TTL elapsed while the dump was on disk.
		if time.Now().After(v.ExpiresAt) {
			continue
		}

		// Add to cache
		ent := &entry{key: k, value: v}
		elem := c.lru.PushFront(ent)
		c.items[k] = elem
	}

	// Enforce capacity by evicting from the least-recently-used end.
	for c.lru.Len() > c.capacity {
		oldest := c.lru.Back()
		if oldest != nil {
			c.lru.Remove(oldest)
			delete(c.items, oldest.Value.(*entry).key)
		}
	}
}
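For context, a minimal sketch of how Dump and Restore might round-trip through JSON on their own (not part of the commit; it assumes NewLRUCache, Set, and HashKey as used elsewhere in this change, and error handling is elided for brevity):

package main

import (
	"encoding/json"
	"os"
	"time"

	"github.com/Protocol-Lattice/go-agent/src/cache"
)

func main() {
	c := cache.NewLRUCache(100, 5*time.Minute)
	c.Set(cache.HashKey("hello"), "world")

	// Persist: Dump returns the live entries, which encode as plain JSON.
	f, _ := os.Create("snapshot.json") // illustrative path; errors ignored for brevity
	json.NewEncoder(f).Encode(c.Dump())
	f.Close()

	// Rebuild: Restore drops entries whose TTL has elapsed and re-enforces capacity.
	g, _ := os.Open("snapshot.json")
	var dump map[string]cache.CacheEntry
	json.NewDecoder(g).Decode(&dump)
	g.Close()

	restored := cache.NewLRUCache(100, 5*time.Minute)
	restored.Restore(dump)
}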

src/models/cached.go

Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
package models

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"os"
	"strconv"
	"time"

	"github.com/Protocol-Lattice/go-agent/src/cache"
)

// CachedLLM wraps an Agent and caches Generate calls.
type CachedLLM struct {
	Agent    Agent
	Cache    *cache.LRUCache
	FilePath string
}

// NewCachedLLM creates a new CachedLLM wrapper.
func NewCachedLLM(agent Agent, size int, ttl time.Duration, filePath string) *CachedLLM {
	c := &CachedLLM{
		Agent:    agent,
		Cache:    cache.NewLRUCache(size, ttl),
		FilePath: filePath,
	}
	if filePath != "" {
		c.load()
	}
	return c
}

func (c *CachedLLM) load() {
	f, err := os.Open(c.FilePath)
	if err != nil {
		return // ignore errors (file not found, etc.)
	}
	defer f.Close()

	var dump map[string]cache.CacheEntry
	if err := json.NewDecoder(f).Decode(&dump); err == nil {
		c.Cache.Restore(dump)
	}
}

func (c *CachedLLM) save() {
	if c.FilePath == "" {
		return
	}
	dump := c.Cache.Dump()

	// Atomic write: write to a temp file, then rename it into place.
	tmp := c.FilePath + ".tmp"
	f, err := os.Create(tmp)
	if err != nil {
		return
	}

	if err := json.NewEncoder(f).Encode(dump); err != nil {
		f.Close()
		os.Remove(tmp)
		return
	}
	f.Close()
	os.Rename(tmp, c.FilePath)
}

// Generate checks the cache before calling the underlying agent.
func (c *CachedLLM) Generate(ctx context.Context, prompt string) (any, error) {
	key := cache.HashKey(prompt)
	if val, ok := c.Cache.Get(key); ok {
		return val, nil
	}

	res, err := c.Agent.Generate(ctx, prompt)
	if err != nil {
		return nil, err
	}

	c.Cache.Set(key, res)
	c.save()
	return res, nil
}

// GenerateWithFiles checks the cache (including file hashes) before calling the underlying agent.
func (c *CachedLLM) GenerateWithFiles(ctx context.Context, prompt string, files []File) (any, error) {
	// Build a cache key from the prompt plus every file's name, MIME type, and contents.
	h := sha256.New()
	h.Write([]byte(prompt))
	for _, f := range files {
		h.Write([]byte(f.Name))
		h.Write([]byte(f.MIME))
		h.Write(f.Data)
	}
	key := hex.EncodeToString(h.Sum(nil))

	if val, ok := c.Cache.Get(key); ok {
		return val, nil
	}

	res, err := c.Agent.GenerateWithFiles(ctx, prompt, files)
	if err != nil {
		return nil, err
	}

	c.Cache.Set(key, res)
	c.save()
	return res, nil
}

// TryCreateCachedLLM checks env vars and wraps the agent if caching is enabled.
func TryCreateCachedLLM(agent Agent) Agent {
	sizeStr := os.Getenv("AGENT_LLM_CACHE_SIZE")
	if sizeStr == "" {
		return agent
	}

	size, err := strconv.Atoi(sizeStr)
	if err != nil || size <= 0 {
		return agent
	}

	ttlStr := os.Getenv("AGENT_LLM_CACHE_TTL")
	ttl := 300 * time.Second // default: 5 minutes
	if ttlStr != "" {
		if sec, err := strconv.Atoi(ttlStr); err == nil && sec > 0 {
			ttl = time.Duration(sec) * time.Second
		}
	}

	path := os.Getenv("AGENT_LLM_CACHE_PATH")
	if path == "" {
		// Default to a file in the working directory when caching is enabled.
		path = ".agent_cache.json"
	}

	return NewCachedLLM(agent, size, ttl, path)
}
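As a usage sketch (not part of the commit): the wrapper satisfies the same Agent interface it wraps, so it can stand in anywhere an agent is used. The stub agent and the models import path below are illustrative assumptions:

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/Protocol-Lattice/go-agent/src/models" // assumed path, mirroring the cache package
)

// echoAgent is an illustrative stand-in for a real provider.
type echoAgent struct{}

func (echoAgent) Generate(ctx context.Context, prompt string) (any, error) {
	fmt.Println("underlying agent called")
	return "echo: " + prompt, nil
}

func (echoAgent) GenerateWithFiles(ctx context.Context, prompt string, files []models.File) (any, error) {
	return "echo with files", nil
}

func main() {
	ctx := context.Background()

	// 1000-entry cache, 5-minute TTL, persisted to .agent_cache.json.
	cached := models.NewCachedLLM(echoAgent{}, 1000, 5*time.Minute, ".agent_cache.json")

	out, _ := cached.Generate(ctx, "hello") // miss: the underlying agent runs
	fmt.Println(out)

	out, _ = cached.Generate(ctx, "hello") // hit: served from the cache, agent not called again
	fmt.Println(out)
}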

src/models/cached_test.go

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
package models

import (
	"context"
	"sync/atomic"
	"testing"
	"time"
)

type MockAgent struct {
	CallCount int32
}

func (m *MockAgent) Generate(ctx context.Context, prompt string) (any, error) {
	atomic.AddInt32(&m.CallCount, 1)
	return "mock response", nil
}

func (m *MockAgent) GenerateWithFiles(ctx context.Context, prompt string, files []File) (any, error) {
	atomic.AddInt32(&m.CallCount, 1)
	return "mock response with files", nil
}

func TestCachedLLM_Generate(t *testing.T) {
	mock := &MockAgent{}
	cached := NewCachedLLM(mock, 10, time.Minute, "")

	ctx := context.Background()
	prompt := "hello"

	// First call - should hit the agent
	_, err := cached.Generate(ctx, prompt)
	if err != nil {
		t.Fatalf("first call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 1 {
		t.Errorf("expected 1 call, got %d", count)
	}

	// Second call - should hit the cache
	_, err = cached.Generate(ctx, prompt)
	if err != nil {
		t.Fatalf("second call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 1 {
		t.Errorf("expected 1 call (cached), got %d", count)
	}

	// Different prompt - should hit the agent
	_, err = cached.Generate(ctx, "world")
	if err != nil {
		t.Fatalf("third call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 2 {
		t.Errorf("expected 2 calls, got %d", count)
	}
}

func TestCachedLLM_GenerateWithFiles(t *testing.T) {
	mock := &MockAgent{}
	cached := NewCachedLLM(mock, 10, time.Minute, "")

	ctx := context.Background()
	prompt := "analyze"
	files := []File{{Name: "a.txt", Data: []byte("content")}}

	// First call - should hit the agent
	_, err := cached.GenerateWithFiles(ctx, prompt, files)
	if err != nil {
		t.Fatalf("first call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 1 {
		t.Errorf("expected 1 call, got %d", count)
	}

	// Second call - same files, should hit the cache
	_, err = cached.GenerateWithFiles(ctx, prompt, files)
	if err != nil {
		t.Fatalf("second call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 1 {
		t.Errorf("expected 1 call (cached), got %d", count)
	}

	// Different file content - should hit the agent
	files2 := []File{{Name: "a.txt", Data: []byte("different")}}
	_, err = cached.GenerateWithFiles(ctx, prompt, files2)
	if err != nil {
		t.Fatalf("third call failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 2 {
		t.Errorf("expected 2 calls, got %d", count)
	}
}
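One natural follow-up (not in this commit) would be a test for the persistence path itself. A sketch along the same lines, which would additionally need the path/filepath import:

// Sketch of a possible persistence test (not part of this commit).
func TestCachedLLM_Persistence(t *testing.T) {
	path := filepath.Join(t.TempDir(), "cache.json")

	mock := &MockAgent{}
	first := NewCachedLLM(mock, 10, time.Minute, path)
	if _, err := first.Generate(context.Background(), "hello"); err != nil {
		t.Fatalf("generate failed: %v", err)
	}

	// A second wrapper pointed at the same file should restore the entry
	// and answer without calling the underlying agent again.
	second := NewCachedLLM(mock, 10, time.Minute, path)
	if _, err := second.Generate(context.Background(), "hello"); err != nil {
		t.Fatalf("generate after restore failed: %v", err)
	}
	if count := atomic.LoadInt32(&mock.CallCount); count != 1 {
		t.Errorf("expected 1 underlying call after restore, got %d", count)
	}
}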

src/models/helper.go

Lines changed: 13 additions & 4 deletions
@@ -48,18 +48,27 @@ var (
 
 // NewLLMProvider returns a concrete Agent.
 func NewLLMProvider(ctx context.Context, provider string, model string, promptPrefix string) (Agent, error) {
+	var agent Agent
+	var err error
+
 	switch provider {
 	case "openai":
-		return NewOpenAILLM(model, promptPrefix), nil
+		agent = NewOpenAILLM(model, promptPrefix)
 	case "gemini", "google":
-		return NewGeminiLLM(ctx, model, promptPrefix)
+		agent, err = NewGeminiLLM(ctx, model, promptPrefix)
 	case "ollama":
-		return NewOllamaLLM(model, promptPrefix)
+		agent, err = NewOllamaLLM(model, promptPrefix)
 	case "anthropic", "claude":
-		return NewAnthropicLLM(model, promptPrefix), nil
+		agent = NewAnthropicLLM(model, promptPrefix)
 	default:
 		return nil, fmt.Errorf("unknown provider: %s", provider)
 	}
+
+	if err != nil {
+		return nil, err
+	}
+
+	return TryCreateCachedLLM(agent), nil
 }
 
 // sanitizeForGemini coerces edge cases again and filters to what Gemini will accept.
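With this wiring, callers of NewLLMProvider get caching transparently whenever AGENT_LLM_CACHE_SIZE is set (see PERFORMANCE_OPTIMIZATIONS.md above); nothing else at the call site changes. A minimal sketch, where the import path, model name, and prompt are illustrative and the provider still needs its usual credentials:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/Protocol-Lattice/go-agent/src/models" // assumed import path
)

func main() {
	ctx := context.Background()

	// With AGENT_LLM_CACHE_SIZE exported, the returned Agent is a CachedLLM
	// wrapping the concrete provider; without it, it is the provider itself.
	agent, err := models.NewLLMProvider(ctx, "openai", "gpt-4o-mini", "")
	if err != nil {
		log.Fatal(err)
	}

	// Identical prompts within the TTL are served from the cache.
	out, err := agent.Generate(ctx, "Summarize the latest release notes")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out)
}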
