Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions internal/benchmarks/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Package benchmarks provides tokenledger integration for agentapi.
//
// This enables agentapi to use dynamic benchmark data for routing decisions.
// Integrates with tokenledger for:
// - Model quality scores
// - Cost per token
// - Latency metrics
package benchmarks

import (
"sync"
"time"
)

// BenchmarkData represents benchmark data for a single model as reported by
// tokenledger. Pointer fields are optional: nil means the metric was not
// reported, and such fields are omitted from JSON output via omitempty.
type BenchmarkData struct {
	ModelID           string    `json:"model_id"`                     // canonical model identifier (cache key in Client)
	Provider          string    `json:"provider,omitempty"`           // hosting provider, if known
	IntelligenceIndex *float64  `json:"intelligence_index,omitempty"` // general quality score; apparently on a 0-100 scale (consumers divide by 100)
	CodingIndex       *float64  `json:"coding_index,omitempty"`       // coding-specific quality score
	SpeedTPS          *float64  `json:"speed_tps,omitempty"`          // throughput in tokens per second
	LatencyMs         *float64  `json:"latency_ms,omitempty"`         // response latency in milliseconds
	PricePer1MInput   *float64  `json:"price_per_1m_input,omitempty"` // price per 1M input tokens — currency assumed USD, confirm with tokenledger
	PricePer1MOutput  *float64  `json:"price_per_1m_output,omitempty"` // price per 1M output tokens
	ContextWindow     *int64    `json:"context_window,omitempty"`     // maximum context length in tokens
	UpdatedAt         time.Time `json:"updated_at"`                   // when this record was last refreshed
}

// Client fetches benchmarks from tokenledger and memoizes results in an
// in-process cache. Safe for concurrent use: cache is guarded by mu.
type Client struct {
	tokenledgerURL string                   // base URL of the tokenledger service (not yet called; see GetBenchmark)
	cacheTTL       time.Duration            // intended freshness window for cached entries
	cache          map[string]BenchmarkData // benchmark data keyed by model ID
	mu             sync.RWMutex             // guards cache
}

// NewClient constructs a tokenledger benchmark client that talks to the
// service at tokenledgerURL and is configured to cache results for cacheTTL.
func NewClient(tokenledgerURL string, cacheTTL time.Duration) *Client {
	c := &Client{
		tokenledgerURL: tokenledgerURL,
		cacheTTL:       cacheTTL,
	}
	c.cache = make(map[string]BenchmarkData)
	return c
}

// GetBenchmark returns benchmark data for a model.
//
// A cached entry is served only while it is still fresh (younger than the
// client's cacheTTL). On a miss or an expired entry the method currently
// returns (nil, nil) so callers fall through to hardcoded fallbacks; in
// production this path would call the tokenledger HTTP API and repopulate
// the cache.
func (c *Client) GetBenchmark(modelID string) (*BenchmarkData, error) {
	c.mu.RLock()
	data, ok := c.cache[modelID]
	c.mu.RUnlock()

	// Fix: honor cacheTTL. The previous implementation returned cached
	// entries unconditionally, making the configured TTL dead code and
	// allowing arbitrarily stale benchmarks to drive routing decisions.
	if ok && time.Since(data.UpdatedAt) < c.cacheTTL {
		// data is a copy of the map value, so the returned pointer does
		// not alias mutable cache state.
		return &data, nil
	}

	// In production, this would call tokenledger HTTP API
	// For now, return nil to use fallback
	return nil, nil
}
130 changes: 130 additions & 0 deletions internal/benchmarks/store.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Package benchmarks provides unified benchmark access with fallback.
package benchmarks

import (
"sync"
)

// Hardcoded fallback values used when no live tokenledger data is available.
// These are hand-maintained proxies and should be treated as rough estimates,
// not authoritative benchmarks.
var (
	// qualityProxy maps model ID -> approximate quality score in [0, 1].
	// Models absent here fall back to a default in Store.GetQuality.
	qualityProxy = map[string]float64{
		"claude-opus-4.6":               0.95,
		"claude-opus-4.6-1m":            0.96,
		"claude-sonnet-4.6":             0.88,
		"claude-haiku-4.5":              0.75,
		"gpt-5.3-codex-high":            0.92,
		"gpt-5.3-codex":                 0.82,
		"claude-4.5-opus-high-thinking": 0.94,
		"claude-4.5-opus-high":          0.92,
		"claude-4.5-sonnet-thinking":    0.85,
		"claude-4-sonnet":               0.80,
		"gpt-4.5":                       0.85,
		"gpt-4o":                        0.82,
		"gpt-4o-mini":                   0.70,
		"gemini-2.5-pro":                0.90,
		"gemini-2.5-flash":              0.78,
		"gemini-2.0-flash":              0.72,
		"llama-4-maverick":              0.80,
		"llama-4-scout":                 0.75,
		"deepseek-v3":                   0.82,
		"deepseek-chat":                 0.75,
	}

	// costPer1kProxy maps model ID -> approximate cost.
	// NOTE(review): the name says "per 1k tokens" but the magnitudes look
	// like typical per-1M-token pricing (e.g. 15.00 for opus) — confirm the
	// intended unit before using these values in cost arithmetic.
	costPer1kProxy = map[string]float64{
		"claude-opus-4.6":               15.00,
		"claude-opus-4.6-1m":            15.00,
		"claude-sonnet-4.6":             3.00,
		"claude-haiku-4.5":              0.25,
		"gpt-5.3-codex-high":            10.00,
		"gpt-5.3-codex":                 5.00,
		"claude-4.5-opus-high-thinking": 15.00,
		"claude-4.5-opus-high":          15.00,
		"claude-4.5-sonnet-thinking":    3.00,
		"claude-4-sonnet":               3.00,
		"gpt-4.5":                       5.00,
		"gpt-4o":                        2.50,
		"gpt-4o-mini":                   0.15,
		"gemini-2.5-pro":                1.50,
		"gemini-2.5-flash":              0.10,
		"gemini-2.0-flash":              0.05,
		"llama-4-maverick":              0.40,
		"llama-4-scout":                 0.20,
		"deepseek-v3":                   0.60,
		"deepseek-chat":                 0.30,
	}

	// latencyMsProxy maps model ID -> approximate latency in milliseconds.
	// Deliberately sparser than the other tables; missing models fall back
	// to a default in Store.GetLatency.
	latencyMsProxy = map[string]int{
		"claude-opus-4.6":    2500,
		"claude-sonnet-4.6":  1500,
		"claude-haiku-4.5":   800,
		"gpt-5.3-codex-high": 2000,
		"gpt-4o":             1800,
		"gemini-2.5-pro":     1200,
		"gemini-2.5-flash":   500,
		"deepseek-v3":        1500,
	}
)

// Store provides unified benchmark access with fallback: queries prefer the
// dynamic tokenledger client when one is set, otherwise they answer from the
// hardcoded FallbackProvider tables.
type Store struct {
	mu       sync.RWMutex      // guards reads of the fallback tables
	fallback *FallbackProvider // hardcoded values; always set by NewStore
	client   *Client           // optional dynamic source; nil means fallback-only
}

// FallbackProvider provides hardcoded benchmark values, all keyed by model ID.
type FallbackProvider struct {
	QualityProxy   map[string]float64 // quality score in [0, 1]
	CostPer1kProxy map[string]float64 // cost proxy (see costPer1kProxy unit note)
	LatencyMsProxy map[string]int     // latency in milliseconds
}

// NewStore creates a benchmark store backed solely by the hardcoded fallback
// tables; no tokenledger client is configured (client keeps its nil zero
// value), so every lookup answers from the fallback data.
func NewStore() *Store {
	fb := &FallbackProvider{
		QualityProxy:   qualityProxy,
		CostPer1kProxy: costPer1kProxy,
		LatencyMsProxy: latencyMsProxy,
	}
	return &Store{fallback: fb}
}

// GetQuality returns the quality score for a model in [0, 1], preferring live
// tokenledger data when a client is configured and falling back to the
// hardcoded table (0.5 when the model is unknown).
func (s *Store) GetQuality(modelID string) float64 {
	// Dynamic source takes priority when available.
	if c := s.client; c != nil {
		data, err := c.GetBenchmark(modelID)
		if err == nil && data != nil && data.IntelligenceIndex != nil {
			// Intelligence index appears to be on a 0-100 scale; normalize.
			return *data.IntelligenceIndex / 100.0
		}
	}

	s.mu.RLock()
	defer s.mu.RUnlock()
	score, known := s.fallback.QualityProxy[modelID]
	if !known {
		return 0.5 // neutral default for unknown models
	}
	return score
}

// GetCost returns the cost proxy for a model from the fallback table, or a
// default of 1.0 when the model is unknown.
func (s *Store) GetCost(modelID string) float64 {
	s.mu.RLock()
	defer s.mu.RUnlock()
	cost, known := s.fallback.CostPer1kProxy[modelID]
	if !known {
		return 1.0 // default for unknown models
	}
	return cost
}

// GetLatency returns the latency estimate in milliseconds for a model from
// the fallback table, or a default of 2000ms when the model is unknown.
func (s *Store) GetLatency(modelID string) int {
	s.mu.RLock()
	defer s.mu.RUnlock()
	latency, known := s.fallback.LatencyMsProxy[modelID]
	if !known {
		return 2000 // default for unknown models
	}
	return latency
}
Loading