EinStack · roma-glushko · Mar 24, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024
diff --git a/docs/docs.go b/docs/docs.go
@@ -193,11 +193,15 @@ const docTemplate = `{
         "anthropic.Config": {
             "type": "object",
             "required": [
+                "apiVersion",
                 "baseUrl",
                 "chatEndpoint",
                 "model"
             ],
             "properties": {
+                "apiVersion": {
+                    "type": "string"
+                },
                 "baseUrl": {
                     "type": "string"
                 },
@@ -910,13 +914,13 @@ const docTemplate = `{
             "type": "object",
             "properties": {
                 "promptTokens": {
-                    "type": "number"
+                    "type": "integer"
                 },
                 "responseTokens": {
-                    "type": "number"
+                    "type": "integer"
                 },
                 "totalTokens": {
-                    "type": "number"
+                    "type": "integer"
                 }
             }
         }

diff --git a/docs/swagger.json b/docs/swagger.json
@@ -190,11 +190,15 @@
         "anthropic.Config": {
             "type": "object",
             "required": [
+                "apiVersion",
                 "baseUrl",
                 "chatEndpoint",
                 "model"
             ],
             "properties": {
+                "apiVersion": {
+                    "type": "string"
+                },
                 "baseUrl": {
                     "type": "string"
                 },
@@ -907,13 +911,13 @@
             "type": "object",
             "properties": {
                 "promptTokens": {
-                    "type": "number"
+                    "type": "integer"
                 },
                 "responseTokens": {
-                    "type": "number"
+                    "type": "integer"
                 },
                 "totalTokens": {
-                    "type": "number"
+                    "type": "integer"
                 }
             }
         }

diff --git a/docs/swagger.yaml b/docs/swagger.yaml
@@ -2,6 +2,8 @@ basePath: /
 definitions:
   anthropic.Config:
     properties:
+      apiVersion:
+        type: string
       baseUrl:
         type: string
       chatEndpoint:
@@ -11,6 +13,7 @@ definitions:
       model:
         type: string
     required:
+    - apiVersion
     - baseUrl
     - chatEndpoint
     - model
@@ -488,11 +491,11 @@ definitions:
   schemas.TokenUsage:
     properties:
       promptTokens:
-        type: number
+        type: integer
       responseTokens:
-        type: number
+        type: integer
       totalTokens:
-        type: number
+        type: integer
     type: object
 externalDocs:
   description: Documentation

diff --git a/pkg/api/schemas/chat.go b/pkg/api/schemas/chat.go
@@ -43,9 +43,9 @@ type ModelResponse struct {
 }
 
 type TokenUsage struct {
-	PromptTokens   float64 `json:"promptTokens"`
-	ResponseTokens float64 `json:"responseTokens"`
-	TotalTokens    float64 `json:"totalTokens"`
+	PromptTokens   int `json:"promptTokens"`
+	ResponseTokens int `json:"responseTokens"`
+	TotalTokens    int `json:"totalTokens"`
 }
 
 // ChatMessage is a message in a chat request.

diff --git a/pkg/providers/anthropic/chat.go b/pkg/providers/anthropic/chat.go
@@ -61,6 +61,8 @@ func NewChatMessagesFromUnifiedRequest(request *schemas.ChatRequest) []ChatMessa
 }
 
 // Chat sends a chat request to the specified anthropic model.
+//
+//	Ref: https://docs.anthropic.com/claude/reference/messages_post
 func (c *Client) Chat(ctx context.Context, request *schemas.ChatRequest) (*schemas.ChatResponse, error) {
 	// Create a new chat request
 	chatRequest := c.createChatRequestSchema(request)
@@ -70,10 +72,6 @@ func (c *Client) Chat(ctx context.Context, request *schemas.ChatRequest) (*schem
 		return nil, err
 	}
 
-	if len(chatResponse.ModelResponse.Message.Content) == 0 {
-		return nil, ErrEmptyResponse
-	}
-
 	return chatResponse, nil
 }
 
@@ -97,12 +95,13 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 		return nil, fmt.Errorf("unable to create anthropic chat request: %w", err)
 	}
 
-	req.Header.Set("Authorization", "Bearer "+string(c.config.APIKey))
+	req.Header.Set("x-api-key", string(c.config.APIKey)) // Anthropic authenticates via x-api-key (not Bearer); Header.Set canonicalizes the name, so its case here does not matter
+	req.Header.Set("anthropic-version", c.apiVersion)
 	req.Header.Set("Content-Type", "application/json")
 
 	// TODO: this could leak information from messages which may not be a desired thing to have
-	c.telemetry.Logger.Debug(
-		"anthropic chat request",
+	c.tel.L().Debug(
+		"Anthropic chat request",
 		zap.String("chat_url", c.chatURL),
 		zap.Any("payload", payload),
 	)
@@ -121,38 +120,43 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 	// Read the response body into a byte slice
 	bodyBytes, err := io.ReadAll(resp.Body)
 	if err != nil {
-		c.telemetry.Logger.Error("failed to read anthropic chat response", zap.Error(err))
+		c.tel.L().Error("Failed to read anthropic chat response", zap.Error(err))
 		return nil, err
 	}
 
 	// Parse the response JSON
-	var anthropicCompletion ChatCompletion
+	var anthropicResponse ChatCompletion
 
-	err = json.Unmarshal(bodyBytes, &anthropicCompletion)
+	err = json.Unmarshal(bodyBytes, &anthropicResponse)
 	if err != nil {
-		c.telemetry.Logger.Error("failed to parse anthropic chat response", zap.Error(err))
+		c.tel.L().Error("Failed to parse anthropic chat response", zap.Error(err))
 		return nil, err
 	}
 
+	if len(anthropicResponse.Content) == 0 {
+		return nil, ErrEmptyResponse
+	}
+
+	completion := anthropicResponse.Content[0]
+	usage := anthropicResponse.Usage
+
 	// Map response to ChatResponse schema
 	response := schemas.ChatResponse{
-		ID:        anthropicCompletion.ID,
+		ID:        anthropicResponse.ID,
 		Created:   int(time.Now().UTC().Unix()), // not provided by anthropic
 		Provider:  providerName,
-		ModelName: anthropicCompletion.Model,
+		ModelName: anthropicResponse.Model,
 		Cached:    false,
 		ModelResponse: schemas.ModelResponse{
-			SystemID: map[string]string{
-				"system_fingerprint": anthropicCompletion.ID,
-			},
+			SystemID: map[string]string{},
 			Message: schemas.ChatMessage{
-				Role:    anthropicCompletion.Content[0].Type,
-				Content: anthropicCompletion.Content[0].Text,
+				Role:    completion.Type,
+				Content: completion.Text,
 			},
 			TokenUsage: schemas.TokenUsage{
-				PromptTokens:   0, // Anthropic doesn't send prompt tokens
-				ResponseTokens: 0,
-				TotalTokens:    0,
+				PromptTokens:   usage.InputTokens,
+				ResponseTokens: usage.OutputTokens,
+				TotalTokens:    usage.InputTokens + usage.OutputTokens,
 			},
 		},
 	}

diff --git a/pkg/providers/anthropic/client.go b/pkg/providers/anthropic/client.go
@@ -22,11 +22,12 @@ var (
 type Client struct {
 	baseURL             string
 	chatURL             string
+	apiVersion          string
 	chatRequestTemplate *ChatRequest
 	errMapper           *ErrorMapper
 	config              *Config
 	httpClient          *http.Client
-	telemetry           *telemetry.Telemetry
+	tel                 *telemetry.Telemetry
 }
 
 // NewClient creates a new OpenAI client for the OpenAI API.
@@ -39,6 +40,7 @@ func NewClient(providerConfig *Config, clientConfig *clients.ClientConfig, tel *
 	c := &Client{
 		baseURL:             providerConfig.BaseURL,
 		chatURL:             chatURL,
+		apiVersion:          providerConfig.APIVersion,
 		config:              providerConfig,
 		chatRequestTemplate: NewChatRequestFromConfig(providerConfig),
 		errMapper:           NewErrorMapper(tel),
@@ -50,7 +52,7 @@ func NewClient(providerConfig *Config, clientConfig *clients.ClientConfig, tel *
 				MaxIdleConnsPerHost: 2,
 			},
 		},
-		telemetry: tel,
+		tel: tel,
 	}
 
 	return c, nil

diff --git a/pkg/providers/anthropic/config.go b/pkg/providers/anthropic/config.go
@@ -14,7 +14,6 @@ type Params struct {
 	MaxTokens     int      `yaml:"max_tokens,omitempty" json:"max_tokens"`
 	StopSequences []string `yaml:"stop,omitempty" json:"stop"`
 	Metadata      *string  `yaml:"metadata,omitempty" json:"metadata"`
-	// Stream           bool             `json:"stream,omitempty"` // TODO: we are not supporting this at the moment
 }
 
 func DefaultParams() Params {
@@ -38,6 +37,7 @@ func (p *Params) UnmarshalYAML(unmarshal func(interface{}) error) error {
 
 type Config struct {
 	BaseURL       string        `yaml:"baseUrl" json:"baseUrl" validate:"required"`
+	APIVersion    string        `yaml:"apiVersion" json:"apiVersion" validate:"required"`
 	ChatEndpoint  string        `yaml:"chatEndpoint" json:"chatEndpoint" validate:"required"`
 	Model         string        `yaml:"model" json:"model" validate:"required"`
 	APIKey        fields.Secret `yaml:"api_key" json:"-" validate:"required"`
@@ -50,6 +50,7 @@ func DefaultConfig() *Config {
 
 	return &Config{
 		BaseURL:       "https://api.anthropic.com/v1",
+		APIVersion:    "2023-06-01",
 		ChatEndpoint:  "/messages",
 		Model:         "claude-instant-1.2",
 		DefaultParams: &defaultParams,

diff --git a/pkg/providers/anthropic/schamas.go b/pkg/providers/anthropic/schamas.go
@@ -1,6 +1,16 @@
 package anthropic
 
-// Anthropic Chat Response
+type Content struct {
+	Type string `json:"type"`
+	Text string `json:"text"`
+}
+
+type Usage struct {
+	InputTokens  int `json:"input_tokens"`
+	OutputTokens int `json:"output_tokens"`
+}
+
+// ChatCompletion is the response body of an Anthropic chat (Messages API) request.
 type ChatCompletion struct {
 	ID           string    `json:"id"`
 	Type         string    `json:"type"`
@@ -9,9 +19,5 @@ type ChatCompletion struct {
 	Content      []Content `json:"content"`
 	StopReason   string    `json:"stop_reason"`
 	StopSequence string    `json:"stop_sequence"`
-}
-
-type Content struct {
-	Type string `json:"type"`
-	Text string `json:"text"`
+	Usage        Usage     `json:"usage"`
 }
diff --git a/pkg/providers/anthropic/testdata/chat.success.json b/pkg/providers/anthropic/testdata/chat.success.json
@@ -1,7 +1,7 @@
 {
   "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
   "type": "message",
-  "model": "claude-2.1",
+  "model": "claude-instant-1.2",
   "role": "assistant",
   "content": [
     {
@@ -10,5 +10,9 @@
     }
   ],
   "stop_reason": "end_turn",
-  "stop_sequence": null
-}
+  "stop_sequence": null,
+  "usage":{
+    "input_tokens": 24,
+    "output_tokens": 13
+  }
+}
diff --git a/pkg/providers/bedrock/chat.go b/pkg/providers/bedrock/chat.go
@@ -99,6 +99,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 	err = json.Unmarshal(result.Body, &bedrockCompletion)
 	if err != nil {
 		c.telemetry.Logger.Error("failed to parse bedrock chat response", zap.Error(err))
+
 		return nil, err
 	}
 
@@ -118,9 +119,9 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 				Name:    "",
 			},
 			TokenUsage: schemas.TokenUsage{
-				PromptTokens:   float64(bedrockCompletion.Results[0].TokenCount),
+				PromptTokens:   bedrockCompletion.Results[0].TokenCount,
 				ResponseTokens: -1,
-				TotalTokens:    float64(bedrockCompletion.Results[0].TokenCount),
+				TotalTokens:    bedrockCompletion.Results[0].TokenCount,
 			},
 		},
 	}

diff --git a/pkg/providers/cohere/schemas.go b/pkg/providers/cohere/schemas.go
@@ -15,10 +15,10 @@ type ChatCompletion struct {
 }
 
 type TokenCount struct {
-	PromptTokens   float64 `json:"prompt_tokens"`
-	ResponseTokens float64 `json:"response_tokens"`
-	TotalTokens    float64 `json:"total_tokens"`
-	BilledTokens   float64 `json:"billed_tokens"`
+	PromptTokens   int `json:"prompt_tokens"`
+	ResponseTokens int `json:"response_tokens"`
+	TotalTokens    int `json:"total_tokens"`
+	BilledTokens   int `json:"billed_tokens"`
 }
 
 type Meta struct {

diff --git a/pkg/providers/lang.go b/pkg/providers/lang.go
@@ -89,7 +89,7 @@ func (m *LanguageModel) Chat(ctx context.Context, request *schemas.ChatRequest)
 
 	if err == nil {
 		// record latency per token to normalize measurements
-		m.chatLatency.Add(float64(time.Since(startedAt)) / resp.ModelResponse.TokenUsage.ResponseTokens)
+		m.chatLatency.Add(float64(time.Since(startedAt)) / float64(resp.ModelResponse.TokenUsage.ResponseTokens))
 
 		// successful response
 		resp.ModelID = m.modelID

diff --git a/pkg/providers/ollama/chat.go b/pkg/providers/ollama/chat.go
@@ -195,9 +195,9 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 				Content: ollamaCompletion.Message.Content,
 			},
 			TokenUsage: schemas.TokenUsage{
-				PromptTokens:   float64(ollamaCompletion.EvalCount),
-				ResponseTokens: float64(ollamaCompletion.EvalCount),
-				TotalTokens:    float64(ollamaCompletion.EvalCount),
+				PromptTokens:   ollamaCompletion.EvalCount,
+				ResponseTokens: ollamaCompletion.EvalCount,
+				TotalTokens:    ollamaCompletion.EvalCount,
 			},
 		},
 	}

diff --git a/pkg/providers/openai/schemas.go b/pkg/providers/openai/schemas.go
@@ -47,9 +47,9 @@ type Choice struct {
 }
 
 type Usage struct {
-	PromptTokens     float64 `json:"prompt_tokens"`
-	CompletionTokens float64 `json:"completion_tokens"`
-	TotalTokens      float64 `json:"total_tokens"`
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
 }
 
 // ChatCompletionChunk represents SSEvent a chat response is broken down on chat streaming