#78: Normalize response latency by response token count & renamed TokenCount to TokenUsage
roma-glushko committed Jan 29, 2024
1 parent 0b032c7 commit 7301bb7
Showing 10 changed files with 16 additions and 17 deletions.
4 changes: 2 additions & 2 deletions docs/docs.go
@@ -650,11 +650,11 @@ const docTemplate = `{
                     }
                 },
                 "tokenCount": {
-                    "$ref": "#/definitions/schemas.TokenCount"
+                    "$ref": "#/definitions/schemas.TokenUsage"
                 }
             }
         },
-        "schemas.TokenCount": {
+        "schemas.TokenUsage": {
             "type": "object",
             "properties": {
                 "promptTokens": {
4 changes: 2 additions & 2 deletions docs/swagger.json
@@ -647,11 +647,11 @@
                 }
             },
             "tokenCount": {
-                "$ref": "#/definitions/schemas.TokenCount"
+                "$ref": "#/definitions/schemas.TokenUsage"
             }
         }
     },
-    "schemas.TokenCount": {
+    "schemas.TokenUsage": {
         "type": "object",
         "properties": {
             "promptTokens": {
4 changes: 2 additions & 2 deletions docs/swagger.yaml
@@ -358,9 +358,9 @@ definitions:
           type: string
         type: object
       tokenCount:
-        $ref: '#/definitions/schemas.TokenCount'
+        $ref: '#/definitions/schemas.TokenUsage'
     type: object
-  schemas.TokenCount:
+  schemas.TokenUsage:
     properties:
       promptTokens:
         type: number
4 changes: 2 additions & 2 deletions pkg/api/schemas/language.go
@@ -39,10 +39,10 @@ type UnifiedChatResponse struct {
 type ProviderResponse struct {
 	SystemID   map[string]string `json:"responseId,omitempty"`
 	Message    ChatMessage       `json:"message"`
-	TokenCount TokenCount        `json:"tokenCount"`
+	TokenUsage TokenUsage        `json:"tokenCount"`
 }
 
-type TokenCount struct {
+type TokenUsage struct {
 	PromptTokens   float64 `json:"promptTokens"`
 	ResponseTokens float64 `json:"responseTokens"`
 	TotalTokens    float64 `json:"totalTokens"`
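
For orientation, here is a minimal sketch of how a provider client would populate the renamed struct. The type and field names come from the diff above; the variable name and literal values are placeholders for illustration, not part of this commit:

	// Hypothetical usage of the renamed schema types (values are placeholders).
	modelResp := schemas.ProviderResponse{
		Message: schemas.ChatMessage{
			Content: "Hello!",
			Name:    "",
		},
		TokenUsage: schemas.TokenUsage{
			PromptTokens:   12,
			ResponseTokens: 34,
			TotalTokens:    46,
		},
	}

Note that the JSON tag on the field is still `json:"tokenCount"`, so the rename changes the Go API but leaves the wire format untouched.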
2 changes: 1 addition & 1 deletion pkg/providers/anthropic/chat.go
@@ -178,7 +178,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 			Content: anthropicCompletion.Content[0].Text,
 			Name:    "",
 		},
-		TokenCount: schemas.TokenCount{
+		TokenUsage: schemas.TokenCount{

Check failure on line 181 in pkg/providers/anthropic/chat.go (GitHub Actions: Build, Vulnerability Check, Tests): undefined: schemas.TokenCount

 			PromptTokens:   0, // Anthropic doesn't send prompt tokens
 			ResponseTokens: 0,
 			TotalTokens:    0,
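
The CI annotations above (and the identical ones in the azureopenai, cohere, octoml, and openai files below) all report the same compiler error: the struct type was renamed, but the composite literal on the right-hand side still names the old type, which no longer exists. Judging by the compiler error alone, the line presumably needs to reference the renamed type on both sides, along these lines (an inference from the error, not part of this commit):

	TokenUsage: schemas.TokenUsage{ // renamed type on both the field and the literal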
2 changes: 1 addition & 1 deletion pkg/providers/azureopenai/chat.go
@@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 			Content: openAICompletion.Choices[0].Message.Content,
 			Name:    "",
 		},
-		TokenCount: schemas.TokenCount{
+		TokenUsage: schemas.TokenCount{

Check failure on line 193 in pkg/providers/azureopenai/chat.go (GitHub Actions: Build, Vulnerability Check, Tests): undefined: schemas.TokenCount

 			PromptTokens:   openAICompletion.Usage.PromptTokens,
 			ResponseTokens: openAICompletion.Usage.CompletionTokens,
 			TotalTokens:    openAICompletion.Usage.TotalTokens,
2 changes: 1 addition & 1 deletion pkg/providers/cohere/chat.go
@@ -195,7 +195,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 			Content: cohereCompletion.Text,
 			Name:    "",
 		},
-		TokenCount: schemas.TokenCount{
+		TokenUsage: schemas.TokenCount{

Check failure on line 198 in pkg/providers/cohere/chat.go (GitHub Actions: Build, Vulnerability Check, Tests): undefined: schemas.TokenCount

 			PromptTokens:   cohereCompletion.TokenCount.PromptTokens,
 			ResponseTokens: cohereCompletion.TokenCount.ResponseTokens,
 			TotalTokens:    cohereCompletion.TokenCount.TotalTokens,
2 changes: 1 addition & 1 deletion pkg/providers/octoml/chat.go
@@ -176,7 +176,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 			Content: openAICompletion.Choices[0].Message.Content,
 			Name:    "",
 		},
-		TokenCount: schemas.TokenCount{
+		TokenUsage: schemas.TokenCount{

Check failure on line 179 in pkg/providers/octoml/chat.go (GitHub Actions: Build, Vulnerability Check, Tests): undefined: schemas.TokenCount

 			PromptTokens:   openAICompletion.Usage.PromptTokens,
 			ResponseTokens: openAICompletion.Usage.CompletionTokens,
 			TotalTokens:    openAICompletion.Usage.TotalTokens,
2 changes: 1 addition & 1 deletion pkg/providers/openai/chat.go
@@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
 			Content: openAICompletion.Choices[0].Message.Content,
 			Name:    "",
 		},
-		TokenCount: schemas.TokenCount{
+		TokenUsage: schemas.TokenCount{

Check failure on line 193 in pkg/providers/openai/chat.go (GitHub Actions: Build, Vulnerability Check, Tests): undefined: schemas.TokenCount

 			PromptTokens:   openAICompletion.Usage.PromptTokens,
 			ResponseTokens: openAICompletion.Usage.CompletionTokens,
 			TotalTokens:    openAICompletion.Usage.TotalTokens,
7 changes: 3 additions & 4 deletions pkg/providers/provider.go
@@ -79,14 +79,13 @@ func (m *LangModel) Weight() int {
 }
 
 func (m *LangModel) Chat(ctx context.Context, request *schemas.UnifiedChatRequest) (*schemas.UnifiedChatResponse, error) {
+	// TODO: we may want to track time-to-first-byte to "normalize" response latency wrt response size
 	startedAt := time.Now()
 	resp, err := m.client.Chat(ctx, request)
 
-	// Do we want to track latency in case of errors as well?
-	m.latency.Add(float64(time.Since(startedAt)))
-
 	if err == nil {
+		// record latency per token to normalize measurements
+		m.latency.Add(float64(time.Since(startedAt)) / resp.ModelResponse.TokenUsage.ResponseTokens)
 
 		// successful response
 		resp.ModelID = m.modelID
 
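
Two consequences of this hunk are worth noting: latency is now recorded only on successful responses (the deleted comment had questioned whether errors should be tracked too), and the division assumes `ResponseTokens` is non-zero, even though the Anthropic client in this same commit hard-codes it to 0, which would yield +Inf. A self-contained sketch of the normalization arithmetic follows; the zero-token guard is a hypothetical addition for illustration, not something this commit includes:

	package main

	import (
		"fmt"
		"time"
	)

	// latencyPerToken mirrors the arithmetic in LangModel.Chat above:
	// raw chat latency divided by the number of response tokens.
	// The guard is NOT in the commit; it shows the edge case the raw
	// division leaves open (0 response tokens -> +Inf).
	func latencyPerToken(elapsed time.Duration, responseTokens float64) float64 {
		if responseTokens <= 0 {
			return float64(elapsed) // fall back to unnormalized latency
		}
		return float64(elapsed) / responseTokens
	}

	func main() {
		// 900ms spent producing 300 response tokens -> 3ms per token.
		perToken := latencyPerToken(900*time.Millisecond, 300)
		fmt.Println(time.Duration(perToken)) // prints "3ms"
	}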
