
doc: update chat message count example
pkoukk committed Jun 28, 2023
1 parent f360d8f commit e87301f
Showing 2 changed files with 47 additions and 43 deletions.
45 changes: 23 additions & 22 deletions README.md
@@ -100,9 +100,9 @@ func main() {
 ```
 
 ### Counting Tokens For Chat API Calls
-Below is an example function for counting tokens for messages passed to gpt-3.5-turbo-0301 or gpt-4-0314.
+Below is an example function for counting tokens for messages passed to gpt-3.5-turbo or gpt-4.
 
-The following code was written by @nasa1024 based on [openai-cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) examples.
+The following code was written based on [openai-cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) examples at `Wednesday, 28 June 2023`.
 
 Please note that the token calculation method for the message may change at any time, so this code may not necessarily be applicable in the future.
 
@@ -131,29 +131,30 @@ func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string
 	}
 
 	var tokensPerMessage, tokensPerName int
-
-	if model == "gpt-3.5-turbo-0613" ||
-		model == "gpt-3.5-turbo-16k-0613" ||
-		model == "gpt-4-0314" ||
-		model == "gpt-4-32k-0314" ||
-		model == "gpt-4-0613" ||
-		model == "gpt-4-32k-0613" ||
-		model == "gpt-3.5-turbo-16k" {
+	switch model {
+	case "gpt-3.5-turbo-0613",
+		"gpt-3.5-turbo-16k-0613",
+		"gpt-4-0314",
+		"gpt-4-32k-0314",
+		"gpt-4-0613",
+		"gpt-4-32k-0613":
 		tokensPerMessage = 3
-		tokensPerName = -1
-	} else if model == "gpt-3.5-turbo-0301" {
+		tokensPerName = 1
+	case "gpt-3.5-turbo-0301":
 		tokensPerMessage = 4 // every message follows <|start|>{role/name}\n{content}<|end|>\n
 		tokensPerName = -1 // if there's a name, the role is omitted
-	} else if model == "gpt-3.5-turbo" {
-		log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-		return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
-	} else if model == "gpt-4" {
-		log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-		return NumTokensFromMessages(messages, "gpt-4-0613")
-	} else {
-		err := errors.New("warning: model not found. Using cl100k_base encoding")
-		log.Println(err)
-		return
+	default:
+		if strings.Contains(model, "gpt-3.5-turbo") {
+			log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+			return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
+		} else if strings.Contains(model, "gpt-4") {
+			log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+			return NumTokensFromMessages(messages, "gpt-4-0613")
+		} else {
+			err = fmt.Errorf("num_tokens_from_messages() is not implemented for model %s. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.", model)
+			log.Println(err)
+			return
+		}
 	}
 
 	for _, message := range messages {
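For orientation, here is a minimal sketch of how the updated example might be driven. It assumes the `NumTokensFromMessages` function from the diff above is defined in the same package, together with the `github.com/sashabaranov/go-openai` dependency the README already imports; the conversation and the model name are illustrative only.

```go
package main

import (
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// An illustrative two-message conversation.
	messages := []openai.ChatCompletionMessage{
		{Role: openai.ChatMessageRoleSystem, Content: "You are a helpful assistant."},
		{Role: openai.ChatMessageRoleUser, Content: "How many tokens will this prompt use?"},
	}

	// NumTokensFromMessages is the function shown in the diff above.
	// Pinned snapshot names give stable counts; bare names such as
	// "gpt-3.5-turbo" fall through to the default branch, log a warning,
	// and are counted as the corresponding -0613 snapshot.
	fmt.Printf("estimated prompt tokens: %d\n",
		NumTokensFromMessages(messages, "gpt-3.5-turbo-0613"))
}
```

One design note on the change itself: replacing the `if`/`else if` chain with a `switch` plus a `strings.Contains` fallback means an unlisted variant, such as a future dated gpt-3.5-turbo or gpt-4 snapshot, still resolves to a reasonable estimate instead of failing with "model not found".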
45 changes: 24 additions & 21 deletions README_zh-hans.md
@@ -99,7 +99,9 @@ func main() {
 ```
 
 ### Counting token consumption in chat API messages
-This code was written by @nasa1024 based on the [official examples](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)
+This code was written based on the [official examples](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)
+
+Written on: `2023-06-28`
 
 Please note that the way message tokens are calculated may change at any time, so the code below is not guaranteed to apply in the future; if you need an exact count, please follow the official documentation.
 

@@ -115,7 +117,6 @@ import (
 	"github.com/sashabaranov/go-openai"
 )
 
-// OpenAI Cookbook: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string) (numTokens int) {
 	tkm, err := tiktoken.EncodingForModel(model)
 	if err != nil {
@@ -125,28 +126,30 @@ func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string
 	}
 
 	var tokensPerMessage, tokensPerName int
-
-	if model == "gpt-3.5-turbo-0613" ||
-		model == "gpt-3.5-turbo-16k-0613" ||
-		model == "gpt-4-0314" ||
-		model == "gpt-4-32k-0314" ||
-		model == "gpt-4-0613" ||
-		model == "gpt-4-32k-0613" {
+	switch model {
+	case "gpt-3.5-turbo-0613",
+		"gpt-3.5-turbo-16k-0613",
+		"gpt-4-0314",
+		"gpt-4-32k-0314",
+		"gpt-4-0613",
+		"gpt-4-32k-0613":
 		tokensPerMessage = 3
-		tokensPerName = -1
-	} else if model == "gpt-3.5-turbo-0301" {
+		tokensPerName = 1
+	case "gpt-3.5-turbo-0301":
 		tokensPerMessage = 4 // every message follows <|start|>{role/name}\n{content}<|end|>\n
 		tokensPerName = -1 // if there's a name, the role is omitted
-	} else if model == "gpt-3.5-turbo" {
-		log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-		return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
-	} else if model == "gpt-4" {
-		log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-		return NumTokensFromMessages(messages, "gpt-4-0613")
-	} else {
-		err := errors.New("warning: model not found. Using cl100k_base encoding")
-		log.Println(err)
-		return
+	default:
+		if strings.Contains(model, "gpt-3.5-turbo") {
+			log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+			return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
+		} else if strings.Contains(model, "gpt-4") {
+			log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+			return NumTokensFromMessages(messages, "gpt-4-0613")
+		} else {
+			err = fmt.Errorf("num_tokens_from_messages() is not implemented for model %s. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.", model)
+			log.Println(err)
+			return
+		}
 	}
 
 	for _, message := range messages {
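Both file diffs are truncated at the message loop. For reference, here is a sketch of what that loop computes in the cookbook formulation: each message costs `tokensPerMessage` plus its encoded role, content, and optional name, and every reply is primed with 3 extra tokens. This is an approximation of the README's actual loop body, which the diff view does not show:

```go
	for _, message := range messages {
		numTokens += tokensPerMessage
		numTokens += len(tkm.Encode(message.Content, nil, nil))
		numTokens += len(tkm.Encode(message.Role, nil, nil))
		numTokens += len(tkm.Encode(message.Name, nil, nil))
		if message.Name != "" {
			numTokens += tokensPerName // per-model adjustment when a name is present
		}
	}
	numTokens += 3 // every reply is primed with <|start|>assistant<|message|>
	return numTokens
```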
