
doc: update chat message count example
pkoukk committed Jun 28, 2023
1 parent f360d8f commit e87301f
Showing 2 changed files with 47 additions and 43 deletions.
45 changes: 23 additions & 22 deletions README.md
@@ -100,9 +100,9 @@ func main() {
 ```
 
 ### Counting Tokens For Chat API Calls
-Below is an example function for counting tokens for messages passed to gpt-3.5-turbo-0301 or gpt-4-0314.
+Below is an example function for counting tokens for messages passed to gpt-3.5-turbo or gpt-4.
 
-The following code was written by @nasa1024 based on [openai-cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) examples.
+The following code was written based on [openai-cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) examples at `Wednesday, 28 June 2023`.
 
 Please note that the token calculation method for the message may change at any time, so this code may not necessarily be applicable in the future.
 
@@ -131,29 +131,30 @@ func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string
 	}
 
 	var tokensPerMessage, tokensPerName int
-
-	if model == "gpt-3.5-turbo-0613" ||
-		model == "gpt-3.5-turbo-16k-0613" ||
-		model == "gpt-4-0314" ||
-		model == "gpt-4-32k-0314" ||
-		model == "gpt-4-0613" ||
-		model == "gpt-4-32k-0613" ||
-		model == "gpt-3.5-turbo-16k" {
+	switch model {
+	case "gpt-3.5-turbo-0613",
+		"gpt-3.5-turbo-16k-0613",
+		"gpt-4-0314",
+		"gpt-4-32k-0314",
+		"gpt-4-0613",
+		"gpt-4-32k-0613":
 		tokensPerMessage = 3
-		tokensPerName = -1
-	} else if model == "gpt-3.5-turbo-0301" {
+		tokensPerName = 1
+	case "gpt-3.5-turbo-0301":
 		tokensPerMessage = 4 // every message follows <|start|>{role/name}\n{content}<|end|>\n
 		tokensPerName = -1 // if there's a name, the role is omitted
-	} else if model == "gpt-3.5-turbo" {
-		log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-		return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
-	} else if model == "gpt-4" {
-		log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-		return NumTokensFromMessages(messages, "gpt-4-0613")
-	} else {
-		err := errors.New("warning: model not found. Using cl100k_base encoding")
-		log.Println(err)
-		return
+	default:
+		if strings.Contains(model, "gpt-3.5-turbo") {
+			log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+			return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
+		} else if strings.Contains(model, "gpt-4") {
+			log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+			return NumTokensFromMessages(messages, "gpt-4-0613")
+		} else {
+			err = fmt.Errorf("num_tokens_from_messages() is not implemented for model %s. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.", model)
+			log.Println(err)
+			return
+		}
 	}
 
 	for _, message := range messages {
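For orientation, here is a minimal sketch of how the updated example might be driven. It assumes the `NumTokensFromMessages` function from the diff above is defined in the same package, together with the `github.com/sashabaranov/go-openai` dependency the README already imports; the conversation and the model name are illustrative only.

```go
package main

import (
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// An illustrative two-message conversation.
	messages := []openai.ChatCompletionMessage{
		{Role: openai.ChatMessageRoleSystem, Content: "You are a helpful assistant."},
		{Role: openai.ChatMessageRoleUser, Content: "How many tokens will this prompt use?"},
	}

	// NumTokensFromMessages is the function shown in the diff above.
	// Pinned snapshot names give stable counts; bare names such as
	// "gpt-3.5-turbo" fall through to the default branch, log a warning,
	// and are counted as the corresponding -0613 snapshot.
	fmt.Printf("estimated prompt tokens: %d\n",
		NumTokensFromMessages(messages, "gpt-3.5-turbo-0613"))
}
```

One design note on the change itself: replacing the `if`/`else if` chain with a `switch` plus a `strings.Contains` fallback means an unlisted variant, such as a future dated gpt-3.5-turbo or gpt-4 snapshot, still resolves to a reasonable estimate instead of failing with "model not found".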
45 changes: 24 additions & 21 deletions README_zh-hans.md
@@ -99,7 +99,9 @@ func main() {
 ```
 
 ### Counting token consumption in chat API messages
-This code was written by @nasa1024 based on the [official examples](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)
+This code was written based on the [official examples](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)
+
+Written on: `2023-06-28`
 
 Please note that the way message tokens are calculated may change at any time, so the code below is not guaranteed to apply in the future; if you need an exact count, please follow the official documentation.
 

@@ -115,7 +117,6 @@ import (
 	"github.com/sashabaranov/go-openai"
 )
 
-// OpenAI Cookbook: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string) (numTokens int) {
 	tkm, err := tiktoken.EncodingForModel(model)
 	if err != nil {
@@ -125,28 +126,30 @@ func NumTokensFromMessages(messages []openai.ChatCompletionMessage, model string
 	}
 
 	var tokensPerMessage, tokensPerName int
-
-	if model == "gpt-3.5-turbo-0613" ||
-		model == "gpt-3.5-turbo-16k-0613" ||
-		model == "gpt-4-0314" ||
-		model == "gpt-4-32k-0314" ||
-		model == "gpt-4-0613" ||
-		model == "gpt-4-32k-0613" {
+	switch model {
+	case "gpt-3.5-turbo-0613",
+		"gpt-3.5-turbo-16k-0613",
+		"gpt-4-0314",
+		"gpt-4-32k-0314",
+		"gpt-4-0613",
+		"gpt-4-32k-0613":
 		tokensPerMessage = 3
-		tokensPerName = -1
-	} else if model == "gpt-3.5-turbo-0301" {
+		tokensPerName = 1
+	case "gpt-3.5-turbo-0301":
 		tokensPerMessage = 4 // every message follows <|start|>{role/name}\n{content}<|end|>\n
 		tokensPerName = -1 // if there's a name, the role is omitted
-	} else if model == "gpt-3.5-turbo" {
-		log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-		return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
-	} else if model == "gpt-4" {
-		log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-		return NumTokensFromMessages(messages, "gpt-4-0613")
-	} else {
-		err := errors.New("warning: model not found. Using cl100k_base encoding")
-		log.Println(err)
-		return
+	default:
+		if strings.Contains(model, "gpt-3.5-turbo") {
+			log.Println("warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+			return NumTokensFromMessages(messages, "gpt-3.5-turbo-0613")
+		} else if strings.Contains(model, "gpt-4") {
+			log.Println("warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+			return NumTokensFromMessages(messages, "gpt-4-0613")
+		} else {
+			err = fmt.Errorf("num_tokens_from_messages() is not implemented for model %s. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.", model)
+			log.Println(err)
+			return
+		}
 	}
 
 	for _, message := range messages {
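Both file diffs are truncated at the message loop. For reference, here is a sketch of what that loop computes in the cookbook formulation: each message costs `tokensPerMessage` plus its encoded role, content, and optional name, and every reply is primed with 3 extra tokens. This is an approximation of the README's actual loop body, which the diff view does not show:

```go
	for _, message := range messages {
		numTokens += tokensPerMessage
		numTokens += len(tkm.Encode(message.Content, nil, nil))
		numTokens += len(tkm.Encode(message.Role, nil, nil))
		numTokens += len(tkm.Encode(message.Name, nil, nil))
		if message.Name != "" {
			numTokens += tokensPerName // per-model adjustment when a name is present
		}
	}
	numTokens += 3 // every reply is primed with <|start|>assistant<|message|>
	return numTokens
```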
