64bit · 64bit · Mar 5, 2023 · Mar 4, 2023
diff --git a/async-openai/src/types/types.rs b/async-openai/src/types/types.rs
@@ -724,6 +724,12 @@ pub struct CreateChatCompletionRequest {
     /// The messages to generate chat completions for, in the [chat format](https://platform.openai.com/docs/guides/chat/introduction).
     pub messages: Vec<ChatCompletionRequestMessage>, // min: 1
 
+    /// The maximum number of [tokens](https://platform.openai.com/tokenizer) to generate in the completion.
+    ///
+    /// The token count of your prompt plus `max_tokens` cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u16>,
+
     /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
     ///
     /// We generally recommend altering this or `top_p` but not both.

diff --git a/examples/chat-stream/src/main.rs b/examples/chat-stream/src/main.rs
@@ -12,6 +12,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
 
     let request = CreateChatCompletionRequestArgs::default()
         .model("gpt-3.5-turbo")
+        .max_tokens(1024u16)
         .messages([ChatCompletionRequestMessageArgs::default()
             .content("write a song if Coldplay and AR Rahman collaborated together")
             .role(Role::User)

diff --git a/examples/chat/src/main.rs b/examples/chat/src/main.rs
@@ -10,6 +10,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
     let client = Client::new();
 
     let request = CreateChatCompletionRequestArgs::default()
+        .max_tokens(512u16)
         .model("gpt-3.5-turbo")
         .messages([
             ChatCompletionRequestMessageArgs::default()