Merged
4 changes: 4 additions & 0 deletions .gitignore
@@ -6,3 +6,7 @@ DerivedData/
.swiftpm/configuration/registries.json
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
.netrc

# Xcode Swift Package Manager
**/xcshareddata/swiftpm/
**/project.xcworkspace/xcshareddata/swiftpm/
42 changes: 42 additions & 0 deletions README.md
@@ -403,6 +403,7 @@ public struct ChatCompletionParameters: Encodable {
/// A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models)
public var messages: [Message]
/// ID of the model to use. See the [model endpoint compatibility](https://platform.openai.com/docs/models/how-we-use-your-data) table for details on which models work with the Chat API.
/// Supports GPT-4, GPT-4o, GPT-5, and other models. For GPT-5 family: .gpt5, .gpt5Mini, .gpt5Nano
public var model: String
/// Whether or not to store the output of this chat completion request for use in our [model distillation](https://platform.openai.com/docs/guides/distillation) or [evals](https://platform.openai.com/docs/guides/evals) products.
/// Defaults to false
@@ -1290,6 +1291,47 @@ OpenAI's most advanced interface for generating model responses. Supports text a
- Improved conversation state management with `previousResponseId`
- Real-time text streaming, function calls, and tool usage events
- Support for reasoning summaries, web search, file search, and image generation events
- **NEW**: Support for GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano)
- **NEW**: Verbosity parameter for controlling response detail level

#### ModelResponseParameter

The `ModelResponseParameter` provides a comprehensive interface for creating model responses:

```swift
let parameters = ModelResponseParameter(
    input: .text("What is the answer to life, the universe, and everything?"),
    model: .gpt5, // Support for GPT-5, GPT-5-mini, GPT-5-nano
    text: TextConfiguration(
        format: .text,
        verbosity: "low" // NEW: Control response verbosity ("low", "medium", "high")
    ),
    temperature: 0.7
)

let response = try await service.responseCreate(parameters)
```

#### Available GPT-5 Models

```swift
public enum Model {
    case gpt5     // Complex reasoning, broad world knowledge, and code-heavy or multi-step agentic tasks
    case gpt5Mini // Cost-optimized reasoning and chat; balances speed, cost, and capability
    case gpt5Nano // High-throughput tasks, especially simple instruction-following or classification
    // ... other models
}
```
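Each case maps to the raw model identifier sent over the wire. A quick usage sketch, assuming the `value` computed property and `.custom` case shown in this PR (the snapshot identifier below is hypothetical):

```swift
let model: Model = .gpt5
let identifier = model.value // "gpt-5"

// Snapshots or fine-tuned variants can still be addressed via .custom:
let snapshot: Model = .custom("gpt-5-2025-01-01") // hypothetical snapshot identifier
```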

#### TextConfiguration with Verbosity

```swift
// Create a text configuration with verbosity control
let textConfig = TextConfiguration(
    format: .text,      // Can be .text, .jsonObject, or .jsonSchema
    verbosity: "medium" // Controls response detail level
)
```
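The same GPT-5 controls are also exposed on Chat Completions through the new `verbosity:` and `reasoningEffort:` initializer parameters. A usage sketch based on the initializer signature in this PR (the `startChat` call and message content are illustrative):

```swift
let parameters = ChatCompletionParameters(
    messages: [.init(role: .user, content: .text("Summarize this changelog in two sentences."))],
    model: .gpt5Mini,
    reasoningEffort: .minimal, // new: fewest reasoning tokens, fastest time-to-first-token
    verbosity: .low            // new: concise output; .low, .medium, or .high
)
let chat = try await service.startChat(parameters: parameters)
```

Note that Chat Completions takes the typed `Verbosity` enum, while `TextConfiguration` in the Responses API currently takes a plain string.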

Related guides:

@@ -15,6 +15,7 @@ public struct ChatCompletionParameters: Encodable {
model: Model,
store: Bool? = nil,
reasoningEffort: ReasoningEffort? = nil,
verbosity: Verbosity? = nil,
metadata: [String: String]? = nil,
frequencyPenalty: Double? = nil,
functionCall: FunctionCall? = nil,
@@ -44,6 +45,7 @@
self.model = model.value
self.store = store
self.reasoningEffort = reasoningEffort?.rawValue
self.verbosity = verbosity?.rawValue
self.metadata = metadata
self.frequencyPenalty = frequencyPenalty
self.functionCall = functionCall
@@ -399,6 +401,21 @@
case low
case medium
case high
/// The new minimal setting produces very few reasoning tokens for cases where you need the fastest possible time-to-first-token. We often see better performance when the model can produce a few tokens when needed versus none. The default is medium.
///
/// The minimal setting performs especially well in coding and instruction following scenarios, adhering closely to given directions. However, it may require prompting to act more proactively. To improve the model's reasoning quality, even at minimal effort, encourage it to “think” or outline its steps before answering.
case minimal
}

/// Verbosity determines how many output tokens are generated. Lowering the number of tokens reduces overall latency. While the model's reasoning approach stays mostly the same, the model finds ways to answer more concisely—which can either improve or diminish answer quality, depending on your use case. Here are some scenarios for both ends of the verbosity spectrum:
/// High verbosity: Use when you need the model to provide thorough explanations of documents or perform extensive code refactoring.
/// Low verbosity: Best for situations where you want concise answers or simple code generation, such as SQL queries.
/// Models before GPT-5 have used medium verbosity by default. With GPT-5, we make this option configurable as one of high, medium, or low.
/// When generating code, medium and high verbosity levels yield longer, more structured code with inline explanations, while low verbosity produces shorter, more concise code with minimal commentary.
public enum Verbosity: String, Encodable {
case high
case medium
case low
}

/// A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models)
@@ -411,6 +428,8 @@
/// Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently supported values are low, medium, high, and minimal. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
/// Defaults to medium (reasoning models only)
public var reasoningEffort: String?
/// Verbosity determines how many output tokens are generated. Lowering the number of tokens reduces overall latency.
public var verbosity: String?
/// Developer-defined tags and values used for filtering completions in the [dashboard](https://platform.openai.com/chat-completions).
public var metadata: [String: String]?
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. Defaults to 0
@@ -485,6 +504,7 @@
case model
case store
case reasoningEffort = "reasoning_effort"
case verbosity
case metadata
case frequencyPenalty = "frequency_penalty"
case toolChoice = "tool_choice"
10 changes: 10 additions & 0 deletions Sources/OpenAI/Public/Parameters/Model.swift
@@ -62,6 +62,13 @@ public enum Model {
/// Vision
case gpt4VisionPreview // Vision

/// Complex reasoning, broad world knowledge, and code-heavy or multi-step agentic tasks
case gpt5
/// Cost-optimized reasoning and chat; balances speed, cost, and capability
case gpt5Mini
/// High-throughput tasks, especially simple instruction-following or classification
case gpt5Nano

/// Images
case dalle2
case dalle3
@@ -93,6 +100,9 @@ public enum Model {
case .gpt40125Preview: "gpt-4-0125-preview"
case .gpt4Turbo20240409: "gpt-4-turbo-2024-04-09"
case .gpt4turbo: "gpt-4-turbo"
case .gpt5: "gpt-5"
case .gpt5Mini: "gpt-5-mini"
case .gpt5Nano: "gpt-5-nano"
case .custom(let model): model
}
}
5 changes: 4 additions & 1 deletion Sources/OpenAI/Public/Shared/Reasoning.swift
@@ -16,8 +16,11 @@ public struct Reasoning: Codable {
}

/// Defaults to medium
/// Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently supported values are low, medium, and high.
/// Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently supported values are low, medium, high and minimal.
/// Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
/// The new minimal setting produces very few reasoning tokens for cases where you need the fastest possible time-to-first-token. We often see better performance when the model can produce a few tokens when needed versus none. The default is medium.
///
/// The minimal setting performs especially well in coding and instruction following scenarios, adhering closely to given directions. However, it may require prompting to act more proactively. To improve the model's reasoning quality, even at minimal effort, encourage it to “think” or outline its steps before answering.
public var effort: String?

/// computer_use_preview only
6 changes: 5 additions & 1 deletion Sources/OpenAI/Public/Shared/TextConfiguration.swift
@@ -14,8 +14,12 @@ public struct TextConfiguration: Codable {
/// An object specifying the format that the model must output
public var format: FormatType

public init(format: FormatType) {
/// The verbosity level for the response (e.g., "low", "medium", "high")
public var verbosity: String?

public init(format: FormatType, verbosity: String? = nil) {
self.format = format
self.verbosity = verbosity
}
}
