15 changes: 15 additions & 0 deletions client-sdks/stainless/openapi.yml
@@ -7580,6 +7580,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -8035,6 +8039,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -12518,6 +12526,13 @@ components:
          type: object
        - type: 'null'
        description: Dictionary of metadata key-value pairs to attach to the response.
+      presence_penalty:
+        anyOf:
+        - type: number
+          maximum: 2.0
+          minimum: -2.0
+        - type: 'null'
+        description: Penalizes new tokens based on whether they appear in the text so far.
    additionalProperties: false
    type: object
    required:
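The schema above accepts either a number in [-2.0, 2.0] or null. A minimal stdlib-only sketch of what that `anyOf` constraint admits (the function name is illustrative, not part of the spec):

```python
def is_valid_presence_penalty(value: object) -> bool:
    """Mirror the spec's anyOf: either null or a number in [-2.0, 2.0]."""
    if value is None:
        return True  # the `type: 'null'` branch
    # bool is a subclass of int in Python but is not a JSON number, so exclude it
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    return -2.0 <= value <= 2.0
```

Note that the two request-body occurrences of the property earlier in this file carry no bounds; only the response-object schema declares the -2.0/2.0 range.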
17 changes: 8 additions & 9 deletions docs/docs/api-openai/conformance.mdx
@@ -21,9 +21,9 @@ This documentation is auto-generated from the OpenAI API specification comparison
| **Overall Conformance Score** | 82.9% |
| **Endpoints Implemented** | 28/114 |
| **Total Properties Checked** | 2598 |
-| **Schema/Type Issues** | 263 |
-| **Missing Properties** | 182 |
-| **Total Issues to Fix** | 445 |
+| **Schema/Type Issues** | 264 |
+| **Missing Properties** | 180 |
+| **Total Issues to Fix** | 444 |

## Category Scores

@@ -38,7 +38,7 @@ Categories are sorted by conformance score (lowest first, needing most attention
| Files | 54.8% | 42 | 11 | 8 |
| Vector stores | 65.2% | 310 | 94 | 14 |
| Embeddings | 71.4% | 14 | 4 | 0 |
-| Responses | 81.8% | 225 | 22 | 19 |
+| Responses | 82.2% | 225 | 23 | 17 |
| Chat | 83.1% | 402 | 18 | 50 |
| Conversations | 98.0% | 1323 | 22 | 4 |

@@ -936,18 +936,17 @@ Below is a detailed breakdown of conformance issues and missing properties for e

### Responses

-**Score:** 81.8% · **Issues:** 22 · **Missing:** 19
+**Score:** 82.2% · **Issues:** 23 · **Missing:** 17

#### `/responses`

**POST**

<details>
-<summary>Missing Properties (19)</summary>
+<summary>Missing Properties (17)</summary>

- `requestBody.content.application/json.properties.background`
- `requestBody.content.application/json.properties.frequency_penalty`
-- `requestBody.content.application/json.properties.presence_penalty`
- `requestBody.content.application/json.properties.prompt_cache_key`
- `requestBody.content.application/json.properties.safety_identifier`
- `requestBody.content.application/json.properties.service_tier`
@@ -959,7 +958,6 @@ Below is a detailed breakdown of conformance issues and missing properties for e
- `responses.200.content.application/json.properties.background`
- `responses.200.content.application/json.properties.frequency_penalty`
- `responses.200.content.application/json.properties.incomplete_details`
-- `responses.200.content.application/json.properties.presence_penalty`
- `responses.200.content.application/json.properties.prompt_cache_key`
- `responses.200.content.application/json.properties.safety_identifier`
- `responses.200.content.application/json.properties.service_tier`
@@ -968,7 +966,7 @@ Below is a detailed breakdown of conformance issues and missing properties for e
</details>

<details>
-<summary>Schema Issues (22)</summary>
+<summary>Schema Issues (23)</summary>

| Property | Issues |
|----------|--------|
@@ -986,6 +984,7 @@ Below is a detailed breakdown of conformance issues and missing properties for e
| `responses.200.content.application/json.properties.object` | Enum removed: ['response'] |
| `responses.200.content.application/json.properties.output.items` | Union variants added: 7; Union variants removed: 4 |
| `responses.200.content.application/json.properties.parallel_tool_calls` | Type removed: ['boolean']; Nullable added (OpenAI non-nullable); Union variants added: 2; Default changed: None -> True |
+| `responses.200.content.application/json.properties.presence_penalty` | Type removed: ['number']; Nullable added (OpenAI non-nullable); Union variants added: 2 |
| `responses.200.content.application/json.properties.reasoning` | Union variants added: 1; Union variants removed: 1 |
| `responses.200.content.application/json.properties.temperature` | Type removed: ['number']; Nullable added (OpenAI non-nullable); Union variants added: 2 |
| `responses.200.content.application/json.properties.text` | Type added: ['object'] |
15 changes: 15 additions & 0 deletions docs/static/deprecated-llama-stack-spec.yaml
@@ -4161,6 +4161,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -4616,6 +4620,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -9101,6 +9109,13 @@ components:
          type: object
        - type: 'null'
        description: Dictionary of metadata key-value pairs to attach to the response.
+      presence_penalty:
+        anyOf:
+        - type: number
+          maximum: 2.0
+          minimum: -2.0
+        - type: 'null'
+        description: Penalizes new tokens based on whether they appear in the text so far.
    additionalProperties: false
    type: object
    required:
8 changes: 8 additions & 0 deletions docs/static/experimental-llama-stack-spec.yaml
@@ -4239,6 +4239,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -4683,6 +4687,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
15 changes: 15 additions & 0 deletions docs/static/llama-stack-spec.yaml
@@ -6032,6 +6032,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -6487,6 +6491,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -10930,6 +10938,13 @@ components:
          type: object
        - type: 'null'
        description: Dictionary of metadata key-value pairs to attach to the response.
+      presence_penalty:
+        anyOf:
+        - type: number
+          maximum: 2.0
+          minimum: -2.0
+        - type: 'null'
+        description: Penalizes new tokens based on whether they appear in the text so far.
    additionalProperties: false
    type: object
    required:
26 changes: 16 additions & 10 deletions docs/static/openai-coverage.json
@@ -97,9 +97,9 @@
    },
    "conformance": {
      "score": 82.9,
-      "issues": 263,
-      "missing_properties": 182,
-      "total_problems": 445,
+      "issues": 264,
+      "missing_properties": 180,
+      "total_problems": 444,
      "total_properties": 2598
    }
  },
@@ -1695,9 +1695,9 @@
      ]
    },
    "Responses": {
-      "score": 81.8,
-      "issues": 22,
-      "missing_properties": 19,
+      "score": 82.2,
+      "issues": 23,
+      "missing_properties": 17,
      "total_properties": 225,
      "endpoints": [
        {
@@ -1708,7 +1708,7 @@
          "missing_properties": [
            "POST.requestBody.content.application/json.properties.background",
            "POST.requestBody.content.application/json.properties.frequency_penalty",
-            "POST.requestBody.content.application/json.properties.presence_penalty",
            "POST.requestBody.content.application/json.properties.prompt_cache_key",
            "POST.requestBody.content.application/json.properties.safety_identifier",
            "POST.requestBody.content.application/json.properties.service_tier",
@@ -1720,7 +1719,6 @@
            "POST.responses.200.content.application/json.properties.background",
            "POST.responses.200.content.application/json.properties.frequency_penalty",
            "POST.responses.200.content.application/json.properties.incomplete_details",
-            "POST.responses.200.content.application/json.properties.presence_penalty",
            "POST.responses.200.content.application/json.properties.prompt_cache_key",
            "POST.responses.200.content.application/json.properties.safety_identifier",
            "POST.responses.200.content.application/json.properties.service_tier",
@@ -1830,6 +1828,14 @@
            "Default changed: None -> True"
          ]
        },
+        {
+          "property": "POST.responses.200.content.application/json.properties.presence_penalty",
+          "details": [
+            "Type removed: ['number']",
+            "Nullable added (OpenAI non-nullable)",
+            "Union variants added: 2"
+          ]
+        },
        {
          "property": "POST.responses.200.content.application/json.properties.reasoning",
          "details": [
@@ -1887,8 +1893,8 @@
          ]
        }
      ],
-      "missing_count": 19,
-      "issues_count": 22
+      "missing_count": 17,
+      "issues_count": 23
      }
    ]
  }
15 changes: 15 additions & 0 deletions docs/static/stainless-llama-stack-spec.yaml
@@ -7580,6 +7580,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -8035,6 +8039,10 @@ components:
            type: string
          type: object
        - type: 'null'
+      presence_penalty:
+        anyOf:
+        - type: number
+        - type: 'null'
      store:
        type: boolean
        title: Store
@@ -12518,6 +12526,13 @@ components:
          type: object
        - type: 'null'
        description: Dictionary of metadata key-value pairs to attach to the response.
+      presence_penalty:
+        anyOf:
+        - type: number
+          maximum: 2.0
+          minimum: -2.0
+        - type: 'null'
+        description: Penalizes new tokens based on whether they appear in the text so far.
    additionalProperties: false
    type: object
    required:
@@ -128,6 +128,7 @@ async def create_openai_response(
        request.max_output_tokens,
        request.reasoning,
        request.metadata,
+        request.presence_penalty,
    )
    return result

> **Contributor:** This call is using a long positional-arg list. With the recent addition of `presence_penalty`, it looks like `reasoning` and `max_output_tokens` are now swapped due to parameter order. Can we switch this call to keyword arguments (at least for the tail params)?

> **Contributor:** FYI: I was trying to fix the order issue at #4825 (review).
@@ -472,6 +472,7 @@ async def create_openai_response(
        reasoning: OpenAIResponseReasoning | None = None,
        max_output_tokens: int | None = None,
        metadata: dict[str, str] | None = None,
+        presence_penalty: float | None = None,
    ):
        stream = bool(stream)
        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -528,6 +529,7 @@
            max_output_tokens=max_output_tokens,
            metadata=metadata,
            include=include,
+            presence_penalty=presence_penalty,
        )

        if stream:

> **Contributor:** This also seems to need to be propagated to the chat completion API.
@@ -585,6 +587,7 @@ async def _create_streaming_response(
        max_output_tokens: int | None = None,
        metadata: dict[str, str] | None = None,
        include: list[ResponseItemInclude] | None = None,
+        presence_penalty: float | None = None,
    ) -> AsyncIterator[OpenAIResponseObjectStream]:
        # These should never be None when called from create_openai_response (which sets defaults)
        # but we assert here to help mypy understand the types
@@ -651,6 +654,7 @@
            metadata=metadata,
            include=include,
            store=store,
+            presence_penalty=presence_penalty,
        )

        final_response = None
@@ -146,6 +146,7 @@ def __init__(
        metadata: dict[str, str] | None = None,
        include: list[ResponseItemInclude] | None = None,
        store: bool | None = True,
+        presence_penalty: float | None = None,
    ):
        self.inference_api = inference_api
        self.ctx = ctx
@@ -171,6 +172,7 @@
        self.store = store
        self.include = include
        self.store = bool(store) if store is not None else True

> **Contributor:** Tiny nit: `self.store` gets assigned twice in `__init__` (first `self.store = store`, then immediately overwritten by the `bool(store)` normalization). Not blocking, but worth cleaning up while we're in here.

+        self.presence_penalty = presence_penalty
        self.sequence_number = 0
        # Store MCP tool mapping that gets built during tool processing
        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -209,6 +211,7 @@ async def _create_refusal_response(self, violation_message: str) -> OpenAIRespon
            output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
            max_output_tokens=self.max_output_tokens,
            metadata=self.metadata,
+            presence_penalty=self.presence_penalty,
            store=self.store,
        )
@@ -251,6 +254,7 @@ def _snapshot_response(
            reasoning=self.reasoning,
            max_output_tokens=self.max_output_tokens,
            metadata=self.metadata,
+            presence_penalty=self.presence_penalty,
            store=self.store,
        )
@@ -371,6 +375,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
            parallel_tool_calls=effective_parallel_tool_calls,
            reasoning_effort=self.reasoning.effort if self.reasoning else None,
            max_completion_tokens=remaining_output_tokens,
+            presence_penalty=self.presence_penalty,
        )
        completion_result = await self.inference_api.openai_chat_completion(params)
6 changes: 6 additions & 0 deletions src/llama_stack_api/agents/models.py
@@ -129,6 +129,12 @@ class CreateResponseRequest(BaseModel):
        default=None,
        description="Dictionary of metadata key-value pairs to attach to the response.",
    )
+    presence_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description="Penalizes new tokens based on whether they appear in the text so far.",
+    )


class RetrieveResponseRequest(BaseModel):