10 changes: 10 additions & 0 deletions client-sdks/stainless/openapi.yml
@@ -7404,6 +7404,9 @@ components:
description: Web search tool configuration for OpenAI response inputs.
OpenAIResponseObjectWithInput:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -7859,6 +7862,9 @@ components:
description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
OpenAIResponseObject:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -12354,6 +12360,10 @@ components:
type: string
title: Model
description: The underlying LLM used for completions.
background:
type: boolean
title: Background
description: Whether to run the model response in the background. When true, returns immediately with status 'queued'.
prompt:
anyOf:
- $ref: '#/components/schemas/OpenAIResponsePrompt'
12 changes: 5 additions & 7 deletions docs/docs/api-openai/conformance.mdx
@@ -22,8 +22,8 @@ This documentation is auto-generated from the OpenAI API specification comparison
| **Endpoints Implemented** | 28/114 |
| **Total Properties Checked** | 2598 |
| **Schema/Type Issues** | 263 |
| **Missing Properties** | 182 |
| **Total Issues to Fix** | 445 |
| **Missing Properties** | 180 |
| **Total Issues to Fix** | 443 |

## Category Scores

@@ -38,7 +38,7 @@ Categories are sorted by conformance score (lowest first, needing most attention)
| Files | 54.8% | 42 | 11 | 8 |
| Vector stores | 65.2% | 310 | 94 | 14 |
| Embeddings | 71.4% | 14 | 4 | 0 |
| Responses | 81.8% | 225 | 22 | 19 |
| Responses | 82.7% | 225 | 22 | 17 |
| Chat | 83.1% | 402 | 18 | 50 |
| Conversations | 98.0% | 1323 | 22 | 4 |

@@ -936,16 +936,15 @@ Below is a detailed breakdown of conformance issues and missing properties for each

### Responses

**Score:** 81.8% · **Issues:** 22 · **Missing:** 19
**Score:** 82.7% · **Issues:** 22 · **Missing:** 17

#### `/responses`

**POST**

<details>
<summary>Missing Properties (19)</summary>
<summary>Missing Properties (17)</summary>

- `requestBody.content.application/json.properties.background`
- `requestBody.content.application/json.properties.frequency_penalty`
- `requestBody.content.application/json.properties.presence_penalty`
- `requestBody.content.application/json.properties.prompt_cache_key`
@@ -956,7 +955,6 @@ Below is a detailed breakdown of conformance issues and missing properties for each
- `requestBody.content.application/json.properties.top_p`
- `requestBody.content.application/json.properties.truncation`
- `requestBody.content.application/x-www-form-urlencoded`
- `responses.200.content.application/json.properties.background`
- `responses.200.content.application/json.properties.frequency_penalty`
- `responses.200.content.application/json.properties.incomplete_details`
- `responses.200.content.application/json.properties.presence_penalty`
12 changes: 9 additions & 3 deletions docs/docs/providers/openai_responses_limitations.mdx
@@ -130,11 +130,17 @@ The return object from a call to Responses includes a field for indicating why a

### Background

**Status:** Not Implemented
**Status:** ✅ Resolved

**Issue:** [#3568](https://github.com/llamastack/llama-stack/issues/3568), [#4701](https://github.com/llamastack/llama-stack/issues/4701)

**Issue:** [#3568](https://github.com/llamastack/llama-stack/issues/3568)
[Background mode](https://platform.openai.com/docs/guides/background) in OpenAI Responses lets you start a response generation job and then check back in on it later. This is useful if you might lose a connection during a generation and want to reconnect later and get the response back (for example if the client is running in a mobile app).

[Background mode](https://platform.openai.com/docs/guides/background) in OpenAI Responses lets you start a response generation job and then check back in on it later. This is useful if you might lose a connection during a generation and want to reconnect later and get the response back (for example if the client is running in a mobile app). It is not implemented in Llama Stack.
Background mode is now implemented in Llama Stack:
- Set `background=true` in the request to queue a response for background processing
- The API returns immediately with status `queued`
- Poll `GET /v1/responses/{response_id}` to check status (`in_progress`, `completed`, `failed`, `cancelled`)
- Use `POST /v1/responses/{response_id}/cancel` to cancel a background response
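The queue-then-poll workflow described in the bullets above can be sketched client-side. This is a hypothetical illustration, not code from this PR: the polling helper and its names are assumptions; only the endpoint paths and the status values come from the docs above.

```python
import time

# Terminal statuses for a background response, per the list above.
TERMINAL_STATUSES = {"completed", "failed", "cancelled"}

def wait_for_background_response(fetch_status, poll_interval=2.0, max_polls=150):
    """Poll `fetch_status` (a zero-arg callable returning the current status
    string, e.g. a wrapper around GET /v1/responses/{response_id}) until the
    response leaves the 'queued'/'in_progress' states."""
    for _ in range(max_polls):
        status = fetch_status()
        if status in TERMINAL_STATUSES:
            return status
        time.sleep(poll_interval)
    raise TimeoutError("background response did not reach a terminal status")
```

With an OpenAI-compatible Python client pointed at a Llama Stack server, `fetch_status` might wrap something like `client.responses.retrieve(resp.id).status` after a `client.responses.create(..., background=True)` call (SDK method names assumed from the OpenAI client, not verified against this PR).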

---

10 changes: 10 additions & 0 deletions docs/static/deprecated-llama-stack-spec.yaml
@@ -3985,6 +3985,9 @@ components:
description: Web search tool configuration for OpenAI response inputs.
OpenAIResponseObjectWithInput:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -4440,6 +4443,9 @@ components:
description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
OpenAIResponseObject:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -8937,6 +8943,10 @@ components:
type: string
title: Model
description: The underlying LLM used for completions.
background:
type: boolean
title: Background
description: Whether to run the model response in the background. When true, returns immediately with status 'queued'.
prompt:
anyOf:
- $ref: '#/components/schemas/OpenAIResponsePrompt'
6 changes: 6 additions & 0 deletions docs/static/experimental-llama-stack-spec.yaml
@@ -4063,6 +4063,9 @@ components:
description: Web search tool configuration for OpenAI response inputs.
OpenAIResponseObjectWithInput:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -4507,6 +4510,9 @@ components:
description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
OpenAIResponseObject:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
10 changes: 10 additions & 0 deletions docs/static/llama-stack-spec.yaml
@@ -5856,6 +5856,9 @@ components:
description: Web search tool configuration for OpenAI response inputs.
OpenAIResponseObjectWithInput:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -6311,6 +6314,9 @@ components:
description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
OpenAIResponseObject:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -10766,6 +10772,10 @@ components:
type: string
title: Model
description: The underlying LLM used for completions.
background:
type: boolean
title: Background
description: Whether to run the model response in the background. When true, returns immediately with status 'queued'.
prompt:
anyOf:
- $ref: '#/components/schemas/OpenAIResponsePrompt'
12 changes: 5 additions & 7 deletions docs/static/openai-coverage.json
@@ -98,8 +98,8 @@
"conformance": {
"score": 82.9,
"issues": 263,
"missing_properties": 182,
"total_problems": 445,
"missing_properties": 180,
"total_problems": 443,
"total_properties": 2598
}
},
@@ -1695,9 +1695,9 @@
]
},
"Responses": {
"score": 81.8,
"score": 82.7,
"issues": 22,
"missing_properties": 19,
"missing_properties": 17,
"total_properties": 225,
"endpoints": [
{
@@ -1706,7 +1706,6 @@
{
"method": "POST",
"missing_properties": [
"POST.requestBody.content.application/json.properties.background",
"POST.requestBody.content.application/json.properties.frequency_penalty",
"POST.requestBody.content.application/json.properties.presence_penalty",
"POST.requestBody.content.application/json.properties.prompt_cache_key",
@@ -1717,7 +1716,6 @@
"POST.requestBody.content.application/json.properties.top_p",
"POST.requestBody.content.application/json.properties.truncation",
"POST.requestBody.content.application/x-www-form-urlencoded",
"POST.responses.200.content.application/json.properties.background",
"POST.responses.200.content.application/json.properties.frequency_penalty",
"POST.responses.200.content.application/json.properties.incomplete_details",
"POST.responses.200.content.application/json.properties.presence_penalty",
@@ -1887,7 +1885,7 @@
]
}
],
"missing_count": 19,
"missing_count": 17,
"issues_count": 22
}
]
10 changes: 10 additions & 0 deletions docs/static/stainless-llama-stack-spec.yaml
@@ -7404,6 +7404,9 @@ components:
description: Web search tool configuration for OpenAI response inputs.
OpenAIResponseObjectWithInput:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -7859,6 +7862,9 @@ components:
description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
OpenAIResponseObject:
properties:
background:
type: boolean
title: Background
created_at:
type: integer
title: Created At
@@ -12354,6 +12360,10 @@ components:
type: string
title: Model
description: The underlying LLM used for completions.
background:
type: boolean
title: Background
description: Whether to run the model response in the background. When true, returns immediately with status 'queued'.
prompt:
anyOf:
- $ref: '#/components/schemas/OpenAIResponsePrompt'
@@ -110,6 +110,7 @@ async def create_openai_response(
result = await self.openai_responses_impl.create_openai_response(
request.input,
request.model,
request.background,
request.prompt,
request.instructions,
request.previous_response_id,