diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json
index 9cbac6a92ae9..26517d96b112 100644
--- a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json
+++ b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json
@@ -3681,9 +3681,6 @@
               "tool_choice": {
                 "$ref": "#/components/schemas/chatCompletionToolChoiceOption"
               },
-              "stream_options": {
-                "$ref": "#/components/schemas/chatCompletionStreamOptions"
-              },
               "functions": {
                 "description": "Deprecated in favor of `tools`. A list of functions the model may generate JSON inputs for.",
                 "type": "array",
@@ -5571,18 +5568,6 @@
         ],
         "description": "The role of the author of the response message."
       },
-      "chatCompletionStreamOptions": {
-        "type": "object",
-        "nullable": true,
-        "default": null,
-        "properties": {
-          "include_usage": {
-            "type": "boolean",
-            "description": "If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value."
-          }
-        },
-        "description": "Options for streaming response. Only set this when you set `stream: true`."
-      },
       "chatCompletionToolChoiceOption": {
         "description": "Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool. `none` is the default when no tools are present. `auto` is the default if tools are present.",
         "oneOf": [
diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml
index 8cb280070b84..2227ac3b5360 100644
--- a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml
+++ b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml
@@ -2304,8 +2304,6 @@ components:
           type: boolean
           nullable: true
           default: false
-        stream_options:
-          $ref: '#/components/schemas/chatCompletionStreamOptions'
         stop:
           description: Up to 4 sequences where the API will stop generating further tokens.
           oneOf:
@@ -3674,17 +3672,6 @@ components:
       enum:
         - assistant
       description: The role of the author of the response message.
-    chatCompletionStreamOptions:
-      description: |
-        Options for streaming response. Only set this when you set `stream: true`.
-      type: object
-      nullable: true
-      default: null
-      properties:
-        include_usage:
-          type: boolean
-          description: |
-            If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
     chatCompletionToolChoiceOption:
       description: |
         Controls which (if any) tool is called by the model.