diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json index 9cbac6a92ae9..26517d96b112 100644 --- a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json +++ b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.json @@ -3681,9 +3681,6 @@ "tool_choice": { "$ref": "#/components/schemas/chatCompletionToolChoiceOption" }, - "stream_options": { - "$ref": "#/components/schemas/chatCompletionStreamOptions" - }, "functions": { "description": "Deprecated in favor of `tools`. A list of functions the model may generate JSON inputs for.", "type": "array", @@ -5571,18 +5568,6 @@ ], "description": "The role of the author of the response message." }, - "chatCompletionStreamOptions": { - "type": "object", - "nullable": true, - "default": null, - "properties": { - "include_usage": { - "type": "boolean", - "description": "If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value." - } - }, - "description": "Options for streaming response. Only set this when you set `stream: true`." - }, "chatCompletionToolChoiceOption": { "description": "Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool. `none` is the default when no tools are present. `auto` is the default if tools are present.", "oneOf": [ diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml index 8cb280070b84..2227ac3b5360 100644 --- a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml +++ b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-07-01-preview/inference.yaml @@ -2304,8 +2304,6 @@ components: type: boolean nullable: true default: false - stream_options: - $ref: '#/components/schemas/chatCompletionStreamOptions' stop: description: Up to 4 sequences where the API will stop generating further tokens. oneOf: @@ -3674,17 +3672,6 @@ components: enum: - assistant description: The role of the author of the response message. - chatCompletionStreamOptions: - description: | - Options for streaming response. Only set this when you set `stream: true`. - type: object - nullable: true - default: null - properties: - include_usage: - type: boolean - description: | - If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. chatCompletionToolChoiceOption: description: | Controls which (if any) tool is called by the model.