Skip to content

Commit

Permalink
Support sending image data as part of a user message, using a new Ima…
Browse files Browse the repository at this point in the history
…geUrl.load() method. Add sample and test. (#36042)
  • Loading branch information
dargilco authored Jun 14, 2024
1 parent 147746b commit c19f701
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 16 deletions.
9 changes: 7 additions & 2 deletions sdk/ai/azure-ai-inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,13 @@ print(response.choices[0].message.content)

<!-- END SNIPPET -->

The following types or messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage`. See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`. See [sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) for usage of `UserMessage` that
includes uploading an image.
The following types of messages are supported: `SystemMessage`, `UserMessage`, `AssistantMessage`, `ToolMessage`. See also samples:

* [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`.
* [sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) for usage of `UserMessage` that
includes sending an image URL.
* [sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) for usage of `UserMessage` that
includes sending image data read from a local file.

Alternatively, you can provide the messages as dictionary instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:

Expand Down
1 change: 1 addition & 0 deletions sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
https://github.com/Azure/autorest.python/issues/2619 (all clients).
Otherwise intellisense did not show the patched public methods on the client object,
when the client is defined using context manager ("with" statement).
6. Add support for load() method in ImageUrl class (see /models/_patch.py).
"""
import json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ._models import ChatCompletionsToolDefinition
from ._models import ContentItem
from ._models import ImageContentItem
from ._models import ImageUrl
from ._patch import ImageUrl
from ._models import TextContentItem
from ._models import ChatRequestMessage
from ._models import ChatResponseMessage
Expand Down
47 changes: 44 additions & 3 deletions sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,58 @@
Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
import asyncio
import base64
import json
import logging
import queue
import re
import sys

from typing import List, AsyncIterator, Iterator
from typing import List, AsyncIterator, Iterator, Optional, Union
from azure.core.rest import HttpResponse, AsyncHttpResponse
from ._models import ImageUrl as ImageUrlGenerated
from .. import models as _models

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self

logger = logging.getLogger(__name__)


class ImageUrl(ImageUrlGenerated):
    """An ImageUrl that adds a convenience factory method to load the image
    from a local file and embed it as a base64-encoded data URL."""

    @classmethod
    def load(
        cls,
        *,
        image_file: str,
        image_format: str,
        detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None
    ) -> Self:
        """
        Create an ImageUrl object from a local image file. The method reads the image
        file and encodes it as a base64 string, which together with the image format
        is then used to format the JSON `url` value passed in the request payload.

        :keyword image_file: The name of the local image file to load. Required.
        :paramtype image_file: str
        :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required.
        :paramtype image_format: str
        :keyword detail: The evaluation quality setting to use, which controls relative prioritization of
         speed, token consumption, and accuracy. Known values are: "auto", "low", and "high".
        :paramtype detail: str or ~azure.ai.inference.models.ImageDetailLevel
        :return: An ImageUrl object with the image data encoded as a base64 string.
        :rtype: ~azure.ai.inference.models.ImageUrl
        :raises FileNotFoundError: when the image file could not be opened.
        """
        with open(image_file, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")
        # Data-URL form expected by services that accept inline image content.
        url = f"data:image/{image_format};base64,{image_data}"
        return cls(url=url, detail=detail)


class BaseStreamingChatCompletions:
"""A base class for the sync and async streaming chat completions responses, holding any common code
to deserializes the Server Sent Events (SSE) response stream into chat completions updates, each one
Expand Down Expand Up @@ -106,7 +146,7 @@ def __init__(self, response: HttpResponse):
def __iter__(self):
return self

def __next__(self) -> _models.StreamingChatCompletionsUpdate:
def __next__(self) -> "_models.StreamingChatCompletionsUpdate":
while self._queue.empty() and not self._done:
self._done = self._read_next_block()
if self._queue.empty():
Expand Down Expand Up @@ -145,7 +185,7 @@ def __init__(self, response: AsyncHttpResponse):
def __aiter__(self):
return self

async def __anext__(self) -> _models.StreamingChatCompletionsUpdate:
async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate":
while self._queue.empty() and not self._done:
self._done = await self._read_next_block_async()
if self._queue.empty():
Expand All @@ -170,6 +210,7 @@ async def aclose(self) -> None:


__all__: List[str] = [
"ImageUrl",
"StreamingChatCompletions",
"AsyncStreamingChatCompletions",
] # Add all objects you want publicly available to users at this package level
Expand Down
5 changes: 3 additions & 2 deletions sdk/ai/azure-ai-inference/samples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ similarly for the other samples.
|[sample_chat_completions_streaming.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming.py) | One chat completion operation using a synchronous client and streaming response. |
|[sample_chat_completions_streaming_with_entra_id_auth.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py) | One chat completion operation using a synchronous client and streaming response, using Entra ID authentication. This sample also shows setting the `azureml-model-deployment` HTTP request header, which may be required for some Managed Compute endpoint. |
|[sample_chat_completions.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py) | One chat completion operation using a synchronous client. |
|[sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) | One chat completion operation using a synchronous client, which includes sending an input image. |
|[sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) | One chat completion operation using a synchronous client, which includes sending an input image URL. |
|[sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) | One chat completion operation using a synchronous client, which includes sending input image data read from a local file. |
|[sample_chat_completions_with_history.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py) | Two chat completion operations using a synchronous client, with the second completion using chat history from the first. |
|[sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py) | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`. |
|[sample_chat_completions_from_input_json.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) |
|[sample_chat_completions_from_input_json_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image. |
|[sample_chat_completions_from_input_json_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_image_url.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image URL. |
|[sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) | Shows how do use a tool (function) in chat completions, for an AI model that supports tools |
|[sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py) | Shows how do use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. |
|[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
This sample demonstrates how to get a chat completions response from
the service using a synchronous client, and directly providing the
JSON request body (containing input chat messages). The sample
shows how to include an image in the input chat messages.
shows how to include an image URL in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_from_input_json_with_image.py
python sample_chat_completions_from_input_json_with_image_url.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
Expand All @@ -26,7 +26,7 @@
# pyright: reportAttributeAccessIssue=false


def sample_chat_completions_from_input_json_with_image():
def sample_chat_completions_from_input_json_with_image_url():
import os
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
Expand Down Expand Up @@ -83,4 +83,4 @@ def sample_chat_completions_from_input_json_with_image():


if __name__ == "__main__":
sample_chat_completions_from_input_json_with_image()
sample_chat_completions_from_input_json_with_image_url()
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
This sample demonstrates how to get a chat completions response from
the service using a synchronous client. The sample shows how to load
an image from a file and include it in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_with_image_data.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
https://<your-deployment-name>.<your-azure-region>.inference.ai.azure.com
where `your-deployment-name` is your unique AI Model deployment name, and
`your-azure-region` is the Azure region where your model is deployed.
2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret.
3) CHAT_COMPLETIONS_DEPLOYMENT_NAME - Optional. The value for the HTTP
request header `azureml-model-deployment`.
"""


def sample_chat_completions_with_image_data():
    """Run a single chat completion that includes a local image (loaded and
    base64-encoded via ImageUrl.load) in the user message, then print the reply."""
    import os
    from azure.ai.inference import ChatCompletionsClient
    from azure.ai.inference.models import (
        SystemMessage, UserMessage, TextContentItem,
        ImageContentItem, ImageUrl, ImageDetailLevel
    )
    from azure.core.credentials import AzureKeyCredential

    # Endpoint and key are both required; stop with guidance if either is missing.
    endpoint = os.environ.get("CHAT_COMPLETIONS_ENDPOINT")
    key = os.environ.get("CHAT_COMPLETIONS_KEY")
    if endpoint is None or key is None:
        print("Missing environment variable 'CHAT_COMPLETIONS_ENDPOINT' or 'CHAT_COMPLETIONS_KEY'")
        print("Set them before running this sample.")
        exit()

    # The deployment name is optional; warn but continue when it is not set.
    model_deployment = os.environ.get("CHAT_COMPLETIONS_DEPLOYMENT_NAME")
    if model_deployment is None:
        print("Could not read optional environment variable `CHAT_COMPLETIONS_DEPLOYMENT_NAME`.")
        print("HTTP request header `azureml-model-deployment` will not be set.")

    client = ChatCompletionsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        headers={"azureml-model-deployment": model_deployment},
    )

    # Build the multi-part user message: a text question plus the image content.
    image_item = ImageContentItem(
        image_url=ImageUrl.load(
            image_file="sample1.png",
            image_format="png",
            detail=ImageDetailLevel.HIGH,
        ),
    )
    user_message = UserMessage(
        content=[
            TextContentItem(text="What's in this image?"),
            image_item,
        ],
    )

    response = client.complete(
        messages=[
            SystemMessage(content="You are an AI assistant that describes images in details."),
            user_message,
        ],
    )

    print(response.choices[0].message.content)


def get_image_data_url(image_file: str, image_format: str) -> str:
    """Read a local image file and return its contents as a base64-encoded
    data URL string (``data:image/<format>;base64,<data>``)."""
    import base64

    try:
        with open(image_file, "rb") as image_stream:
            raw_bytes = image_stream.read()
    except FileNotFoundError:
        print(f"Could not read '{image_file}'.")
        print("Set the correct path to the image file before running this sample.")
        exit()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    return f"data:image/{image_format};base64,{encoded}"


# Run the sample only when this file is executed directly (not when imported).
if __name__ == "__main__":
    sample_chat_completions_with_image_data()
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
DESCRIPTION:
This sample demonstrates how to get a chat completions response from
the service using a synchronous client. The sample
shows how to include an image in the input chat messages.
shows how to include an image URL in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_with_images.py
python sample_chat_completions_with_image_url.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
Expand All @@ -23,7 +23,7 @@
"""


def sample_chat_completions_with_images():
def sample_chat_completions_with_image_url():
import os
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import (
Expand Down Expand Up @@ -74,4 +74,4 @@ def sample_chat_completions_with_images():


if __name__ == "__main__":
sample_chat_completions_with_images()
sample_chat_completions_with_image_url()
13 changes: 13 additions & 0 deletions sdk/ai/azure-ai-inference/tests/test_model_inference_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Licensed under the MIT License.
# ------------------------------------
import inspect
import os
import azure.ai.inference as sdk

from model_inference_test_base import ModelClientTestBase, ServicePreparerChatCompletions, ServicePreparerEmbeddings
Expand Down Expand Up @@ -65,6 +66,18 @@ def test_embeddings(self, **kwargs):
self._validate_embeddings_result(response)
client.close()

def test_image_url_load(self, **kwargs):
    """Unit test for ImageUrl.load(): loading a local PNG file must produce a
    base64 data URL and preserve the requested `detail` level."""
    local_folder = os.path.dirname(os.path.abspath(__file__))
    image_file = os.path.join(local_folder, "../samples/sample1.png")
    # Use the public export (sdk.models.ImageUrl) instead of reaching into the
    # private module sdk.models._patch — the patched class is re-exported there.
    image_url = sdk.models.ImageUrl.load(
        image_file=image_file,
        image_format="png",
        detail=sdk.models.ImageDetailLevel.AUTO,
    )
    assert image_url
    # PNG files begin with the magic bytes \x89PNG…, which base64-encode to "iVBORw".
    assert image_url.url.startswith("data:image/png;base64,iVBORw")
    assert image_url.detail == sdk.models.ImageDetailLevel.AUTO

# **********************************************************************************
#
# HAPPY PATH TESTS - CHAT COMPLETIONS
Expand Down

0 comments on commit c19f701

Please sign in to comment.