Skip to content

Commit

Permalink
Support sending image data as part of a user message, using a new Ima…
Browse files Browse the repository at this point in the history
…geUrl.load() method. Add sample and test. (#36042)
  • Loading branch information
dargilco authored Jun 14, 2024
1 parent 147746b commit c19f701
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 16 deletions.
9 changes: 7 additions & 2 deletions sdk/ai/azure-ai-inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,13 @@ print(response.choices[0].message.content)

<!-- END SNIPPET -->

The following types or messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage`. See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`. See [sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) for usage of `UserMessage` that
includes uploading an image.
The following types of messages are supported: `SystemMessage`, `UserMessage`, `AssistantMessage`, `ToolMessage`. See also samples:

* [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`.
* [sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) for usage of `UserMessage` that
includes sending an image URL.
* [sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) for usage of `UserMessage` that
includes sending image data read from a local file.

Alternatively, you can provide the messages as dictionary instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:

Expand Down
1 change: 1 addition & 0 deletions sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
https://github.com/Azure/autorest.python/issues/2619 (all clients).
Otherwise intellisense did not show the patched public methods on the client object,
when the client is defined using context manager ("with" statement).
6. Add support for load() method in ImageUrl class (see /models/_patch.py).
"""
import json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ._models import ChatCompletionsToolDefinition
from ._models import ContentItem
from ._models import ImageContentItem
from ._models import ImageUrl
from ._patch import ImageUrl
from ._models import TextContentItem
from ._models import ChatRequestMessage
from ._models import ChatResponseMessage
Expand Down
47 changes: 44 additions & 3 deletions sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,58 @@
Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
import asyncio
import base64
import json
import logging
import queue
import re
import sys

from typing import List, AsyncIterator, Iterator
from typing import List, AsyncIterator, Iterator, Optional, Union
from azure.core.rest import HttpResponse, AsyncHttpResponse
from ._models import ImageUrl as ImageUrlGenerated
from .. import models as _models

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self

logger = logging.getLogger(__name__)


class ImageUrl(ImageUrlGenerated):
    """An ImageUrl that adds a convenience factory method to load the image
    from a local file and embed it as a base64-encoded data URL."""

    @classmethod
    def load(
        cls,
        *,
        image_file: str,
        image_format: str,
        detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None
    ) -> Self:
        """
        Create an ImageUrl object from a local image file. The method reads the image
        file and encodes it as a base64 string, which together with the image format
        is then used to format the JSON `url` value passed in the request payload.

        :keyword image_file: The name of the local image file to load. Required.
        :paramtype image_file: str
        :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required.
        :paramtype image_format: str
        :keyword detail: The evaluation quality setting to use, which controls relative prioritization of
         speed, token consumption, and accuracy. Known values are: "auto", "low", and "high".
        :paramtype detail: str or ~azure.ai.inference.models.ImageDetailLevel
        :return: An ImageUrl object with the image data encoded as a base64 string.
        :rtype: ~azure.ai.inference.models.ImageUrl
        :raises FileNotFoundError: when the image file could not be opened.
        """
        with open(image_file, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")
        # Data-URL form expected by services that accept inline image content.
        url = f"data:image/{image_format};base64,{image_data}"
        return cls(url=url, detail=detail)


class BaseStreamingChatCompletions:
"""A base class for the sync and async streaming chat completions responses, holding any common code
to deserializes the Server Sent Events (SSE) response stream into chat completions updates, each one
Expand Down Expand Up @@ -106,7 +146,7 @@ def __init__(self, response: HttpResponse):
def __iter__(self):
return self

def __next__(self) -> _models.StreamingChatCompletionsUpdate:
def __next__(self) -> "_models.StreamingChatCompletionsUpdate":
while self._queue.empty() and not self._done:
self._done = self._read_next_block()
if self._queue.empty():
Expand Down Expand Up @@ -145,7 +185,7 @@ def __init__(self, response: AsyncHttpResponse):
def __aiter__(self):
return self

async def __anext__(self) -> _models.StreamingChatCompletionsUpdate:
async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate":
while self._queue.empty() and not self._done:
self._done = await self._read_next_block_async()
if self._queue.empty():
Expand All @@ -170,6 +210,7 @@ async def aclose(self) -> None:


__all__: List[str] = [
"ImageUrl",
"StreamingChatCompletions",
"AsyncStreamingChatCompletions",
] # Add all objects you want publicly available to users at this package level
Expand Down
5 changes: 3 additions & 2 deletions sdk/ai/azure-ai-inference/samples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ similarly for the other samples.
|[sample_chat_completions_streaming.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming.py) | One chat completion operation using a synchronous client and streaming response. |
|[sample_chat_completions_streaming_with_entra_id_auth.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py) | One chat completion operation using a synchronous client and streaming response, using Entra ID authentication. This sample also shows setting the `azureml-model-deployment` HTTP request header, which may be required for some Managed Compute endpoint. |
|[sample_chat_completions.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py) | One chat completion operation using a synchronous client. |
|[sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) | One chat completion operation using a synchronous client, which includes sending an input image. |
|[sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) | One chat completion operation using a synchronous client, which includes sending an input image URL. |
|[sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) | One chat completion operation using a synchronous client, which includes sending input image data read from a local file. |
|[sample_chat_completions_with_history.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py) | Two chat completion operations using a synchronous client, with the second completion using chat history from the first. |
|[sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py) | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`. |
|[sample_chat_completions_from_input_json.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) |
|[sample_chat_completions_from_input_json_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image. |
|[sample_chat_completions_from_input_json_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_image_url.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image URL. |
|[sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) | Shows how do use a tool (function) in chat completions, for an AI model that supports tools |
|[sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py) | Shows how do use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. |
|[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
This sample demonstrates how to get a chat completions response from
the service using a synchronous client, and directly providing the
JSON request body (containing input chat messages). The sample
shows how to include an image in the input chat messages.
shows how to include an image URL in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_from_input_json_with_image.py
python sample_chat_completions_from_input_json_with_image_url.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
Expand All @@ -26,7 +26,7 @@
# pyright: reportAttributeAccessIssue=false


def sample_chat_completions_from_input_json_with_image():
def sample_chat_completions_from_input_json_with_image_url():
import os
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
Expand Down Expand Up @@ -83,4 +83,4 @@ def sample_chat_completions_from_input_json_with_image():


if __name__ == "__main__":
sample_chat_completions_from_input_json_with_image()
sample_chat_completions_from_input_json_with_image_url()
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
This sample demonstrates how to get a chat completions response from
the service using a synchronous client. The sample shows how to load
an image from a file and include it in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_with_image_data.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
https://<your-deployment-name>.<your-azure-region>.inference.ai.azure.com
where `your-deployment-name` is your unique AI Model deployment name, and
`your-azure-region` is the Azure region where your model is deployed.
2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret.
3) CHAT_COMPLETIONS_DEPLOYMENT_NAME - Optional. The value for the HTTP
request header `azureml-model-deployment`.
"""


def sample_chat_completions_with_image_data():
    """Run a single chat completion that includes a local image (loaded and
    base64-encoded via ImageUrl.load) in the user message, then print the reply."""
    import os
    from azure.ai.inference import ChatCompletionsClient
    from azure.ai.inference.models import (
        SystemMessage, UserMessage, TextContentItem,
        ImageContentItem, ImageUrl, ImageDetailLevel
    )
    from azure.core.credentials import AzureKeyCredential

    # Endpoint and key are both required; stop with guidance if either is missing.
    endpoint = os.environ.get("CHAT_COMPLETIONS_ENDPOINT")
    key = os.environ.get("CHAT_COMPLETIONS_KEY")
    if endpoint is None or key is None:
        print("Missing environment variable 'CHAT_COMPLETIONS_ENDPOINT' or 'CHAT_COMPLETIONS_KEY'")
        print("Set them before running this sample.")
        exit()

    # The deployment name is optional; warn but continue when it is not set.
    model_deployment = os.environ.get("CHAT_COMPLETIONS_DEPLOYMENT_NAME")
    if model_deployment is None:
        print("Could not read optional environment variable `CHAT_COMPLETIONS_DEPLOYMENT_NAME`.")
        print("HTTP request header `azureml-model-deployment` will not be set.")

    client = ChatCompletionsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        headers={"azureml-model-deployment": model_deployment},
    )

    # Build the multi-part user message: a text question plus the image content.
    image_item = ImageContentItem(
        image_url=ImageUrl.load(
            image_file="sample1.png",
            image_format="png",
            detail=ImageDetailLevel.HIGH,
        ),
    )
    user_message = UserMessage(
        content=[
            TextContentItem(text="What's in this image?"),
            image_item,
        ],
    )

    response = client.complete(
        messages=[
            SystemMessage(content="You are an AI assistant that describes images in details."),
            user_message,
        ],
    )

    print(response.choices[0].message.content)


def get_image_data_url(image_file: str, image_format: str) -> str:
    """Read a local image file and return its contents as a base64-encoded
    data URL string (``data:image/<format>;base64,<data>``)."""
    import base64

    try:
        with open(image_file, "rb") as image_stream:
            raw_bytes = image_stream.read()
    except FileNotFoundError:
        print(f"Could not read '{image_file}'.")
        print("Set the correct path to the image file before running this sample.")
        exit()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    return f"data:image/{image_format};base64,{encoded}"


# Run the sample only when this file is executed directly (not when imported).
if __name__ == "__main__":
    sample_chat_completions_with_image_data()
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
DESCRIPTION:
This sample demonstrates how to get a chat completions response from
the service using a synchronous client. The sample
shows how to include an image in the input chat messages.
shows how to include an image URL in the input chat messages.
This sample will only work on AI models that support image input.
USAGE:
python sample_chat_completions_with_images.py
python sample_chat_completions_with_image_url.py
Set these two or three environment variables before running the sample:
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
Expand All @@ -23,7 +23,7 @@
"""


def sample_chat_completions_with_images():
def sample_chat_completions_with_image_url():
import os
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import (
Expand Down Expand Up @@ -74,4 +74,4 @@ def sample_chat_completions_with_images():


if __name__ == "__main__":
sample_chat_completions_with_images()
sample_chat_completions_with_image_url()
13 changes: 13 additions & 0 deletions sdk/ai/azure-ai-inference/tests/test_model_inference_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Licensed under the MIT License.
# ------------------------------------
import inspect
import os
import azure.ai.inference as sdk

from model_inference_test_base import ModelClientTestBase, ServicePreparerChatCompletions, ServicePreparerEmbeddings
Expand Down Expand Up @@ -65,6 +66,18 @@ def test_embeddings(self, **kwargs):
self._validate_embeddings_result(response)
client.close()

def test_image_url_load(self, **kwargs):
    """Unit test for ImageUrl.load(): loading a local PNG file must produce a
    base64 data URL and preserve the requested `detail` level."""
    local_folder = os.path.dirname(os.path.abspath(__file__))
    image_file = os.path.join(local_folder, "../samples/sample1.png")
    # Use the public export (sdk.models.ImageUrl) instead of reaching into the
    # private module sdk.models._patch — the patched class is re-exported there.
    image_url = sdk.models.ImageUrl.load(
        image_file=image_file,
        image_format="png",
        detail=sdk.models.ImageDetailLevel.AUTO,
    )
    assert image_url
    # PNG files begin with the magic bytes \x89PNG…, which base64-encode to "iVBORw".
    assert image_url.url.startswith("data:image/png;base64,iVBORw")
    assert image_url.detail == sdk.models.ImageDetailLevel.AUTO

# **********************************************************************************
#
# HAPPY PATH TESTS - CHAT COMPLETIONS
Expand Down

0 comments on commit c19f701

Please sign in to comment.