OpenAI integration #2791

Merged · 22 commits · Mar 11, 2024
Changes from 1 commit
OpenAI integration
colin-sentry committed Mar 7, 2024
commit 6d79ebf3b97f124cc96557c87cfa62af4ee593bd
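
For context, a minimal usage sketch (an illustration, not part of the diff; the DSN, transaction name, and model name are placeholders, and the integration is also auto-enabled by the __init__.py change below):

import sentry_sdk
from sentry_sdk.integrations.openai import OpenAIIntegration
from openai import OpenAI

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    integrations=[OpenAIIntegration()],
    traces_sample_rate=1.0,  # sample all transactions so the spans show up
)

client = OpenAI()  # reads OPENAI_API_KEY from the environment
with sentry_sdk.start_transaction(name="openai-demo"):
    # The patched Completions.create records an "openai" span carrying the
    # model tag, the messages, and token usage data.
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
    )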
1 change: 1 addition & 0 deletions sentry_sdk/integrations/__init__.py
@@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
"sentry_sdk.integrations.fastapi.FastApiIntegration",
"sentry_sdk.integrations.flask.FlaskIntegration",
"sentry_sdk.integrations.httpx.HttpxIntegration",
"sentry_sdk.integrations.openai.OpenAIIntegration",
"sentry_sdk.integrations.pyramid.PyramidIntegration",
"sentry_sdk.integrations.redis.RedisIntegration",
"sentry_sdk.integrations.rq.RqIntegration",
224 changes: 224 additions & 0 deletions sentry_sdk/integrations/openai.py
@@ -0,0 +1,224 @@
from __future__ import absolute_import

from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Iterator, Any, TypeVar, Callable, List, Optional

F = TypeVar("F", bound=Callable[..., Any])

from sentry_sdk._functools import wraps
from sentry_sdk.hub import Hub
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.utils import logger, capture_internal_exceptions

try:
from openai.types.chat import ChatCompletionChunk
from openai.resources.chat.completions import Completions
from openai.resources import Embeddings
except ImportError:
raise DidNotEnable("OpenAI not installed")

try:
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def count_tokens(s):
# type: (str) -> int
return len(enc.encode_ordinary(s))

logger.debug("[OpenAI] using tiktoken to count tokens")
except ImportError:
logger.info(
"The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs"
"Please install 'tiktoken' if you aren't receiving token usage in Sentry."
"See https://docs.sentry.io/platforms/python/guides/openai/ for more information."
)

def count_tokens(s):
# type: (str) -> int
return 0


COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"


class OpenAIIntegration(Integration):
identifier = "openai"

@staticmethod
def setup_once():
# TODO minimum version
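        # Patch the sync client entry points in place; the wrappers below
        # fall back to the original functions when the integration is off.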
Completions.create = _wrap_chat_completion_create(Completions.create)
        Embeddings.create = _wrap_embeddings_create(Embeddings.create)


def _calculate_chat_completion_usage(
    messages, response, span, streaming_message_responses=None
):
    # type: (List[Any], Any, Any, Optional[List[str]]) -> None
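    # Prefer the token counts reported by the API response; fall back to
    # counting tokens locally with tiktoken when usage is absent, as it is
    # for streamed responses.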
completion_tokens = 0
prompt_tokens = 0
total_tokens = 0
if hasattr(response, "usage"):
if hasattr(response.usage, "completion_tokens") and isinstance(
response.usage.completion_tokens, int
):
completion_tokens = response.usage.completion_tokens
if hasattr(response.usage, "prompt_tokens") and isinstance(
response.usage.prompt_tokens, int
):
prompt_tokens = response.usage.prompt_tokens
if hasattr(response.usage, "total_tokens") and isinstance(
response.usage.total_tokens, int
):
total_tokens = response.usage.total_tokens

if prompt_tokens == 0:
for message in messages:
if hasattr(message, "content"):
prompt_tokens += count_tokens(message.content)
elif "content" in message:
prompt_tokens += count_tokens(message["content"])

if completion_tokens == 0:
if streaming_message_responses is not None:
for message in streaming_message_responses:
completion_tokens += count_tokens(message)
elif hasattr(response, "choices"):
for choice in response.choices:
if hasattr(choice, "message"):
completion_tokens += count_tokens(choice.message)

if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens

if completion_tokens != 0:
span.set_data(COMPLETION_TOKENS, completion_tokens)
if prompt_tokens != 0:
span.set_data(PROMPT_TOKENS, prompt_tokens)
if total_tokens != 0:
span.set_data(TOTAL_TOKENS, total_tokens)


def _wrap_chat_completion_create(f):
# type: (F) -> F
@wraps(f)
def new_chat_completion(*args, **kwargs):
# type: (*Any, **Any) -> Any
hub = Hub.current
integration = hub.get_integration(OpenAIIntegration)
if integration is None:
return f(*args, **kwargs)

if "messages" not in kwargs:
# invalid call (in all versions of openai), let it return error
return f(*args, **kwargs)

try:
iter(kwargs["messages"])
except TypeError:
# invalid call (in all versions), messages must be iterable
return f(*args, **kwargs)

kwargs["messages"] = list(kwargs["messages"])
messages = kwargs["messages"]
model = kwargs.get("model")
streaming = kwargs.get("stream") # TODO handle streaming

span = hub.start_span(op="openai", description="Chat Completion")
span.__enter__()
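        # The span is entered manually rather than via a context manager:
        # for streaming responses it must stay open until the patched
        # iterator below is exhausted, so each branch closes it explicitly.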
res = f(*args, **kwargs)
with capture_internal_exceptions():
span.set_data("messages", messages)
span.set_tag("model", model)
span.set_tag("streaming", streaming)

if hasattr(res, "choices"):
span.set_data("response", res.choices[0].message)
_calculate_chat_completion_usage(messages, res, span)
span.__exit__(None, None, None)
elif hasattr(res, "_iterator"):
            data_buf = []  # type: List[List[str]]  # one for each choice

            old_iterator = res._iterator  # type: Iterator[ChatCompletionChunk]

            def new_iterator():
                # type: () -> Iterator[ChatCompletionChunk]
with capture_internal_exceptions():
for x in old_iterator:
if hasattr(x, "choices"):
choice_index = 0
for choice in x.choices:
if hasattr(choice, "delta") and hasattr(
choice.delta, "content"
):
content = choice.delta.content
if len(data_buf) <= choice_index:
data_buf.append([])
data_buf[choice_index].append(content or "")
choice_index += 1
yield x
if len(data_buf) > 0:
all_responses = list(
map(lambda chunk: "".join(chunk), data_buf)
)
span.set_data("responses", all_responses)
_calculate_chat_completion_usage(
messages, res, span, all_responses
)
span.__exit__(None, None, None)

res._iterator = new_iterator()
else:
span.set_tag("unknown_response", True)
span.__exit__(None, None, None)
return res

return new_chat_completion


def _wrap_embeddings_create(f):
# type: (F) -> F

    @wraps(f)
    def new_embeddings_create(*args, **kwargs):
        # type: (*Any, **Any) -> Any
hub = Hub.current
integration = hub.get_integration(OpenAIIntegration)
if integration is None:
return f(*args, **kwargs)

with hub.start_span(op="openai", description="Embeddings Creation") as span:
if "input" in kwargs and isinstance(kwargs["input"], str):
span.set_data("input", kwargs["input"])
if "model" in kwargs:
span.set_tag("model", kwargs["model"])
if "dimensions" in kwargs:
span.set_tag("dimensions", kwargs["dimensions"])
response = f(*args, **kwargs)

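            # Prefer the token usage reported by the API; fall back to a
            # local tiktoken count of the input when usage is missing.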
prompt_tokens = 0
total_tokens = 0
if hasattr(response, "usage"):
if hasattr(response.usage, "prompt_tokens") and isinstance(
response.usage.prompt_tokens, int
):
prompt_tokens = response.usage.prompt_tokens
if hasattr(response.usage, "total_tokens") and isinstance(
response.usage.total_tokens, int
):
total_tokens = response.usage.total_tokens

            if prompt_tokens == 0 and isinstance(kwargs.get("input"), str):
                prompt_tokens = count_tokens(kwargs["input"])

if total_tokens == 0:
total_tokens = prompt_tokens

span.set_data(PROMPT_TOKENS, prompt_tokens)
span.set_data(TOTAL_TOKENS, total_tokens)

return response

return new_embeddings_create
148 changes: 148 additions & 0 deletions tests/integrations/openai/test_openai.py
@@ -0,0 +1,148 @@
from openai import OpenAI, Stream
from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage

from sentry_sdk import start_transaction
from sentry_sdk.integrations.openai import OpenAIIntegration

try:
from unittest import mock # python 3.3 and above
except ImportError:
import mock # python < 3.3

COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"


def test_nonstreaming_chat_completion(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")
returned_chat = ChatCompletion(
id="chat-id",
choices=[
Choice(
index=0,
finish_reason="stop",
message=ChatCompletionMessage(role="assistant", content="response"),
)
],
created=10000000,
model="model-id",
object="chat.completion",
usage=CompletionUsage(
completion_tokens=10,
prompt_tokens=20,
total_tokens=30,
),
)

client.chat.completions._post = mock.Mock(return_value=returned_chat)
with start_transaction(name="openai tx"):
response = (
client.chat.completions.create(
model="some-model", messages=[{"role": "system", "content": "hello"}]
)
.choices[0]
.message.content
)

assert response == "response"
tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"

assert span["data"][COMPLETION_TOKENS] == 10
assert span["data"][PROMPT_TOKENS] == 20
assert span["data"][TOTAL_TOKENS] == 30


# noinspection PyTypeChecker
def test_streaming_chat_completion(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")
returned_stream = Stream(cast_to=None, response=None, client=None)
returned_stream._iterator = [
ChatCompletionChunk(
id="1",
            choices=[DeltaChoice(index=0, delta=ChoiceDelta(content="hel"))],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
ChatCompletionChunk(
id="1",
            choices=[DeltaChoice(index=1, delta=ChoiceDelta(content="lo "))],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
ChatCompletionChunk(
id="1",
            choices=[
                DeltaChoice(
                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
                )
            ],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
]

client.chat.completions._post = mock.Mock(return_value=returned_stream)
with start_transaction(name="openai tx"):
response_stream = client.chat.completions.create(
model="some-model", messages=[{"role": "system", "content": "hello"}]
)
response_string = "".join(
map(lambda x: x.choices[0].delta.content, response_stream)
)
assert response_string == "hello world"
tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"
assert span["data"][COMPLETION_TOKENS] == 2
assert span["data"][PROMPT_TOKENS] == 1
assert span["data"][TOTAL_TOKENS] == 3


def test_embeddings_create(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")

returned_embedding = CreateEmbeddingResponse(
data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])],
model="some-model",
object="list",
usage=EmbeddingTokenUsage(
prompt_tokens=20,
total_tokens=30,
),
)

client.embeddings._post = mock.Mock(return_value=returned_embedding)
with start_transaction(name="openai tx"):
response = client.embeddings.create(
input="test", model="text-embedding-3-large"
)

assert len(response.data[0].embedding) == 3

tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"

assert span["data"][PROMPT_TOKENS] == 20
assert span["data"][TOTAL_TOKENS] == 30