OpenAI integration #2791

Merged · 22 commits · Mar 11, 2024
Changes from 1 commit
OpenAI integration
colin-sentry committed Mar 7, 2024
commit 6d79ebf3b97f124cc96557c87cfa62af4ee593bd
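
For context, a minimal usage sketch (an illustration, not part of the diff; the DSN, transaction name, and model name are placeholders, and the integration is also auto-enabled by the __init__.py change below):

import sentry_sdk
from sentry_sdk.integrations.openai import OpenAIIntegration
from openai import OpenAI

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    integrations=[OpenAIIntegration()],
    traces_sample_rate=1.0,  # sample all transactions so the spans show up
)

client = OpenAI()  # reads OPENAI_API_KEY from the environment
with sentry_sdk.start_transaction(name="openai-demo"):
    # The patched Completions.create records an "openai" span carrying the
    # model tag, the messages, and token usage data.
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
    )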
1 change: 1 addition & 0 deletions sentry_sdk/integrations/__init__.py
@@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
"sentry_sdk.integrations.fastapi.FastApiIntegration",
"sentry_sdk.integrations.flask.FlaskIntegration",
"sentry_sdk.integrations.httpx.HttpxIntegration",
"sentry_sdk.integrations.openai.OpenAIIntegration",
"sentry_sdk.integrations.pyramid.PyramidIntegration",
"sentry_sdk.integrations.redis.RedisIntegration",
"sentry_sdk.integrations.rq.RqIntegration",
224 changes: 224 additions & 0 deletions sentry_sdk/integrations/openai.py
@@ -0,0 +1,224 @@
from __future__ import absolute_import

from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Iterator, Any, TypeVar, Callable, List, Optional

F = TypeVar("F", bound=Callable[..., Any])

from sentry_sdk._functools import wraps
from sentry_sdk.hub import Hub
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.utils import logger, capture_internal_exceptions

try:
from openai.types.chat import ChatCompletionChunk
from openai.resources.chat.completions import Completions
from openai.resources import Embeddings
except ImportError:
raise DidNotEnable("OpenAI not installed")

try:
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def count_tokens(s):
# type: (str) -> int
return len(enc.encode_ordinary(s))

logger.debug("[OpenAI] using tiktoken to count tokens")
except ImportError:
logger.info(
"The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs"
"Please install 'tiktoken' if you aren't receiving token usage in Sentry."
"See https://docs.sentry.io/platforms/python/guides/openai/ for more information."
)

def count_tokens(s):
# type: (str) -> int
return 0


COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"


class OpenAIIntegration(Integration):
identifier = "openai"

@staticmethod
def setup_once():
# TODO minimum version
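        # Patch the sync client entry points in place; the wrappers below
        # fall back to the original functions when the integration is off.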
Completions.create = _wrap_chat_completion_create(Completions.create)
        Embeddings.create = _wrap_embeddings_create(Embeddings.create)


def _calculate_chat_completion_usage(
    messages, response, span, streaming_message_responses=None
):
    # type: (List[Any], Any, Any, Optional[List[str]]) -> None
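    # Prefer the token counts reported by the API response; fall back to
    # counting tokens locally with tiktoken when usage is absent, as it is
    # for streamed responses.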
completion_tokens = 0
prompt_tokens = 0
total_tokens = 0
if hasattr(response, "usage"):
if hasattr(response.usage, "completion_tokens") and isinstance(
response.usage.completion_tokens, int
):
completion_tokens = response.usage.completion_tokens
if hasattr(response.usage, "prompt_tokens") and isinstance(
response.usage.prompt_tokens, int
):
prompt_tokens = response.usage.prompt_tokens
if hasattr(response.usage, "total_tokens") and isinstance(
response.usage.total_tokens, int
):
total_tokens = response.usage.total_tokens

if prompt_tokens == 0:
for message in messages:
if hasattr(message, "content"):
prompt_tokens += count_tokens(message.content)
elif "content" in message:
prompt_tokens += count_tokens(message["content"])

if completion_tokens == 0:
if streaming_message_responses is not None:
for message in streaming_message_responses:
completion_tokens += count_tokens(message)
elif hasattr(response, "choices"):
for choice in response.choices:
if hasattr(choice, "message"):
completion_tokens += count_tokens(choice.message)

if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens

if completion_tokens != 0:
span.set_data(COMPLETION_TOKENS, completion_tokens)
if prompt_tokens != 0:
span.set_data(PROMPT_TOKENS, prompt_tokens)
if total_tokens != 0:
span.set_data(TOTAL_TOKENS, total_tokens)


def _wrap_chat_completion_create(f):
# type: (F) -> F
@wraps(f)
def new_chat_completion(*args, **kwargs):
# type: (*Any, **Any) -> Any
hub = Hub.current
integration = hub.get_integration(OpenAIIntegration)
if integration is None:
return f(*args, **kwargs)

if "messages" not in kwargs:
# invalid call (in all versions of openai), let it return error
return f(*args, **kwargs)

try:
iter(kwargs["messages"])
except TypeError:
# invalid call (in all versions), messages must be iterable
return f(*args, **kwargs)

kwargs["messages"] = list(kwargs["messages"])
messages = kwargs["messages"]
model = kwargs.get("model")
streaming = kwargs.get("stream") # TODO handle streaming

span = hub.start_span(op="openai", description="Chat Completion")
span.__enter__()
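        # The span is entered manually rather than via a context manager:
        # for streaming responses it must stay open until the patched
        # iterator below is exhausted, so each branch closes it explicitly.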
res = f(*args, **kwargs)
with capture_internal_exceptions():
span.set_data("messages", messages)
span.set_tag("model", model)
span.set_tag("streaming", streaming)

if hasattr(res, "choices"):
span.set_data("response", res.choices[0].message)
_calculate_chat_completion_usage(messages, res, span)
span.__exit__(None, None, None)
elif hasattr(res, "_iterator"):
            data_buf = []  # type: List[List[str]]  # one for each choice

            old_iterator = res._iterator  # type: Iterator[ChatCompletionChunk]

            def new_iterator():
                # type: () -> Iterator[ChatCompletionChunk]
with capture_internal_exceptions():
for x in old_iterator:
if hasattr(x, "choices"):
choice_index = 0
for choice in x.choices:
if hasattr(choice, "delta") and hasattr(
choice.delta, "content"
):
content = choice.delta.content
if len(data_buf) <= choice_index:
data_buf.append([])
data_buf[choice_index].append(content or "")
choice_index += 1
yield x
if len(data_buf) > 0:
all_responses = list(
map(lambda chunk: "".join(chunk), data_buf)
)
span.set_data("responses", all_responses)
_calculate_chat_completion_usage(
messages, res, span, all_responses
)
span.__exit__(None, None, None)

res._iterator = new_iterator()
else:
span.set_tag("unknown_response", True)
span.__exit__(None, None, None)
return res

return new_chat_completion


def _wrap_embeddings_create(f):
# type: (F) -> F

    @wraps(f)
    def new_embeddings_create(*args, **kwargs):
        # type: (*Any, **Any) -> Any
hub = Hub.current
integration = hub.get_integration(OpenAIIntegration)
if integration is None:
return f(*args, **kwargs)

with hub.start_span(op="openai", description="Embeddings Creation") as span:
if "input" in kwargs and isinstance(kwargs["input"], str):
span.set_data("input", kwargs["input"])
if "model" in kwargs:
span.set_tag("model", kwargs["model"])
if "dimensions" in kwargs:
span.set_tag("dimensions", kwargs["dimensions"])
response = f(*args, **kwargs)

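            # Prefer the token usage reported by the API; fall back to a
            # local tiktoken count of the input when usage is missing.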
prompt_tokens = 0
total_tokens = 0
if hasattr(response, "usage"):
if hasattr(response.usage, "prompt_tokens") and isinstance(
response.usage.prompt_tokens, int
):
prompt_tokens = response.usage.prompt_tokens
if hasattr(response.usage, "total_tokens") and isinstance(
response.usage.total_tokens, int
):
total_tokens = response.usage.total_tokens

            if prompt_tokens == 0 and isinstance(kwargs.get("input"), str):
                prompt_tokens = count_tokens(kwargs["input"])

if total_tokens == 0:
total_tokens = prompt_tokens

span.set_data(PROMPT_TOKENS, prompt_tokens)
span.set_data(TOTAL_TOKENS, total_tokens)

return response

return new_embeddings_create
148 changes: 148 additions & 0 deletions tests/integrations/openai/test_openai.py
@@ -0,0 +1,148 @@
from openai import OpenAI, Stream
from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage

from sentry_sdk import start_transaction
from sentry_sdk.integrations.openai import OpenAIIntegration

try:
from unittest import mock # python 3.3 and above
except ImportError:
import mock # python < 3.3

COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"


def test_nonstreaming_chat_completion(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")
returned_chat = ChatCompletion(
id="chat-id",
choices=[
Choice(
index=0,
finish_reason="stop",
message=ChatCompletionMessage(role="assistant", content="response"),
)
],
created=10000000,
model="model-id",
object="chat.completion",
usage=CompletionUsage(
completion_tokens=10,
prompt_tokens=20,
total_tokens=30,
),
)

client.chat.completions._post = mock.Mock(return_value=returned_chat)
with start_transaction(name="openai tx"):
response = (
client.chat.completions.create(
model="some-model", messages=[{"role": "system", "content": "hello"}]
)
.choices[0]
.message.content
)

assert response == "response"
tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"

assert span["data"][COMPLETION_TOKENS] == 10
assert span["data"][PROMPT_TOKENS] == 20
assert span["data"][TOTAL_TOKENS] == 30


# noinspection PyTypeChecker
def test_streaming_chat_completion(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")
returned_stream = Stream(cast_to=None, response=None, client=None)
returned_stream._iterator = [
ChatCompletionChunk(
id="1",
            choices=[DeltaChoice(index=0, delta=ChoiceDelta(content="hel"))],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
ChatCompletionChunk(
id="1",
            choices=[DeltaChoice(index=1, delta=ChoiceDelta(content="lo "))],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
ChatCompletionChunk(
id="1",
            choices=[
                DeltaChoice(
                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
                )
            ],
created=100000,
model="model-id",
object="chat.completion.chunk",
),
]

client.chat.completions._post = mock.Mock(return_value=returned_stream)
with start_transaction(name="openai tx"):
response_stream = client.chat.completions.create(
model="some-model", messages=[{"role": "system", "content": "hello"}]
)
response_string = "".join(
map(lambda x: x.choices[0].delta.content, response_stream)
)
assert response_string == "hello world"
tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"
assert span["data"][COMPLETION_TOKENS] == 2
assert span["data"][PROMPT_TOKENS] == 1
assert span["data"][TOTAL_TOKENS] == 3


def test_embeddings_create(sentry_init, capture_events):
sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
events = capture_events()

client = OpenAI(api_key="z")

returned_embedding = CreateEmbeddingResponse(
data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])],
model="some-model",
object="list",
usage=EmbeddingTokenUsage(
prompt_tokens=20,
total_tokens=30,
),
)

client.embeddings._post = mock.Mock(return_value=returned_embedding)
with start_transaction(name="openai tx"):
response = client.embeddings.create(
input="test", model="text-embedding-3-large"
)

assert len(response.data[0].embedding) == 3

tx = events[0]
assert tx["type"] == "transaction"
span = tx["spans"][0]
assert span["op"] == "openai"

assert span["data"][PROMPT_TOKENS] == 20
assert span["data"][TOTAL_TOKENS] == 30