OpenAI integration #2791
Merged
Changes shown are from 1 commit of the 22 commits in this pull request:
- 6d79ebf OpenAI integration (colin-sentry)
- edf396c Fix linting errors (colin-sentry)
- 05ecefa Fix CI (colin-sentry)
- a3ccbcd Fix lint (colin-sentry)
- 6003902 Fix more CI issues (colin-sentry)
- 09013f2 Run tests on version pinned OpenAI too (colin-sentry)
- 2f9667a Fix pydantic issue in test (colin-sentry)
- ec84c04 Import type in TYPE_CHECKING gate (colin-sentry)
- e45420e PR feedback fixes (colin-sentry)
- 0e319f1 Fix tiktoken test variant (colin-sentry)
- 3acc9ab PII gate the request and response (colin-sentry)
- 65c311d Rename set_data tags (colin-sentry)
- 4b33a4e Move doc location (colin-sentry)
- 56d2679 Add "exclude prompts" flag as optional (colin-sentry)
- 72d4b5a Change prompts to be excluded by default (colin-sentry)
- b206e4c Set flag in tests (colin-sentry)
- 4bc4310 Fix tiktoken tox.ini extra dash (colin-sentry)
- c6f5cd2 Change strip PII semantics (colin-sentry)
- d1eae09 More test coverage for PII (colin-sentry)
- c96e0e5 notiktoken (colin-sentry)
- 0278e2c Merge branch 'master' into openai (antonpirker)
- 452b03b Merge branch 'master' into openai (colin-sentry)
Viewing commit 6d79ebf3b97f124cc96557c87cfa62af4ee593bd (OpenAI integration)
sentry_sdk/integrations/openai.py (new file)
@@ -0,0 +1,224 @@
from __future__ import absolute_import

from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Iterator, Any, TypeVar, Callable

    F = TypeVar("F", bound=Callable[..., Any])

from sentry_sdk._functools import wraps
from sentry_sdk.hub import Hub
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.utils import logger, capture_internal_exceptions

try:
    from openai.types.chat import ChatCompletionChunk
    from openai.resources.chat.completions import Completions
    from openai.resources import Embeddings
except ImportError:
    raise DidNotEnable("OpenAI not installed")

try:
    import tiktoken

    enc = tiktoken.get_encoding("cl100k_base")

    def count_tokens(s):
        # type: (str) -> int
        return len(enc.encode_ordinary(s))

    logger.debug("[OpenAI] using tiktoken to count tokens")
except ImportError:
    logger.info(
        "The Sentry Python SDK requires 'tiktoken' in order to measure token usage "
        "from some OpenAI APIs. Please install 'tiktoken' if you aren't receiving "
        "token usage in Sentry. See "
        "https://docs.sentry.io/platforms/python/guides/openai/ for more information."
    )

    def count_tokens(s):
        # type: (str) -> int
        return 0


COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"
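As an aside on the fallback above: token counts depend entirely on the encoding, and cl100k_base is hard-coded here. A minimal standalone sketch of the counting path (the example string and printed count are illustrative only):

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
# encode_ordinary skips special-token handling, which suits plain prompt text
print(len(enc.encode_ordinary("hello world")))  # 2 tokens under cl100k_base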
class OpenAIIntegration(Integration):
    identifier = "openai"

    @staticmethod
    def setup_once():
        # TODO minimum version
        Completions.create = _wrap_chat_completion_create(Completions.create)
        Embeddings.create = _wrap_embeddings_create(Embeddings.create)
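For orientation, enabling the integration follows the usual SDK pattern; the tests below do the same through the sentry_init fixture. The DSN is a placeholder, and tracing must be enabled for these spans to be recorded:

import sentry_sdk
from sentry_sdk.integrations.openai import OpenAIIntegration

sentry_sdk.init(
    dsn="https://<key>@<org>.ingest.sentry.io/<project>",  # placeholder DSN
    traces_sample_rate=1.0,  # spans are only sent when tracing is enabled
    integrations=[OpenAIIntegration()],
)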
def _calculate_chat_completion_usage(
    messages, response, span, streaming_message_responses=None
):
    completion_tokens = 0
    prompt_tokens = 0
    total_tokens = 0
    if hasattr(response, "usage"):
        if hasattr(response.usage, "completion_tokens") and isinstance(
            response.usage.completion_tokens, int
        ):
            completion_tokens = response.usage.completion_tokens
        if hasattr(response.usage, "prompt_tokens") and isinstance(
            response.usage.prompt_tokens, int
        ):
            prompt_tokens = response.usage.prompt_tokens
        if hasattr(response.usage, "total_tokens") and isinstance(
            response.usage.total_tokens, int
        ):
            total_tokens = response.usage.total_tokens

    if prompt_tokens == 0:
        for message in messages:
            if hasattr(message, "content"):
                prompt_tokens += count_tokens(message.content)
            elif "content" in message:
                prompt_tokens += count_tokens(message["content"])

    if completion_tokens == 0:
        if streaming_message_responses is not None:
            for message in streaming_message_responses:
                completion_tokens += count_tokens(message)
        elif hasattr(response, "choices"):
            for choice in response.choices:
                if hasattr(choice, "message") and hasattr(choice.message, "content"):
                    completion_tokens += count_tokens(choice.message.content or "")

    if total_tokens == 0:
        total_tokens = prompt_tokens + completion_tokens

    if completion_tokens != 0:
        span.set_data(COMPLETION_TOKENS, completion_tokens)
    if prompt_tokens != 0:
        span.set_data(PROMPT_TOKENS, prompt_tokens)
    if total_tokens != 0:
        span.set_data(TOTAL_TOKENS, total_tokens)
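To make the fallback path concrete, here is a hypothetical trace through the helper when the response carries no usage block. _FakeSpan and _NoUsageResponse are illustrative stand-ins, not SDK types, and the snippet assumes it runs alongside the helper above with tiktoken installed:

class _FakeSpan:
    def set_data(self, key, value):
        print(key, "=", value)


class _NoUsageResponse:
    choices = []  # no `usage` attribute and no choices to count


messages = [{"role": "user", "content": "hello world"}]

# prompt tokens are counted locally ("hello world" -> 2 under cl100k_base),
# completion stays 0, and total falls back to prompt + completion:
# this prints "prompt_tokens = 2" and "total_tokens = 2"
_calculate_chat_completion_usage(messages, _NoUsageResponse(), _FakeSpan())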
def _wrap_chat_completion_create(f):
    # type: (F) -> F
    @wraps(f)
    def new_chat_completion(*args, **kwargs):
        # type: (*Any, **Any) -> Any
        hub = Hub.current
        integration = hub.get_integration(OpenAIIntegration)
        if integration is None:
            return f(*args, **kwargs)

        if "messages" not in kwargs:
            # invalid call (in all versions of openai), let it return error
            return f(*args, **kwargs)

        try:
            iter(kwargs["messages"])
        except TypeError:
            # invalid call (in all versions), messages must be iterable
            return f(*args, **kwargs)

        kwargs["messages"] = list(kwargs["messages"])
        messages = kwargs["messages"]
        model = kwargs.get("model")
        streaming = kwargs.get("stream")  # TODO handle streaming

        span = hub.start_span(op="openai", description="Chat Completion")
        span.__enter__()
        res = f(*args, **kwargs)
        with capture_internal_exceptions():
            span.set_data("messages", messages)
            span.set_tag("model", model)
            span.set_tag("streaming", streaming)

            if hasattr(res, "choices"):
                span.set_data("response", res.choices[0].message)
                _calculate_chat_completion_usage(messages, res, span)
                span.__exit__(None, None, None)
            elif hasattr(res, "_iterator"):
                # one buffered list of deltas per choice
                data_buf = []  # type: list[list[str]]

                old_iterator = res._iterator  # type: Iterator[ChatCompletionChunk]

                def new_iterator():
                    # type: () -> Iterator[ChatCompletionChunk]
                    with capture_internal_exceptions():
                        for x in old_iterator:
                            if hasattr(x, "choices"):
                                choice_index = 0
                                for choice in x.choices:
                                    if hasattr(choice, "delta") and hasattr(
                                        choice.delta, "content"
                                    ):
                                        content = choice.delta.content
                                        if len(data_buf) <= choice_index:
                                            data_buf.append([])
                                        data_buf[choice_index].append(content or "")
                                    choice_index += 1
                            yield x
                        if len(data_buf) > 0:
                            all_responses = list(
                                map(lambda chunk: "".join(chunk), data_buf)
                            )
                            span.set_data("responses", all_responses)
                            _calculate_chat_completion_usage(
                                messages, res, span, all_responses
                            )
                        span.__exit__(None, None, None)

                res._iterator = new_iterator()
            else:
                span.set_tag("unknown_response", True)
                span.__exit__(None, None, None)
        return res

    return new_chat_completion
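For context, a hypothetical call site once setup_once() has patched Completions.create: client code is unchanged, deltas stream through untouched, and the span is closed only when the wrapped iterator is drained. The key and model name are placeholders, so this sketch will not authenticate against the real API:

from openai import OpenAI

client = OpenAI(api_key="sk-placeholder")
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
)
for chunk in stream:
    # the wrapper buffers each delta per choice while passing it through
    print(chunk.choices[0].delta.content or "", end="")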
def _wrap_embeddings_create(f):
    # type: (F) -> F
    @wraps(f)
    def new_embeddings_create(*args, **kwargs):
        # type: (*Any, **Any) -> Any
        hub = Hub.current
        integration = hub.get_integration(OpenAIIntegration)
        if integration is None:
            return f(*args, **kwargs)

        with hub.start_span(op="openai", description="Embeddings Creation") as span:
            if "input" in kwargs and isinstance(kwargs["input"], str):
                span.set_data("input", kwargs["input"])
            if "model" in kwargs:
                span.set_tag("model", kwargs["model"])
            if "dimensions" in kwargs:
                span.set_tag("dimensions", kwargs["dimensions"])

            response = f(*args, **kwargs)

            prompt_tokens = 0
            total_tokens = 0
            if hasattr(response, "usage"):
                if hasattr(response.usage, "prompt_tokens") and isinstance(
                    response.usage.prompt_tokens, int
                ):
                    prompt_tokens = response.usage.prompt_tokens
                if hasattr(response.usage, "total_tokens") and isinstance(
                    response.usage.total_tokens, int
                ):
                    total_tokens = response.usage.total_tokens

            if prompt_tokens == 0 and isinstance(kwargs.get("input"), str):
                prompt_tokens = count_tokens(kwargs["input"])

            if total_tokens == 0:
                total_tokens = prompt_tokens

            span.set_data(PROMPT_TOKENS, prompt_tokens)
            span.set_data(TOTAL_TOKENS, total_tokens)

            return response

    return new_embeddings_create
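Similarly, a hypothetical embeddings call site: the wrapper records the input string, model, optional dimensions, and token counts on the span. Again, the key and model name are placeholders:

from openai import OpenAI

client = OpenAI(api_key="sk-placeholder")
response = client.embeddings.create(
    input="hello world",
    model="text-embedding-3-small",
)
print(len(response.data[0].embedding))  # dimensionality of the returned vector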
Tests for the OpenAI integration (new file)
@@ -0,0 +1,148 @@
from openai import OpenAI, Stream
from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionMessage,
    ChatCompletionChunk,
)
from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice
from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage

from sentry_sdk import start_transaction
from sentry_sdk.integrations.openai import OpenAIIntegration

try:
    from unittest import mock  # python 3.3 and above
except ImportError:
    import mock  # python < 3.3


COMPLETION_TOKENS = "completion_tokens"
PROMPT_TOKENS = "prompt_tokens"
TOTAL_TOKENS = "total_tokens"
def test_nonstreaming_chat_completion(sentry_init, capture_events):
    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
    events = capture_events()

    client = OpenAI(api_key="z")
    returned_chat = ChatCompletion(
        id="chat-id",
        choices=[
            Choice(
                index=0,
                finish_reason="stop",
                message=ChatCompletionMessage(role="assistant", content="response"),
            )
        ],
        created=10000000,
        model="model-id",
        object="chat.completion",
        usage=CompletionUsage(
            completion_tokens=10,
            prompt_tokens=20,
            total_tokens=30,
        ),
    )

    client.chat.completions._post = mock.Mock(return_value=returned_chat)
    with start_transaction(name="openai tx"):
        response = (
            client.chat.completions.create(
                model="some-model", messages=[{"role": "system", "content": "hello"}]
            )
            .choices[0]
            .message.content
        )

    assert response == "response"
    tx = events[0]
    assert tx["type"] == "transaction"
    span = tx["spans"][0]
    assert span["op"] == "openai"

    assert span["data"][COMPLETION_TOKENS] == 10
    assert span["data"][PROMPT_TOKENS] == 20
    assert span["data"][TOTAL_TOKENS] == 30

# noinspection PyTypeChecker
def test_streaming_chat_completion(sentry_init, capture_events):
    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
    events = capture_events()

    client = OpenAI(api_key="z")
    returned_stream = Stream(cast_to=None, response=None, client=None)
    returned_stream._iterator = [
        ChatCompletionChunk(
            id="1",
            choices=[Choice(index=0, delta=ChoiceDelta(content="hel"))],
            created=100000,
            model="model-id",
            object="chat.completion.chunk",
        ),
        ChatCompletionChunk(
            id="1",
            choices=[Choice(index=1, delta=ChoiceDelta(content="lo "))],
            created=100000,
            model="model-id",
            object="chat.completion.chunk",
        ),
        ChatCompletionChunk(
            id="1",
            choices=[
                Choice(
                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
                )
            ],
            created=100000,
            model="model-id",
            object="chat.completion.chunk",
        ),
    ]

    client.chat.completions._post = mock.Mock(return_value=returned_stream)
    with start_transaction(name="openai tx"):
        response_stream = client.chat.completions.create(
            model="some-model", messages=[{"role": "system", "content": "hello"}]
        )
        response_string = "".join(
            map(lambda x: x.choices[0].delta.content, response_stream)
        )
    assert response_string == "hello world"
    tx = events[0]
    assert tx["type"] == "transaction"
    span = tx["spans"][0]
    assert span["op"] == "openai"
    assert span["data"][COMPLETION_TOKENS] == 2
    assert span["data"][PROMPT_TOKENS] == 1
    assert span["data"][TOTAL_TOKENS] == 3

def test_embeddings_create(sentry_init, capture_events):
    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
    events = capture_events()

    client = OpenAI(api_key="z")

    returned_embedding = CreateEmbeddingResponse(
        data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])],
        model="some-model",
        object="list",
        usage=EmbeddingTokenUsage(
            prompt_tokens=20,
            total_tokens=30,
        ),
    )

    client.embeddings._post = mock.Mock(return_value=returned_embedding)
    with start_transaction(name="openai tx"):
        response = client.embeddings.create(
            input="test", model="text-embedding-3-large"
        )

    assert len(response.data[0].embedding) == 3

    tx = events[0]
    assert tx["type"] == "transaction"
    span = tx["spans"][0]
    assert span["op"] == "openai"

    assert span["data"][PROMPT_TOKENS] == 20
    assert span["data"][TOTAL_TOKENS] == 30