feat(openai): align embedding instrumentation with pending spec #2210
Changes from all commits
```diff
@@ -22,13 +22,17 @@
 from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
 from openinference.instrumentation.openai._utils import _get_openai_version
 from openinference.semconv.trace import (
+    EmbeddingAttributes,
     ImageAttributes,
     MessageAttributes,
     MessageContentAttributes,
     SpanAttributes,
     ToolCallAttributes,
 )

+# TODO: Update to use SpanAttributes.EMBEDDING_INVOCATION_PARAMETERS when released in semconv
+_EMBEDDING_INVOCATION_PARAMETERS = "embedding.invocation_parameters"
+
 if TYPE_CHECKING:
     from openai.types import Completion, CreateEmbeddingResponse
     from openai.types.chat import ChatCompletion
```
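To make the placeholder concrete: a minimal standalone sketch of what the flat key yields on a span, with illustrative request parameters (not from this PR) and `json.dumps` standing in for the repo's `safe_json_dumps` helper:

```python
import json

# Temporary flat key from this PR, pending SpanAttributes.EMBEDDING_INVOCATION_PARAMETERS.
_EMBEDDING_INVOCATION_PARAMETERS = "embedding.invocation_parameters"

# Hypothetical embedding request parameters (values are illustrative).
params = {"model": "text-embedding-3-small", "input": "hello", "encoding_format": "base64"}

# Mirror the extractor below: drop the potentially large "input", dump the rest.
invocation_params = dict(params)
invocation_params.pop("input", None)
print(f"{_EMBEDDING_INVOCATION_PARAMETERS} = {json.dumps(invocation_params)}")
# embedding.invocation_parameters = {"model": "text-embedding-3-small", "encoding_format": "base64"}
```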
```diff
@@ -204,14 +208,32 @@ def _get_attributes_from_image(
 def _get_attributes_from_completion_create_param(
     params: Mapping[str, Any],
 ) -> Iterator[Tuple[str, AttributeValue]]:
-    # openai.types.completion_create_params.CompletionCreateParamsBase
-    # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L11  # noqa: E501
+    """
+    Extract attributes from parameters for the LEGACY completions API.
+
+    The legacy completions API supports:
+    - Single prompt: client.completions.create(prompt="text")
+    - Batch prompts: client.completions.create(prompt=["text1", "text2"])
+      where each prompt generates a separate completion
+
+    See: https://github.com/openai/openai-python/blob/7da727a4a3eb35306c328e2c3207a1618ed1809f/src/openai/types/completion_create_params.py#L18-L25
+    """
     if not isinstance(params, Mapping):
         return
     invocation_params = dict(params)
     invocation_params.pop("prompt", None)
     yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)

+    model_prompt = params.get("prompt")
+    if isinstance(model_prompt, str):
+        yield SpanAttributes.LLM_PROMPTS, [model_prompt]
+    elif (
+        isinstance(model_prompt, list)
+        and model_prompt
+        and all(isinstance(item, str) for item in model_prompt)
+    ):
+        yield SpanAttributes.LLM_PROMPTS, model_prompt
+

 def _get_attributes_from_embedding_create_param(
     params: Mapping[str, Any],
```

Comment on the new docstring: here's a rewording of the comments which I think better explains the prompt union weirdness.
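To see the two prompt shapes in action, here is a minimal standalone sketch of the mapping (the literal `"llm.prompts"` is an assumed value for `SpanAttributes.LLM_PROMPTS`; token-ID prompts deliberately fall through unrecorded):

```python
from typing import Any, Iterator, Mapping, Tuple

LLM_PROMPTS = "llm.prompts"  # assumed value of SpanAttributes.LLM_PROMPTS

def prompt_attributes(params: Mapping[str, Any]) -> Iterator[Tuple[str, Any]]:
    # Same shape handling as the new extractor code above.
    prompt = params.get("prompt")
    if isinstance(prompt, str):
        yield LLM_PROMPTS, [prompt]  # single prompt -> one-element list
    elif isinstance(prompt, list) and prompt and all(isinstance(p, str) for p in prompt):
        yield LLM_PROMPTS, prompt  # batch prompts, one completion per item
    # List[int] / List[List[int]] token prompts are intentionally not recorded

print(dict(prompt_attributes({"prompt": "text"})))              # {'llm.prompts': ['text']}
print(dict(prompt_attributes({"prompt": ["text1", "text2"]})))  # {'llm.prompts': ['text1', 'text2']}
print(dict(prompt_attributes({"prompt": [[101, 102]]})))        # {} (token IDs skipped)
```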
```diff
@@ -222,7 +244,26 @@ def _get_attributes_from_embedding_create_param(
         return
     invocation_params = dict(params)
     invocation_params.pop("input", None)
-    yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)
+    yield _EMBEDDING_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)
+
+    # Extract text from embedding input - only records text, not token IDs
+    embedding_input = params.get("input")
+    if embedding_input is not None:
+        if isinstance(embedding_input, str):
+            # Single string input
+            yield (
+                f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.0.{EmbeddingAttributes.EMBEDDING_TEXT}",
+                embedding_input,
+            )
+        elif isinstance(embedding_input, list) and embedding_input:
+            # Check if it's a list of strings (not tokens)
+            if all(isinstance(item, str) for item in embedding_input):
+                # List of strings
+                for index, text in enumerate(embedding_input):
+                    yield (
+                        f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_TEXT}",
+                        text,
+                    )


 T = TypeVar("T", bound=type)
```
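For the same reason, a small sketch of the flattened keys this input-text extraction produces (the values `"embedding.embeddings"` and `"embedding.text"` are assumed for the semconv constants):

```python
from typing import Any, Iterator, Tuple

EMBEDDING_EMBEDDINGS = "embedding.embeddings"  # assumed SpanAttributes.EMBEDDING_EMBEDDINGS
EMBEDDING_TEXT = "embedding.text"              # assumed EmbeddingAttributes.EMBEDDING_TEXT

def embedding_text_attributes(embedding_input: Any) -> Iterator[Tuple[str, str]]:
    # Mirrors the new extractor logic: record text inputs only, never token IDs.
    if isinstance(embedding_input, str):
        yield f"{EMBEDDING_EMBEDDINGS}.0.{EMBEDDING_TEXT}", embedding_input
    elif isinstance(embedding_input, list) and embedding_input:
        if all(isinstance(item, str) for item in embedding_input):
            for index, text in enumerate(embedding_input):
                yield f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_TEXT}", text

print(dict(embedding_text_attributes("hello")))
# {'embedding.embeddings.0.embedding.text': 'hello'}
print(dict(embedding_text_attributes(["a", "b"])))
# {'embedding.embeddings.0.embedding.text': 'a', 'embedding.embeddings.1.embedding.text': 'b'}
```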
```diff
@@ -2,24 +2,22 @@

 import base64
 import logging
-from importlib import import_module
-from types import ModuleType
+import struct
 from typing import (
     TYPE_CHECKING,
     Any,
     Iterable,
     Iterator,
     Mapping,
-    Optional,
     Sequence,
     Tuple,
     Type,
 )

 from opentelemetry.util.types import AttributeValue

 from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
-from openinference.instrumentation.openai._utils import _get_openai_version, _get_texts
+from openinference.instrumentation.openai._utils import _get_openai_version
 from openinference.semconv.trace import (
     EmbeddingAttributes,
     MessageAttributes,
```
```diff
@@ -37,11 +35,6 @@
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())

-try:
-    _NUMPY: Optional[ModuleType] = import_module("numpy")
-except ImportError:
-    _NUMPY = None
-

 class _ResponseAttributesExtractor:
     __slots__ = (
```

Comment on lines -40 to -44: I honestly don't remember why this was needed.

Reply: Originally it was doing this, but we have test cases that prove we can get the vectors without it. If we need to re-introduce it, we probably should do so with a failing test.
```diff
@@ -79,12 +72,10 @@ def get_attributes_from_response(
         elif isinstance(response, self._create_embedding_response_type):
             yield from self._get_attributes_from_create_embedding_response(
                 response=response,
-                request_parameters=request_parameters,
             )
         elif isinstance(response, self._completion_type):
             yield from self._get_attributes_from_completion(
                 completion=response,
-                request_parameters=request_parameters,
             )

     def _get_attributes_from_responses_response(
```
```diff
@@ -116,26 +107,16 @@ def _get_attributes_from_chat_completion(
     def _get_attributes_from_completion(
         self,
         completion: "Completion",
-        request_parameters: Mapping[str, Any],
     ) -> Iterator[Tuple[str, AttributeValue]]:
         # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13  # noqa: E501
         if model := getattr(completion, "model", None):
             yield SpanAttributes.LLM_MODEL_NAME, model
         if usage := getattr(completion, "usage", None):
             yield from self._get_attributes_from_completion_usage(usage)
-        if model_prompt := request_parameters.get("prompt"):
-            # FIXME: this step should move to request attributes extractor if decoding is not necessary.  # noqa: E501
-            # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]]
-            # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38
-            # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
-            # names are not reliable (across OpenAI and Azure).
-            if prompts := list(_get_texts(model_prompt, model)):
-                yield SpanAttributes.LLM_PROMPTS, prompts

     def _get_attributes_from_create_embedding_response(
         self,
         response: "CreateEmbeddingResponse",
-        request_parameters: Mapping[str, Any],
     ) -> Iterator[Tuple[str, AttributeValue]]:
         # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20  # noqa: E501
         if usage := getattr(response, "usage", None):
```
```diff
@@ -144,48 +125,39 @@ def _get_attributes_from_create_embedding_response(
         if model := getattr(response, "model"):
             yield f"{SpanAttributes.EMBEDDING_MODEL_NAME}", model
         if (data := getattr(response, "data", None)) and isinstance(data, Iterable):
-            for embedding in data:
-                if (index := getattr(embedding, "index", None)) is None:
+            # Extract embedding vectors using the explicit index from each embedding object
+            for embedding_item in data:
+                # Use the explicit index field from the API response
+                index = getattr(embedding_item, "index", None)
+                if index is None:
                     continue
-                for key, value in self._get_attributes_from_embedding(embedding):
-                    yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value
-
-            embedding_input = request_parameters.get("input")
-            for index, text in enumerate(_get_texts(embedding_input, model)):
-                # FIXME: this step should move to request attributes extractor if decoding is not necessary.  # noqa: E501
-                # input: Required[Union[str, List[str], List[int], List[List[int]]]]
-                # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12
-                # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
-                # names are not reliable (across OpenAI and Azure).
-                yield (
-                    (
-                        f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}."
-                        f"{EmbeddingAttributes.EMBEDDING_TEXT}"
-                    ),
-                    text,
-                )
-
-    def _get_attributes_from_embedding(
-        self,
-        embedding: object,
-    ) -> Iterator[Tuple[str, AttributeValue]]:
-        # openai.types.Embedding
-        # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding.py#L11  # noqa: E501
-        if not (_vector := getattr(embedding, "embedding", None)):
-            return
-        if isinstance(_vector, Sequence) and len(_vector) and isinstance(_vector[0], float):
-            vector = list(_vector)
-            yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
-        elif isinstance(_vector, str) and _vector and _NUMPY:
-            # FIXME: this step should be removed if decoding is not necessary.
-            try:
-                # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100  # noqa: E501
-                vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist()
-            except Exception:
-                logger.exception("Failed to decode embedding")
-                pass
-            else:
-                yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
+                raw_vector = getattr(embedding_item, "embedding", None)
+                if not raw_vector:
+                    continue
+
+                vector = None
+                # Check if it's a list of floats
+                if isinstance(raw_vector, (list, tuple)) and raw_vector:
+                    if isinstance(raw_vector[0], (int, float)):
+                        vector = list(raw_vector)
+                elif isinstance(raw_vector, str) and raw_vector:
+                    # Base64-encoded vector (when encoding_format="base64")
+                    try:
+                        # Decode base64 to float32 array
+                        decoded = base64.b64decode(raw_vector)
+                        # Unpack as float32 values
+                        num_floats = len(decoded) // 4
+                        vector = list(struct.unpack(f"{num_floats}f", decoded))
+                    except Exception:
+                        # If decoding fails, skip this vector
+                        continue
+
+                if vector:
+                    yield (
+                        f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_VECTOR}",
+                        vector,
+                    )

     def _get_attributes_from_chat_completion_message(
         self,
```
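A standalone round-trip check of the struct-based decode above, which reproduces what the removed numpy.frombuffer call did for encoding_format="base64" responses (the sample vector is fabricated):

```python
import base64
import struct

# Fabricate a payload the way the API encodes it: a float32 array, base64-encoded.
original = [0.1, -0.5, 2.0]
payload = base64.b64encode(struct.pack(f"{len(original)}f", *original)).decode()

# Decode exactly as the new extractor code does.
decoded = base64.b64decode(payload)
num_floats = len(decoded) // 4  # float32 is 4 bytes wide
vector = list(struct.unpack(f"{num_floats}f", decoded))

# float32 round-trips lose precision relative to Python floats, so compare loosely.
assert all(abs(a - b) < 1e-6 for a, b in zip(vector, original))
print(vector)  # e.g. [0.10000000149011612, -0.5, 2.0]
```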
Conversation on the span name:

Comment: Just for my understanding here: what does the consistent name buy us, rather than naming based on the function name?

Reply: Backends typically index the span name field for queries, and people sometimes do aggregates or auto-complete on the span name. For example, Zipkin does auto-complete on span name, but not on arbitrary attributes. Some tools derive metrics from spans and need to aggregate on something; typically that's the span name. So, if something that is in a spec leaves the span name out, it ends up unusable for things like this.

Reply: Here's an oldie example of this, which is subverted when span names are subtly different for the same operation: https://github.com/openzipkin/zipkin-api/blob/master/zipkin2-api.yaml#L44

Reply: I see. I mainly ask because I tend to face a fair amount of pressure to cram things like agent operations into names, because operators need to grok the control flow. With embeddings generation, though, I think this makes a lot of sense.

Reply: Agreed. I don't think agent ops are commodity yet, and maybe not for a long while. Won't go trying to normalize those ;)