2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
<p align="center">
<img alt="OpenInfence" src="https://raw.githubusercontent.com/Arize-ai/phoenix-assets/main/logos/OpenInference/Full%20color/OI-full-horiz.svg" width="40%" height="100%" />
<img alt="OpenInference" src="https://raw.githubusercontent.com/Arize-ai/phoenix-assets/main/logos/OpenInference/Full%20color/OI-full-horiz.svg" width="40%" height="100%" />
</p>
<p align="center">
<a href="https://arize-ai.github.io/openinference/spec/">
@@ -302,8 +302,12 @@ def __call__(
return wrapped(*args, **kwargs)
try:
cast_to, request_parameters = _parse_request_args(args)
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name: str = cast_to.__name__.split(".")[-1]
# Use consistent span names: "CreateEmbeddings" for embeddings, class name for others
if cast_to is self._openai.types.CreateEmbeddingResponse:
span_name = "CreateEmbeddings"
Comment on lines +305 to +307
Collaborator:
Just for my understanding here: what does the consistent name buy us, rather than maybe naming based on the function name?

Contributor Author:

Backends typically index the span name field for query, and people sometimes do aggregates/auto-complete on the span name. For example, Zipkin does auto-complete on span name, but not on arbitrary attributes. Some tools derive metrics from spans and need to aggregate on something; typically that's the span name. So if a spec leaves the span name out, spans end up unusable for things like this.

Contributor Author:

Here's an oldie example of this, which is subverted when span names are subtly different for the same operation: https://github.com/openzipkin/zipkin-api/blob/master/zipkin2-api.yaml#L44

Collaborator:

I see. I mainly ask because I tend to face a fair amount of pressure to cram things like agent operations into names, because there's a need for operators to grok the control flow.

With embeddings generation, though, I think this makes a lot of sense.

Contributor Author:

Agreed. I don't think agent ops are commodity yet, and maybe won't be for a long while. Won't go trying to normalize those ;)
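
A minimal sketch of the span-name aggregation described in this thread; the span list and durations are illustrative stand-ins for what a backend would receive from an exporter, not real data:

    from collections import defaultdict

    # Illustrative spans as a backend might index them.
    spans = [
        {"name": "ChatCompletion", "duration_ms": 812.0},
        {"name": "CreateEmbeddings", "duration_ms": 45.0},
        {"name": "CreateEmbeddings", "duration_ms": 51.0},
    ]

    # Stable span names mean stable buckets; if the same operation emitted
    # "CreateEmbeddings" in one code path and "CreateEmbeddingResponse" in
    # another, these per-operation metrics would fragment.
    by_name = defaultdict(list)
    for span in spans:
        by_name[span["name"]].append(span["duration_ms"])
    for name, durations in sorted(by_name.items()):
        print(name, len(durations), sum(durations) / len(durations))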

else:
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name = cast_to.__name__.split(".")[-1]
except Exception:
logger.exception("Failed to parse request args")
return wrapped(*args, **kwargs)
@@ -359,8 +363,12 @@ async def __call__(
return await wrapped(*args, **kwargs)
try:
cast_to, request_parameters = _parse_request_args(args)
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name: str = cast_to.__name__.split(".")[-1]
# Use consistent span names: "CreateEmbeddings" for embeddings, class name for others
if cast_to is self._openai.types.CreateEmbeddingResponse:
span_name = "CreateEmbeddings"
else:
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name = cast_to.__name__.split(".")[-1]
except Exception:
logger.exception("Failed to parse request args")
return await wrapped(*args, **kwargs)
@@ -22,13 +22,17 @@
from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
from openinference.instrumentation.openai._utils import _get_openai_version
from openinference.semconv.trace import (
EmbeddingAttributes,
ImageAttributes,
MessageAttributes,
MessageContentAttributes,
SpanAttributes,
ToolCallAttributes,
)

# TODO: Update to use SpanAttributes.EMBEDDING_INVOCATION_PARAMETERS when released in semconv
_EMBEDDING_INVOCATION_PARAMETERS = "embedding.invocation_parameters"

if TYPE_CHECKING:
from openai.types import Completion, CreateEmbeddingResponse
from openai.types.chat import ChatCompletion
@@ -204,14 +208,32 @@ def _get_attributes_from_image(
def _get_attributes_from_completion_create_param(
params: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# openai.types.completion_create_params.CompletionCreateParamsBase
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L11 # noqa: E501
"""
Extract attributes from parameters for the LEGACY completions API.
Contributor Author:
Here's a rewording of the comments which I think better explains the prompt-union weirdness:


The legacy completions API supports:
- Single prompt: client.completions.create(prompt="text")
- Batch prompts: client.completions.create(prompt=["text1", "text2"])
where each prompt generates a separate completion

See: https://github.com/openai/openai-python/blob/7da727a4a3eb35306c328e2c3207a1618ed1809f/src/openai/types/completion_create_params.py#L18-L25
"""
if not isinstance(params, Mapping):
return
invocation_params = dict(params)
invocation_params.pop("prompt", None)
yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)

model_prompt = params.get("prompt")
if isinstance(model_prompt, str):
yield SpanAttributes.LLM_PROMPTS, [model_prompt]
elif (
isinstance(model_prompt, list)
and model_prompt
and all(isinstance(item, str) for item in model_prompt)
):
yield SpanAttributes.LLM_PROMPTS, model_prompt
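
A hedged usage sketch for the extractor above. The printed attribute keys assume SpanAttributes.LLM_INVOCATION_PARAMETERS and SpanAttributes.LLM_PROMPTS resolve to "llm.invocation_parameters" and "llm.prompts"; that mapping is an assumption, not something this diff states:

    # Params mirror what a caller passes to client.completions.create(**params).
    params = {"model": "gpt-3.5-turbo-instruct", "prompt": ["text1", "text2"]}
    for key, value in _get_attributes_from_completion_create_param(params):
        print(key, "=", value)
    # Expected, under the assumed key values:
    #   llm.invocation_parameters = {"model": "gpt-3.5-turbo-instruct"}
    #   llm.prompts = ['text1', 'text2']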


def _get_attributes_from_embedding_create_param(
params: Mapping[str, Any],
@@ -222,7 +244,26 @@ def _get_attributes_from_embedding_create_param(
return
invocation_params = dict(params)
invocation_params.pop("input", None)
yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)
yield _EMBEDDING_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)

# Extract text from embedding input - only records text, not token IDs
embedding_input = params.get("input")
if embedding_input is not None:
if isinstance(embedding_input, str):
# Single string input
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.0.{EmbeddingAttributes.EMBEDDING_TEXT}",
embedding_input,
)
elif isinstance(embedding_input, list) and embedding_input:
# Check if it's a list of strings (not tokens)
if all(isinstance(item, str) for item in embedding_input):
# List of strings
for index, text in enumerate(embedding_input):
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_TEXT}",
text,
)
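
A similar sketch for the embedding-input path above, showing how a batch input fans out to indexed keys. The literal key strings assume EMBEDDING_EMBEDDINGS is "embedding.embeddings" and EMBEDDING_TEXT is "embedding.text", which this diff does not confirm:

    # Hypothetical call against the function above.
    params = {"model": "text-embedding-3-small", "input": ["hello", "world"]}
    for key, value in _get_attributes_from_embedding_create_param(params):
        print(key, "=", value)
    # Expected, under the assumed key values:
    #   embedding.invocation_parameters = {"model": "text-embedding-3-small"}
    #   embedding.embeddings.0.embedding.text = hello
    #   embedding.embeddings.1.embedding.text = world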


T = TypeVar("T", bound=type)
@@ -2,24 +2,22 @@

import base64
import logging
from importlib import import_module
import struct
from types import ModuleType
from typing import (
TYPE_CHECKING,
Any,
Iterable,
Iterator,
Mapping,
Optional,
Sequence,
Tuple,
Type,
)

from opentelemetry.util.types import AttributeValue

from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
from openinference.instrumentation.openai._utils import _get_openai_version, _get_texts
from openinference.instrumentation.openai._utils import _get_openai_version
from openinference.semconv.trace import (
EmbeddingAttributes,
MessageAttributes,
@@ -37,11 +35,6 @@
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

try:
_NUMPY: Optional[ModuleType] = import_module("numpy")
except ImportError:
_NUMPY = None

Comment on lines -40 to -44
Collaborator:
I honestly don't remember why this was needed.

Contributor Author:
Originally it was doing this:

                vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist()

But we have test cases proving we can get the vectors without it at the moment. If we need to re-introduce it, we probably should do so with a failing test.
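
For reference, a self-contained round-trip showing the struct-based decode this PR uses in place of numpy (float32, native byte order; the values are chosen to be float32-exact so the assert holds):

    import base64
    import struct

    original = [0.25, -1.5, 3.0]
    encoded = base64.b64encode(struct.pack(f"{len(original)}f", *original)).decode()

    # The decode path from this PR: base64 -> bytes -> float32 values.
    decoded = base64.b64decode(encoded)
    vector = list(struct.unpack(f"{len(decoded) // 4}f", decoded))
    assert vector == original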


class _ResponseAttributesExtractor:
__slots__ = (
@@ -79,12 +72,10 @@ def get_attributes_from_response(
elif isinstance(response, self._create_embedding_response_type):
yield from self._get_attributes_from_create_embedding_response(
response=response,
request_parameters=request_parameters,
)
elif isinstance(response, self._completion_type):
yield from self._get_attributes_from_completion(
completion=response,
request_parameters=request_parameters,
)

def _get_attributes_from_responses_response(
@@ -116,26 +107,16 @@ def _get_attributes_from_chat_completion(
def _get_attributes_from_completion(
self,
completion: "Completion",
request_parameters: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501
if model := getattr(completion, "model", None):
yield SpanAttributes.LLM_MODEL_NAME, model
if usage := getattr(completion, "usage", None):
yield from self._get_attributes_from_completion_usage(usage)
if model_prompt := request_parameters.get("prompt"):
# FIXME: this step should move to request attributes extractor if decoding is not necessary.# noqa: E501
# prompt: Required[Union[str, List[str], List[int], List[List[int]], None]]
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38
# FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
# names are not reliable (across OpenAI and Azure).
if prompts := list(_get_texts(model_prompt, model)):
yield SpanAttributes.LLM_PROMPTS, prompts

def _get_attributes_from_create_embedding_response(
self,
response: "CreateEmbeddingResponse",
request_parameters: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501
if usage := getattr(response, "usage", None):
@@ -144,48 +125,39 @@ def _get_attributes_from_create_embedding_response(
if model := getattr(response, "model"):
yield f"{SpanAttributes.EMBEDDING_MODEL_NAME}", model
if (data := getattr(response, "data", None)) and isinstance(data, Iterable):
for embedding in data:
if (index := getattr(embedding, "index", None)) is None:
# Extract embedding vectors using the explicit index from each embedding object
for embedding_item in data:
# Use the explicit index field from the API response
index = getattr(embedding_item, "index", None)
if index is None:
continue
for key, value in self._get_attributes_from_embedding(embedding):
yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value

embedding_input = request_parameters.get("input")
for index, text in enumerate(_get_texts(embedding_input, model)):
# FIXME: this step should move to request attributes extractor if decoding is not necessary.# noqa: E501
# input: Required[Union[str, List[str], List[int], List[List[int]]]]
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12
# FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
# names are not reliable (across OpenAI and Azure).
yield (
(
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}."
f"{EmbeddingAttributes.EMBEDDING_TEXT}"
),
text,
)
raw_vector = getattr(embedding_item, "embedding", None)
if not raw_vector:
continue

def _get_attributes_from_embedding(
self,
embedding: object,
) -> Iterator[Tuple[str, AttributeValue]]:
# openai.types.Embedding
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding.py#L11 # noqa: E501
if not (_vector := getattr(embedding, "embedding", None)):
return
if isinstance(_vector, Sequence) and len(_vector) and isinstance(_vector[0], float):
vector = list(_vector)
yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
elif isinstance(_vector, str) and _vector and _NUMPY:
# FIXME: this step should be removed if decoding is not necessary.
try:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501
vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist()
except Exception:
logger.exception("Failed to decode embedding")
pass
else:
yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
vector = None
# Check if it's a list of floats
if isinstance(raw_vector, (list, tuple)) and raw_vector:
if isinstance(raw_vector[0], (int, float)):
vector = list(raw_vector)
elif isinstance(raw_vector, str) and raw_vector:
# Base64-encoded vector (when encoding_format="base64")
try:
# Decode base64 to float32 array
decoded = base64.b64decode(raw_vector)
# Unpack as float32 values
num_floats = len(decoded) // 4
vector = list(struct.unpack(f"{num_floats}f", decoded))
except Exception:
# If decoding fails, skip this vector
continue

if vector:
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_VECTOR}",
vector,
)

def _get_attributes_from_chat_completion_message(
self,
@@ -5,14 +5,12 @@
from typing import (
Any,
Iterator,
List,
Mapping,
NamedTuple,
Optional,
Protocol,
Sequence,
Tuple,
Union,
cast,
)

@@ -110,26 +108,3 @@ def _finish_tracing(
)
except Exception:
logger.exception("Failed to finish tracing")


def _get_texts(
model_input: Optional[Union[str, List[str], List[int], List[List[int]]]],
model: Optional[str],
) -> Iterator[str]:
if not model_input:
return
if isinstance(model_input, str):
text = model_input
yield text
return
if not isinstance(model_input, Sequence):
return
if any(not isinstance(item, str) for item in model_input):
# FIXME: We can't decode tokens (List[int]) reliably because the model name is not reliable,
# e.g. for text-embedding-ada-002 (cl100k_base), OpenAI returns "text-embedding-ada-002-v2",
# and Azure returns "ada", which refers to a different model (r50k_base). We could use the
# request model name instead, but that doesn't work for Azure because Azure uses the
# deployment name (which differs from the model name).
return
for text in cast(List[str], model_input):
yield text
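
A sketch of the unreliability that FIXME describes, assuming tiktoken is installed; the token IDs are illustrative:

    import tiktoken

    tokens = [9906, 1917]  # illustrative cl100k_base token IDs

    # OpenAI reports a model name that maps to cl100k_base:
    cl100k = tiktoken.encoding_for_model("text-embedding-ada-002")
    print(cl100k.decode(tokens))

    # Azure may report just "ada", which historically refers to the GPT-3
    # model tokenized with r50k_base — the same IDs decode to different text,
    # so the response's model name can't be trusted to pick a tokenizer.
    r50k = tiktoken.get_encoding("r50k_base")
    print(r50k.decode(tokens))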