2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
<p align="center">
<img alt="OpenInfence" src="https://raw.githubusercontent.com/Arize-ai/phoenix-assets/main/logos/OpenInference/Full%20color/OI-full-horiz.svg" width="40%" height="100%" />
<img alt="OpenInference" src="https://raw.githubusercontent.com/Arize-ai/phoenix-assets/main/logos/OpenInference/Full%20color/OI-full-horiz.svg" width="40%" height="100%" />
</p>
<p align="center">
<a href="https://arize-ai.github.io/openinference/spec/">
@@ -302,8 +302,12 @@ def __call__(
return wrapped(*args, **kwargs)
try:
cast_to, request_parameters = _parse_request_args(args)
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name: str = cast_to.__name__.split(".")[-1]
# Use consistent span names: "CreateEmbeddings" for embeddings, class name for others
if cast_to is self._openai.types.CreateEmbeddingResponse:
span_name = "CreateEmbeddings"
Comment on lines +305 to +307
Collaborator:
Just for my understanding here: what does the consistent name buy us, rather than maybe naming based on the function name?

Contributor Author:

Backends typically index the span name field for query, and people sometimes do aggregates/auto-complete on the span name. For example, Zipkin does auto-complete on span name, but not on arbitrary attributes. Some tools derive metrics from spans and need to aggregate on something; typically that's the span name. So if a spec leaves the span name out, spans end up unusable for things like this.

Contributor Author:

Here's an oldie example of this, which is subverted when span names are subtly different for the same operation: https://github.com/openzipkin/zipkin-api/blob/master/zipkin2-api.yaml#L44

Collaborator:

I see. I mainly ask because I tend to face a fair amount of pressure to cram things like agent operations into names, because there's a need for operators to grok the control flow.

With embeddings generation, though, I think this makes a lot of sense.

Contributor Author:

Agreed. I don't think agent ops are commodity yet, and maybe won't be for a long while. Won't go trying to normalize those ;)
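
A minimal sketch of the span-name aggregation described in this thread; the span list and durations are illustrative stand-ins for what a backend would receive from an exporter, not real data:

    from collections import defaultdict

    # Illustrative spans as a backend might index them.
    spans = [
        {"name": "ChatCompletion", "duration_ms": 812.0},
        {"name": "CreateEmbeddings", "duration_ms": 45.0},
        {"name": "CreateEmbeddings", "duration_ms": 51.0},
    ]

    # Stable span names mean stable buckets; if the same operation emitted
    # "CreateEmbeddings" in one code path and "CreateEmbeddingResponse" in
    # another, these per-operation metrics would fragment.
    by_name = defaultdict(list)
    for span in spans:
        by_name[span["name"]].append(span["duration_ms"])
    for name, durations in sorted(by_name.items()):
        print(name, len(durations), sum(durations) / len(durations))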

else:
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name = cast_to.__name__.split(".")[-1]
except Exception:
logger.exception("Failed to parse request args")
return wrapped(*args, **kwargs)
@@ -359,8 +363,12 @@ async def __call__(
return await wrapped(*args, **kwargs)
try:
cast_to, request_parameters = _parse_request_args(args)
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name: str = cast_to.__name__.split(".")[-1]
# Use consistent span names: "CreateEmbeddings" for embeddings, class name for others
if cast_to is self._openai.types.CreateEmbeddingResponse:
span_name = "CreateEmbeddings"
else:
# E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
span_name = cast_to.__name__.split(".")[-1]
except Exception:
logger.exception("Failed to parse request args")
return await wrapped(*args, **kwargs)
@@ -22,13 +22,17 @@
from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
from openinference.instrumentation.openai._utils import _get_openai_version
from openinference.semconv.trace import (
EmbeddingAttributes,
ImageAttributes,
MessageAttributes,
MessageContentAttributes,
SpanAttributes,
ToolCallAttributes,
)

# TODO: Update to use SpanAttributes.EMBEDDING_INVOCATION_PARAMETERS when released in semconv
_EMBEDDING_INVOCATION_PARAMETERS = "embedding.invocation_parameters"

if TYPE_CHECKING:
from openai.types import Completion, CreateEmbeddingResponse
from openai.types.chat import ChatCompletion
@@ -204,14 +208,32 @@ def _get_attributes_from_image(
def _get_attributes_from_completion_create_param(
params: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# openai.types.completion_create_params.CompletionCreateParamsBase
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L11 # noqa: E501
"""
Extract attributes from parameters for the LEGACY completions API.
Contributor Author:
Here's a rewording of the comments which I think better explains the prompt-union weirdness:


The legacy completions API supports:
- Single prompt: client.completions.create(prompt="text")
- Batch prompts: client.completions.create(prompt=["text1", "text2"])
where each prompt generates a separate completion

See: https://github.com/openai/openai-python/blob/7da727a4a3eb35306c328e2c3207a1618ed1809f/src/openai/types/completion_create_params.py#L18-L25
"""
if not isinstance(params, Mapping):
return
invocation_params = dict(params)
invocation_params.pop("prompt", None)
yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)

model_prompt = params.get("prompt")
if isinstance(model_prompt, str):
yield SpanAttributes.LLM_PROMPTS, [model_prompt]
elif (
isinstance(model_prompt, list)
and model_prompt
and all(isinstance(item, str) for item in model_prompt)
):
yield SpanAttributes.LLM_PROMPTS, model_prompt
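
A hedged usage sketch for the extractor above. The printed attribute keys assume SpanAttributes.LLM_INVOCATION_PARAMETERS and SpanAttributes.LLM_PROMPTS resolve to "llm.invocation_parameters" and "llm.prompts"; that mapping is an assumption, not something this diff states:

    # Params mirror what a caller passes to client.completions.create(**params).
    params = {"model": "gpt-3.5-turbo-instruct", "prompt": ["text1", "text2"]}
    for key, value in _get_attributes_from_completion_create_param(params):
        print(key, "=", value)
    # Expected, under the assumed key values:
    #   llm.invocation_parameters = {"model": "gpt-3.5-turbo-instruct"}
    #   llm.prompts = ['text1', 'text2']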


def _get_attributes_from_embedding_create_param(
params: Mapping[str, Any],
@@ -222,7 +244,26 @@ def _get_attributes_from_embedding_create_param(
return
invocation_params = dict(params)
invocation_params.pop("input", None)
yield SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)
yield _EMBEDDING_INVOCATION_PARAMETERS, safe_json_dumps(invocation_params)

# Extract text from embedding input - only records text, not token IDs
embedding_input = params.get("input")
if embedding_input is not None:
if isinstance(embedding_input, str):
# Single string input
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.0.{EmbeddingAttributes.EMBEDDING_TEXT}",
embedding_input,
)
elif isinstance(embedding_input, list) and embedding_input:
# Check if it's a list of strings (not tokens)
if all(isinstance(item, str) for item in embedding_input):
# List of strings
for index, text in enumerate(embedding_input):
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_TEXT}",
text,
)
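
A similar sketch for the embedding-input path above, showing how a batch input fans out to indexed keys. The literal key strings assume EMBEDDING_EMBEDDINGS is "embedding.embeddings" and EMBEDDING_TEXT is "embedding.text", which this diff does not confirm:

    # Hypothetical call against the function above.
    params = {"model": "text-embedding-3-small", "input": ["hello", "world"]}
    for key, value in _get_attributes_from_embedding_create_param(params):
        print(key, "=", value)
    # Expected, under the assumed key values:
    #   embedding.invocation_parameters = {"model": "text-embedding-3-small"}
    #   embedding.embeddings.0.embedding.text = hello
    #   embedding.embeddings.1.embedding.text = world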


T = TypeVar("T", bound=type)
@@ -2,24 +2,22 @@

import base64
import logging
from importlib import import_module
import struct
from types import ModuleType
from typing import (
TYPE_CHECKING,
Any,
Iterable,
Iterator,
Mapping,
Optional,
Sequence,
Tuple,
Type,
)

from opentelemetry.util.types import AttributeValue

from openinference.instrumentation.openai._attributes._responses_api import _ResponsesApiAttributes
from openinference.instrumentation.openai._utils import _get_openai_version, _get_texts
from openinference.instrumentation.openai._utils import _get_openai_version
from openinference.semconv.trace import (
EmbeddingAttributes,
MessageAttributes,
@@ -37,11 +35,6 @@
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

try:
_NUMPY: Optional[ModuleType] = import_module("numpy")
except ImportError:
_NUMPY = None

Comment on lines -40 to -44
Collaborator:
I honestly don't remember why this was needed.

Contributor Author:
Originally it was doing this:

                vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist()

But we have test cases proving we can get the vectors without it at the moment. If we need to re-introduce it, we probably should do so with a failing test.
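
For reference, a self-contained round-trip showing the struct-based decode this PR uses in place of numpy (float32, native byte order; the values are chosen to be float32-exact so the assert holds):

    import base64
    import struct

    original = [0.25, -1.5, 3.0]
    encoded = base64.b64encode(struct.pack(f"{len(original)}f", *original)).decode()

    # The decode path from this PR: base64 -> bytes -> float32 values.
    decoded = base64.b64decode(encoded)
    vector = list(struct.unpack(f"{len(decoded) // 4}f", decoded))
    assert vector == original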


class _ResponseAttributesExtractor:
__slots__ = (
@@ -79,12 +72,10 @@ def get_attributes_from_response(
elif isinstance(response, self._create_embedding_response_type):
yield from self._get_attributes_from_create_embedding_response(
response=response,
request_parameters=request_parameters,
)
elif isinstance(response, self._completion_type):
yield from self._get_attributes_from_completion(
completion=response,
request_parameters=request_parameters,
)

def _get_attributes_from_responses_response(
@@ -116,26 +107,16 @@ def _get_attributes_from_chat_completion(
def _get_attributes_from_completion(
self,
completion: "Completion",
request_parameters: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501
if model := getattr(completion, "model", None):
yield SpanAttributes.LLM_MODEL_NAME, model
if usage := getattr(completion, "usage", None):
yield from self._get_attributes_from_completion_usage(usage)
if model_prompt := request_parameters.get("prompt"):
# FIXME: this step should move to request attributes extractor if decoding is not necessary.# noqa: E501
# prompt: Required[Union[str, List[str], List[int], List[List[int]], None]]
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38
# FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
# names are not reliable (across OpenAI and Azure).
if prompts := list(_get_texts(model_prompt, model)):
yield SpanAttributes.LLM_PROMPTS, prompts

def _get_attributes_from_create_embedding_response(
self,
response: "CreateEmbeddingResponse",
request_parameters: Mapping[str, Any],
) -> Iterator[Tuple[str, AttributeValue]]:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501
if usage := getattr(response, "usage", None):
@@ -144,48 +125,39 @@ def _get_attributes_from_create_embedding_response(
if model := getattr(response, "model"):
yield f"{SpanAttributes.EMBEDDING_MODEL_NAME}", model
if (data := getattr(response, "data", None)) and isinstance(data, Iterable):
for embedding in data:
if (index := getattr(embedding, "index", None)) is None:
# Extract embedding vectors using the explicit index from each embedding object
for embedding_item in data:
# Use the explicit index field from the API response
index = getattr(embedding_item, "index", None)
if index is None:
continue
for key, value in self._get_attributes_from_embedding(embedding):
yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value

embedding_input = request_parameters.get("input")
for index, text in enumerate(_get_texts(embedding_input, model)):
# FIXME: this step should move to request attributes extractor if decoding is not necessary.# noqa: E501
# input: Required[Union[str, List[str], List[int], List[List[int]]]]
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12
# FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model
# names are not reliable (across OpenAI and Azure).
yield (
(
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}."
f"{EmbeddingAttributes.EMBEDDING_TEXT}"
),
text,
)
raw_vector = getattr(embedding_item, "embedding", None)
if not raw_vector:
continue

def _get_attributes_from_embedding(
self,
embedding: object,
) -> Iterator[Tuple[str, AttributeValue]]:
# openai.types.Embedding
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding.py#L11 # noqa: E501
if not (_vector := getattr(embedding, "embedding", None)):
return
if isinstance(_vector, Sequence) and len(_vector) and isinstance(_vector[0], float):
vector = list(_vector)
yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
elif isinstance(_vector, str) and _vector and _NUMPY:
# FIXME: this step should be removed if decoding is not necessary.
try:
# See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501
vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist()
except Exception:
logger.exception("Failed to decode embedding")
pass
else:
yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector
vector = None
# Check if it's a list of floats
if isinstance(raw_vector, (list, tuple)) and raw_vector:
if isinstance(raw_vector[0], (int, float)):
vector = list(raw_vector)
elif isinstance(raw_vector, str) and raw_vector:
# Base64-encoded vector (when encoding_format="base64")
try:
# Decode base64 to float32 array
decoded = base64.b64decode(raw_vector)
# Unpack as float32 values
num_floats = len(decoded) // 4
vector = list(struct.unpack(f"{num_floats}f", decoded))
except Exception:
# If decoding fails, skip this vector
continue

if vector:
yield (
f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{EmbeddingAttributes.EMBEDDING_VECTOR}",
vector,
)

def _get_attributes_from_chat_completion_message(
self,
@@ -5,14 +5,12 @@
from typing import (
Any,
Iterator,
List,
Mapping,
NamedTuple,
Optional,
Protocol,
Sequence,
Tuple,
Union,
cast,
)

@@ -110,26 +108,3 @@ def _finish_tracing(
)
except Exception:
logger.exception("Failed to finish tracing")


def _get_texts(
model_input: Optional[Union[str, List[str], List[int], List[List[int]]]],
model: Optional[str],
) -> Iterator[str]:
if not model_input:
return
if isinstance(model_input, str):
text = model_input
yield text
return
if not isinstance(model_input, Sequence):
return
if any(not isinstance(item, str) for item in model_input):
# FIXME: We can't decode tokens (List[int]) reliably because the model name is not reliable,
# e.g. for text-embedding-ada-002 (cl100k_base), OpenAI returns "text-embedding-ada-002-v2",
# and Azure returns "ada", which refers to a different model (r50k_base). We could use the
# request model name instead, but that doesn't work for Azure because Azure uses the
# deployment name (which differs from the model name).
return
for text in cast(List[str], model_input):
yield text
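
A sketch of the unreliability that FIXME describes, assuming tiktoken is installed; the token IDs are illustrative:

    import tiktoken

    tokens = [9906, 1917]  # illustrative cl100k_base token IDs

    # OpenAI reports a model name that maps to cl100k_base:
    cl100k = tiktoken.encoding_for_model("text-embedding-ada-002")
    print(cl100k.decode(tokens))

    # Azure may report just "ada", which historically refers to the GPT-3
    # model tokenized with r50k_base — the same IDs decode to different text,
    # so the response's model name can't be trusted to pick a tokenizer.
    r50k = tiktoken.get_encoding("r50k_base")
    print(r50k.decode(tokens))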