wip

GeLi2001 · GeLi2001 · commit fd654078bb4e · 2025-10-28T10:29:12.000-07:00
diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py
@@ -10,6 +10,7 @@
 from openinference.instrumentation.google_genai._utils import (
     _as_input_attributes,
     _io_value_and_type,
+    _ValueAndType,
 )
 from openinference.semconv.trace import (
     ImageAttributes,
@@ -39,7 +40,7 @@ def get_attributes_from_request(
         yield SpanAttributes.LLM_PROVIDER, OpenInferenceLLMProviderValues.GOOGLE.value
         try:
             yield from _as_input_attributes(
-                _io_value_and_type(request_parameters),
+                self._get_phoenix_friendly_input_value(request_parameters),
             )
         except Exception:
             logger.exception(
@@ -459,12 +460,133 @@ def _flatten_parts(self, parts: list[Part]) -> Iterator[Tuple[str, AttributeValu
                 elif isinstance(value, str):
                     # Flatten all other string values into a single message content
                     content_values.append(value)
-                else:
-                    # TODO: Handle other types of parts
-                    logger.debug(f"Non-text part encountered: {part}")
+            else:
+                # TODO: Handle other types of parts
+                logger.debug(f"Non-text part encountered: {part}")
         if content_values:
             yield (MessageAttributes.MESSAGE_CONTENT, "\n\n".join(content_values))
 
+    def _get_phoenix_friendly_input_value(self, request_parameters: Any) -> _ValueAndType:
+        """
+        Build the span input value, simplifying Content objects under "contents" for display.
+        Non-mapping requests, empty "contents", and any failure use the unmodified parameters.
+        """
+        try:
+            # Non-mapping requests have no "contents" key to inspect; serialize them unchanged
+            if not isinstance(request_parameters, Mapping):
+                return _io_value_and_type(request_parameters)
+
+            # A missing or empty "contents" entry leaves nothing to simplify
+            contents = request_parameters.get("contents")
+            if not contents:
+                return _io_value_and_type(request_parameters)
+
+            # Shallow copy, so reassigning "contents" below does not touch the caller's mapping
+            cleaned_params = dict(request_parameters)
+
+            # Only values exposing a `parts` attribute are rewritten; anything else passes through
+            if hasattr(contents, "parts"):
+                # Single object with `parts` (presumably a Content)
+                cleaned_params["contents"] = self._clean_content_for_display(contents)
+            elif isinstance(contents, (list, tuple)):
+                # Sequence: simplify items that have `parts`, keep the rest unchanged
+                cleaned_params["contents"] = [
+                    self._clean_content_for_display(content)
+                    if hasattr(content, "parts")
+                    else content
+                    for content in contents
+                ]
+
+            # Serialize the (possibly rewritten) copy with the standard helper
+            return _io_value_and_type(cleaned_params)
+
+        except Exception:
+            logger.exception(
+                "Failed to create Phoenix-friendly input value, falling back to default"
+            )
+            return _io_value_and_type(request_parameters)
+
+    def _clean_content_for_display(self, content: Any) -> Dict[str, Any]:
+        """Simplify a Content: images become data URLs, other inline data a text summary."""
+        try:
+            # Simplified representation; "user" is passed as the fallback role
+            result = {"role": get_attribute(content, "role", "user"), "parts": []}
+
+            parts = get_attribute(content, "parts", [])
+            for part in parts:
+                if text := get_attribute(part, "text"):
+                    result["parts"].append({"text": text})
+                elif inline_data := get_attribute(part, "inline_data"):
+                    mime_type = get_attribute(inline_data, "mime_type", "unknown") or "unknown"
+                    data = get_attribute(inline_data, "data")
+
+                    if mime_type.startswith("image/"):
+                        # Image payloads are kept (as a data URL) so Phoenix can display them
+                        if data:
+                            import base64
+
+                            # Normalize the payload to a base64 string, whatever its type
+                            if isinstance(data, bytes):
+                                base64_data = base64.b64encode(data).decode()
+                            elif isinstance(data, str):
+                                # Assumed to be base64 already; not validated here
+                                base64_data = data
+                            else:
+                                # Any other type: base64-encode its str() representation
+                                base64_data = base64.b64encode(str(data).encode()).decode()
+
+                            data_url = f"data:{mime_type};base64,{base64_data}"
+                            result["parts"].append(
+                                {
+                                    "inline_data": {
+                                        "mime_type": mime_type,
+                                        "data_url": data_url,  # Phoenix-friendly image URL
+                                        "description": f"Image ({mime_type})",
+                                    }
+                                }
+                            )
+                        else:
+                            result["parts"].append(
+                                {
+                                    "inline_data": {
+                                        "mime_type": mime_type,
+                                        "description": f"[Image: {mime_type}, no data]",
+                                    }
+                                }
+                            )
+                    else:
+                        try:
+                            data_size_value = len(data) if data else 0  # chars if data is str
+                            data_size_str = str(data_size_value)
+                        except (TypeError, AttributeError):
+                            data_size_str = "unknown"
+                        result["parts"].append(
+                            {
+                                "inline_data": {
+                                    "mime_type": mime_type,
+                                    "description": f"[File data: {mime_type}, {data_size_str} bytes]",  # noqa: E501
+                                }
+                            }
+                        )
+                elif file_data := get_attribute(part, "file_data"):
+                    file_uri = get_attribute(file_data, "file_uri", "unknown") or "unknown"
+                    mime_type = get_attribute(file_data, "mime_type", "unknown") or "unknown"
+                    result["parts"].append(
+                        {"file_data": {"file_uri": file_uri, "mime_type": mime_type}}
+                    )
+                elif function_call := get_attribute(part, "function_call"):
+                    result["parts"].append({"function_call": str(function_call)})
+                elif function_response := get_attribute(part, "function_response"):
+                    result["parts"].append({"function_response": str(function_response)})
+                else:
+                    result["parts"].append({"unknown_part": str(type(part))})
+
+            return result
+
+        except Exception:
+            logger.exception("Failed to clean content for display")
+            return {"role": "user", "parts": [{"error": "Failed to process content"}]}
+
     def _extract_tool_call_index(self, attr: str) -> int:
         """Extract tool call index from message tool call attribute key.