Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions js/.changeset/lemon-geese-hear.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@arizeai/openinference-instrumentation-openai": minor
---

Extract the request URL and record it on the active span as the `url.full` (redacted) and `url.path` attributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* HTTP utilities for OpenTelemetry instrumentation
* JavaScript equivalent of opentelemetry-python-contrib/util/opentelemetry-util-http
* Minimal version containing only URL redaction functionality
*/

/**
 * Names of query parameters whose values are replaced with `REDACTED`
 * by `redactQueryParameters`. Names are matched exactly (case-sensitive).
 */
const PARAMS_TO_REDACT = [
  "AWSAccessKeyId", "Signature",
  "sig", "X-Goog-Signature",
];

/**
 * Masks any userinfo (username and/or password) embedded in a URL.
 *
 * When either component is present, both are overwritten with the literal
 * `REDACTED` and the re-serialized URL is returned. A URL without
 * credentials, or a string that cannot be parsed as a URL, is returned
 * exactly as it was passed in.
 *
 * @param url The URL string to inspect
 * @returns The URL with credentials masked, or the input string unchanged
 */
export function removeUrlCredentials(url: string): string {
  try {
    const target = new URL(url);
    const hasCredentials = target.username !== "" || target.password !== "";

    // Nothing to hide: hand back the caller's string untouched
    if (!hasCredentials) {
      return url;
    }

    target.username = "REDACTED";
    target.password = "REDACTED";
    return target.toString();
  } catch {
    // Unparseable input is passed through as-is
    return url;
  }
}

/**
 * Replaces the values of sensitive query parameters with `REDACTED`.
 *
 * The set of sensitive names comes from `PARAMS_TO_REDACT`. If the URL has
 * no query string, contains none of those names, or cannot be parsed, the
 * input string is returned unchanged.
 *
 * @param url The URL string to inspect
 * @returns The URL with sensitive values masked, or the input string unchanged
 */
export function redactQueryParameters(url: string): string {
  try {
    const target = new URL(url);

    // No query string at all, so there is nothing to mask
    if (target.search === "") {
      return url;
    }

    const query = new URLSearchParams(target.search);
    const sensitiveNames = PARAMS_TO_REDACT.filter((name) => query.has(name));

    if (sensitiveNames.length === 0) {
      return url;
    }

    sensitiveNames.forEach((name) => query.set(name, "REDACTED"));

    // Write the masked query back and re-serialize the URL
    target.search = query.toString();
    return target.toString();
  } catch {
    // Unparseable input is passed through as-is
    return url;
  }
}

/**
 * Applies every redaction step to a URL: credentials are masked first,
 * then sensitive query parameter values.
 *
 * @param url The URL string to process
 * @returns The URL with credentials and sensitive query values masked
 */
export function redactUrl(url: string): string {
  return redactQueryParameters(removeUrlCredentials(url));
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import {
} from "openai/resources";
import { assertUnreachable, isString } from "./typeUtils";
import { isTracingSuppressed } from "@opentelemetry/core";
import { redactUrl } from "./httpUtils";

import {
OITracer,
Expand Down Expand Up @@ -95,6 +96,73 @@ function getExecContext(span: Span) {
return execContext;
}

/**
 * Extracts the base URL from an OpenAI client instance.
 *
 * Looks at `instance.baseURL` first and falls back to
 * `instance._client.baseURL`. The argument is untyped, so it is checked
 * defensively: a nullish instance or a non-string `baseURL` yields
 * `undefined` rather than throwing or leaking a non-string value.
 *
 * @param instance The OpenAI client instance (may be nested)
 * @returns The base URL string, or undefined if not found
 */
function getBaseUrl(instance: unknown): string | undefined {
  // Property access on null/undefined would throw a TypeError
  if (instance == null) {
    return undefined;
  }

  const client = instance as {
    baseURL?: unknown;
    _client?: { baseURL?: unknown } | null;
  };

  // A non-empty string on the instance itself wins
  if (typeof client.baseURL === "string" && client.baseURL) {
    return client.baseURL;
  }

  const nested = client._client?.baseURL;
  return typeof nested === "string" ? nested : undefined;
}

/**
 * Extracts the path of `fullUrl` relative to the path of `baseUrl`.
 *
 * The base path is stripped only when it matches on a whole path segment:
 * base `/v1` is a prefix of `/v1/chat` but not of `/v10/chat`. A trailing
 * slash on the base path is ignored, so `/v1` and `/v1/` behave the same.
 * When the full path is not under the base path, the full path is returned.
 * Query string and fragment are never part of the result.
 *
 * @param fullUrl The complete URL to extract the path from
 * @param baseUrl The base URL to remove from the full URL
 * @returns The relative path (always starting with `/`), or null if either URL cannot be parsed
 */
const extractRelativePath = (
  fullUrl: string,
  baseUrl: string,
): string | null => {
  try {
    const fullPath = new URL(fullUrl).pathname;
    // Normalize away trailing slashes so the boundary check below is uniform
    const basePath = new URL(baseUrl).pathname.replace(/\/+$/, "");

    // Require the match to end on a segment boundary; a plain startsWith
    // would treat "/v1" as a prefix of "/v10/chat"
    const isUnderBase =
      fullPath === basePath || fullPath.startsWith(`${basePath}/`);
    const path = isUnderBase ? fullPath.slice(basePath.length) : fullPath;

    return path.startsWith("/") ? path : `/${path}`;
  } catch {
    return null;
  }
};

/**
 * Copies selected query parameters from the original URL onto a redacted URL.
 *
 * For each requested name, a non-empty value found on `originalUrl` is set
 * on `redactedUrl` (overwriting any value already there). If nothing is
 * copied, or either URL fails to parse, `redactedUrl` is returned unchanged.
 *
 * @param redactedUrl The URL that has been redacted
 * @param originalUrl The original URL to read parameter values from
 * @param paramsToRestore Parameter names to copy over (defaults to ["api-version"])
 * @returns The redacted URL with the requested parameters restored
 */
const addBackNonSensitiveParams = (
  redactedUrl: string,
  originalUrl: string,
  paramsToRestore: string[] = ["api-version"],
): string => {
  try {
    const source = new URL(originalUrl);
    const target = new URL(redactedUrl);
    let restoredCount = 0;

    for (const name of paramsToRestore) {
      const value = source.searchParams.get(name);
      // Missing and empty values are both skipped
      if (!value) {
        continue;
      }
      target.searchParams.set(name, value);
      restoredCount += 1;
    }

    // Only re-serialize when something was actually copied
    return restoredCount === 0 ? redactedUrl : target.toString();
  } catch {
    return redactedUrl;
  }
};

/**
* Gets the appropriate LLM provider based on the OpenAI client instance
* Follows the same logic as the Python implementation by checking the baseURL host
Expand Down Expand Up @@ -256,6 +324,33 @@ export class OpenAIInstrumentation extends InstrumentationBase<typeof openai> {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const instrumentation: OpenAIInstrumentation = this;

// Patch the buildURL method to capture URL information.
// The wrapper calls the original buildURL, records URL attributes on the
// currently active span (if any), and returns the original URL unmodified.
this._wrap(
module.OpenAI.prototype,
"buildURL",
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(original: any): any => {
return function patchedBuildURL(this: unknown, ...args: unknown[]) {
// Forward `this` and all arguments untouched to the original method
const urlFull = original.apply(this, args) as string;
const activeSpan = trace.getActiveSpan();

// No active span to annotate: behave exactly like the original
if (!activeSpan) return urlFull;

// Record the redacted URL; the span never receives the raw urlFull
const redactedUrl = redactUrl(urlFull);
const finalUrl = addBackNonSensitiveParams(redactedUrl, urlFull);
activeSpan.setAttribute("url.full", finalUrl);

// url.path is only set when a base URL is found on the client and the
// relative path can be extracted from it
const baseUrl = getBaseUrl(this);
const urlPath = baseUrl && extractRelativePath(urlFull, baseUrl);
if (urlPath) {
activeSpan.setAttribute("url.path", urlPath);
}

// Callers always get the original URL, never the redacted one
return urlFull;
};
},
);

// Patch create chat completions
type ChatCompletionCreateType =
typeof module.OpenAI.Chat.Completions.prototype.create;
Expand Down Expand Up @@ -614,6 +709,7 @@ export class OpenAIInstrumentation extends InstrumentationBase<typeof openai> {
moduleVersion?: string,
) {
diag.debug(`Removing patch for ${MODULE_NAME}@${moduleVersion}`);
this._unwrap(moduleExports.OpenAI.prototype, "buildURL");
this._unwrap(moduleExports.OpenAI.Chat.Completions.prototype, "create");
this._unwrap(moduleExports.OpenAI.Completions.prototype, "create");
this._unwrap(moduleExports.OpenAI.Embeddings.prototype, "create");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { redactUrl } from "../src/httpUtils";

describe("httpUtils", () => {
  describe("redactUrl", () => {
    // Each case is [test title, input URL, expected redactUrl output]
    const cases: ReadonlyArray<readonly [string, string, string]> = [
      [
        "should redact credentials from URLs",
        "https://user:pass@api.openai.com/v1/chat/completions",
        "https://REDACTED:REDACTED@api.openai.com/v1/chat/completions",
      ],
      [
        "should redact sensitive query parameters",
        "https://api.example.com/chat?AWSAccessKeyId=secret&Signature=secret&model=gpt-4",
        "https://api.example.com/chat?AWSAccessKeyId=REDACTED&Signature=REDACTED&model=gpt-4",
      ],
      [
        "should redact both credentials and query parameters",
        "https://user:pass@api.example.com/chat?AWSAccessKeyId=secret&model=gpt-4",
        "https://REDACTED:REDACTED@api.example.com/chat?AWSAccessKeyId=REDACTED&model=gpt-4",
      ],
      [
        // Unparseable input must come back unchanged
        "should handle malformed URLs gracefully",
        "not-a-valid-url",
        "not-a-valid-url",
      ],
    ];

    for (const [title, input, expected] of cases) {
      it(title, () => {
        expect(redactUrl(input)).toBe(expected);
      });
    }
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,12 @@ describe("OpenAIInstrumentation", () => {
`);
});

it("should ensure buildURL method exists on OpenAI prototype", () => {
  // Guard test: the instrumentation wraps OpenAI.prototype.buildURL, so this
  // fails if the OpenAI SDK removes that method. If it fails, the
  // instrumentation needs a different way to capture the URL.
  const buildUrlKind = typeof OpenAI.prototype.buildURL;
  expect(buildUrlKind).toBe("function");
});

it("should capture context attributes and add them to spans", async () => {
const response = {
id: "cmpl-8fZu1H3VijJUWev9asnxaYyQvJTC9",
Expand Down Expand Up @@ -1002,6 +1008,7 @@ describe("OpenAIInstrumentation", () => {
}
`);
});

it("creates a span for chat completions parse", async () => {
const response = {
id: "chatcmpl-parseTest",
Expand Down Expand Up @@ -1650,4 +1657,17 @@ describe("OpenAIInstrumentation with a custom tracer provider", () => {
expect(span.attributes["llm.model_name"]).toBe("gpt-3.5-turbo-0613");
});
});

// Sanity check for constructing a client against an Azure-style base URL.
// NOTE(review): the test title mentions provider detection, but the only
// assertion is on the client's own baseURL; no span attributes or provider
// value are inspected here. Consider renaming or extending the test.
describe("URL extraction", () => {
it("should detect Azure provider correctly", () => {
const azureClient = new OpenAI({
apiKey: "test-key",
baseURL:
"https://test-resource.openai.azure.com/openai/deployments/gpt-4",
});

// Just verify the client was created with the Azure base URL
expect(azureClient.baseURL).toContain("openai.azure.com");
});
});
});
Loading