Skip to content

fix(embeddings): correctly decode base64 data #1448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1295,7 +1295,10 @@ export const toBase64 = (str: string | null | undefined): string => {
export const toFloat32Array = (base64Str: string): Array<number> => {
if (typeof Buffer !== 'undefined') {
// for Node.js environment
return Array.from(new Float32Array(Buffer.from(base64Str, 'base64').buffer));
const buf = Buffer.from(base64Str, 'base64');
return Array.from(
new Float32Array(buf.buffer, buf.byteOffset, buf.length / Float32Array.BYTES_PER_ELEMENT),
);
} else {
// for legacy web platform APIs
const binaryStr = atob(base64Str);
Expand Down
1 change: 1 addition & 0 deletions tests/api-resources/embeddings-base64-response.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"object":"list","data":[{"object":"embedding","index":0,"embedding":"A1fLvaC4Bb0QB7w8yEvrPOm9Xj2r0yA8EW4sPRq75j3Fbiq81/chPumAGb0afqG8R6AFvpzsQT35SPO7Hi39PEMAir1lf0A92McfvRoVlLxQv9o9tHqIvQYlrL0fwlK8sufPPYz2gjzH5Ho93GebvN+eCTxjRjW8PJRKvXMtFD4+n3C9ByMPO39Gkjs1Jm49A1fLPdNXpjv8RLm92McfveKpLz01VNO9SUIevhAHvD0flG09+9srvW5j7Txp8dY8LW4Ju08bJb1GdL29g+aNPWlLBD1p8dY8LkCkvfPLtjxcBj4+1/ehPebv/bz/Ifo8SqkOvREFHzyAr588HbUPPbFS+r00gri825WAPQlcGj1qHZ+8o8EOPo880Tn5dli9zRUSPc2APD0b5RG9mhxEvTyUSj3FQMU95u/9vE20tD3wwBC94NmxvXSUhL3Ofh8904WLPRbeJb2Paja8BClmvhwgOj2e6Ic9em0LPdj1BD3lSau7dJQEPJi107yB6kc97sTKO6lAaD2YDwE9YDuPPSFVC735dtg9SK1IOysJNrwtQkE8BmJxPb2ZXT0hVYs9g+YNvLfuuz2nyhe9z7nHN5UVWDxea5E77F1avTIbyL256oG9ft+hPVWJAbwNoug82TCtvUrm072wgN86JPWGO3TRyTwOY4a8xJwPvkx5DL1f1B68RwkTvja7Q72BrQI9Pfs6PTdfeb3RxG09jJxVvfl22D3eCbQ9FbR6vTPtYrn0mzS+kqGkPDxXhbwyG8i98M9wveayuL1EpL88lNqvve3yL70RQmQ7VcZGPaPBjr1wyEA9fKaWOskMibwNomi8J9Rku9EeGz016Si8O1mivQ38lb0EgxO88P1VvcilmLuNA0a9lj8DvHCceD3lSSs9uFWsve6HBT6XEZ68ShS5PFJSE70dTIK86OvDvSNgsbzS8DU8bPz8PAuVpTxKQIE9/NmOPBhFFj7LsL67PJRKvIxu8LwSqVS8D8yTPSOOlj1g0gG8A+69vYz2AjxPhLK80fLSPbrL/LztWz09LAcZvqfKF73B/JO8lnzIvCk5OLxwMU69dmQCvQtp3bs6hwe9WZKKume4S7x3CLg9zK4hPLsjDT16P6a7MbTXPRp+IT0dtQ89GayGvcngwD2F8bO70R4bu8tFlDxcBr67xAWdvdnWfzzQTIC9zn6fPYSKwz3alx28h8GxPW74wj3eNxk+xUBFvIpjyj0WdRi9AkoIPXhvqLugx+U8F0ezvUlCHjx3NAC9uvlhPEOmXD36oAM9D56uvddgrz2giiC9GhWUvHrWGLv0yRk8fOPbvMc+KLs7//S8v5UjPJUV2D0KLjW6YKa5PDciNDuJznQ9USZLPQ=="}],"model":"text-embedding-3-large","usage":{"prompt_tokens":1,"total_tokens":1}}
1 change: 1 addition & 0 deletions tests/api-resources/embeddings-float-response.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"object":"list","data":[{"object":"embedding","index":0,"embedding":[-0.099287055,-0.032646775,0.022952586,0.028722659,0.05438033,0.009816091,0.042097155,0.112661555,-0.010402386,0.158172,-0.037476454,-0.01971345,-0.13049422,0.04734479,-0.0074244705,0.030905303,-0.06738331,0.046996493,-0.039008945,-0.018076468,0.10681021,-0.06664029,-0.08405499,-0.012863665,0.10151614,0.015986703,0.061253335,-0.018970422,0.008399694,-0.011064145,-0.049457774,0.14470463,-0.058745615,0.0021840946,0.00446397,0.058141906,0.099287055,0.0050763874,-0.09046361,-0.039008945,0.042886622,-0.103187956,-0.15454973,0.091810346,0.058002587,-0.041957837,0.028978076,0.02623816,-0.002097021,-0.040309247,-0.09250693,0.06928732,0.03229848,0.02623816,-0.08020054,0.022314047,0.18557113,0.079086,-0.030998182,0.030533789,-0.034829415,0.009705798,0.019492865,0.035084832,-0.122228034,-0.022523023,0.06278583,0.037685428,-0.019423205,0.13941054,0.00039908706,-0.052847836,0.035665322,0.04602127,-0.035618883,-0.04787884,0.049457774,0.096314944,-0.030998182,0.08823452,-0.03534025,-0.086841345,-0.06473628,0.03893929,0.06812634,-0.040495,-0.011133804,-0.22476584,0.045440778,0.06636165,0.03403995,0.032461017,-0.005227315,0.008092035,-0.025843427,0.048807625,0.0061880266,0.05670229,0.031509012,0.06993747,-0.034016732,0.10569567,0.0030620862,-0.011110584,0.011795563,0.058931373,0.054101694,0.068033464,-0.008660915,0.091763906,-0.0370585,0.000023809172,0.013188739,0.004437848,-0.053312227,-0.09770812,-0.06343598,0.07903956,-0.007906278,0.028397584,-0.084565826,-0.103466585,0.0017051902,0.0041185785,0.024636008,-0.016404655,-0.14024645,-0.034295365,-0.009694188,-0.14359008,-0.04778596,0.031903747,0.045649756,-0.06088182,0.058049027,-0.052151248,0.10569567,0.087909445,-0.061206896,-0.00021641403,-0.17637616,0.020096574,-0.016276948,-0.09770812,-0.058792055,-0.09018497,0.023393758,-0.08586612,-0.04295628,0.0034829418,0.048528988,-0.06970527,0.047066152,0.0011493708,-0.01672973,-0.014198792,-0.0034916492,0.037871186,-0.0
10309507,-0.079271756,-0.073234655,-0.0090034045,-0.052244127,-0.0046584345,-0.04834323,-0.008010766,0.060696065,0.04181852,-0.08414787,0.13040134,-0.019295497,0.022592682,-0.03596718,-0.015905434,-0.0956648,-0.021652287,0.011104779,0.030882083,0.02021267,0.0631109,0.017437927,0.14674795,-0.005819415,-0.012364443,-0.029349588,-0.012979763,0.072166555,0.07351329,-0.007923692,-0.09273913,0.007993352,-0.021791605,0.1030022,-0.030858863,0.046230245,-0.14944142,-0.0370585,-0.018064858,-0.02447347,-0.011244097,-0.050340116,-0.03183409,-0.006756907,-0.033087946,-0.001057218,-0.012434102,0.089859895,0.009868335,0.034457903,-0.005073485,0.10532416,0.0394269,0.035084832,-0.06575794,0.09417874,-0.005491438,-0.002366949,0.018099686,-0.005799098,-0.07667115,0.0156151885,-0.06264651,0.07787858,0.09547904,-0.009618724,0.086794905,0.095200405,0.14962718,-0.012039368,0.09882267,-0.037221037,0.033273704,-0.0051402412,0.02804929,-0.08753794,0.009659358,-0.031300034,0.01379245,0.053869497,0.03213594,-0.08526241,0.085633926,-0.039194703,-0.018076468,-0.0023321197,0.009386528,-0.026841871,-0.0025672184,-0.02990686,0.009984433,0.105509914,-0.00069114624,0.022662342,0.0027486214,0.05976728,0.04959709]}],"model":"text-embedding-3-large","usage":{"prompt_tokens":1,"total_tokens":1}}
57 changes: 49 additions & 8 deletions tests/api-resources/embeddings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import OpenAI from 'openai';
import { Response } from 'node-fetch';
import { mockFetch } from '../utils/mock-fetch';
import fs from 'fs/promises';
import Path from 'path';

const client = new OpenAI({
apiKey: 'My API Key',
Expand Down Expand Up @@ -33,34 +36,72 @@ describe('resource embeddings', () => {
});
});

test('create: encoding_format=float should create float32 embeddings', async () => {
test('create: encoding_format=default should create float32 embeddings', async () => {
const client = makeClient();
const response = await client.embeddings.create({
input: 'The quick brown fox jumped over the lazy dog',
model: 'text-embedding-3-small',
});

expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
expect(response.data?.at(0)?.embedding.at(0)).toBe(-0.09928705543279648);
});

test('create: encoding_format=base64 should create float32 embeddings', async () => {
test('create: encoding_format=float should create float32 embeddings', async () => {
const client = makeClient();
const response = await client.embeddings.create({
input: 'The quick brown fox jumped over the lazy dog',
model: 'text-embedding-3-small',
encoding_format: 'base64',
encoding_format: 'float',
});

expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
expect(response.data?.at(0)?.embedding.at(0)).toBe(-0.099287055);
});

test('create: encoding_format=default should create float32 embeddings', async () => {
test('create: encoding_format=base64 should return base64 embeddings', async () => {
const client = makeClient();
const response = await client.embeddings.create({
input: 'The quick brown fox jumped over the lazy dog',
model: 'text-embedding-3-small',
encoding_format: 'base64',
});

expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
expect(typeof response.data?.at(0)?.embedding).toBe('string');
});
});

/**
 * Builds an `OpenAI` client wired to the `fetch` returned by `mockFetch()`.
 *
 * A single request handler is registered that parses the request body as
 * embedding-create params, looks at `encoding_format`, and replies with the
 * contents of the matching recorded JSON fixture located next to this file
 * (the base64 fixture for `'base64'`, the float fixture for anything else).
 *
 * @returns a client configured with the mocked `fetch`, a dummy API key and
 *          the test base URL (`TEST_API_BASE_URL`, or the local default).
 */
function makeClient(): OpenAI {
  const { fetch, handleRequest } = mockFetch();

  handleRequest(async (_, init) => {
    const params = JSON.parse(init!.body as string) as OpenAI.EmbeddingCreateParams;

    // these responses were taken from the live API with:
    //
    //   model: 'text-embedding-3-large',
    //   input: 'h',
    //   dimensions: 256,
    let fixtureName = 'embeddings-float-response.json';
    if (params.encoding_format === 'base64') {
      fixtureName = 'embeddings-base64-response.json';
    }

    // The fixture is served as-is; the client under test does the decoding.
    const payload = await fs.readFile(Path.join(__dirname, fixtureName));
    const responseInit = {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
      },
    };

    return new Response(payload, responseInit);
  });

  const baseURL = process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010';

  return new OpenAI({
    fetch,
    apiKey: 'My API Key',
    baseURL,
  });
}
Loading