Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 25 additions & 138 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
"@typescript-eslint/parser": "^8.44.0",
"@vitest/coverage-v8": "^3.2.4",
"@vitest/eslint-plugin": "^1.3.4",
"ai": "^5.0.72",
"duplexpair": "^1.0.2",
"eslint": "^9.34.0",
"eslint-config-prettier": "^10.1.8",
Expand Down Expand Up @@ -104,6 +103,7 @@
"@mongodb-js/devtools-proxy-support": "^0.5.3",
"@mongosh/arg-parser": "^3.19.0",
"@mongosh/service-provider-node-driver": "^3.17.0",
"ai": "^5.0.72",
"bson": "^6.10.4",
"express": "^5.1.0",
"lru-cache": "^11.1.0",
Expand All @@ -116,6 +116,7 @@
"oauth4webapi": "^3.8.0",
"openapi-fetch": "^0.14.0",
"ts-levenshtein": "^1.0.7",
"voyage-ai-provider": "^2.0.0",
"yargs-parser": "21.1.1",
"zod": "^3.25.76"
},
Expand Down
3 changes: 3 additions & 0 deletions src/common/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ export enum ErrorCodes {
ForbiddenCollscan = 1_000_002,
ForbiddenWriteOperation = 1_000_003,
AtlasSearchNotSupported = 1_000_004,
NoEmbeddingsProviderConfigured = 1_000_005,
AtlasVectorSearchIndexNotFound = 1_000_006,
AtlasVectorSearchInvalidQuery = 1_000_007,
}

export class MongoDBError<ErrorCode extends ErrorCodes = ErrorCodes> extends Error {
Expand Down
87 changes: 87 additions & 0 deletions src/common/search/embeddingsProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { createVoyage } from "voyage-ai-provider";
import type { VoyageProvider } from "voyage-ai-provider";
import { embedMany } from "ai";
import type { UserConfig } from "../config.js";
import assert from "assert";
import { createFetch } from "@mongodb-js/devtools-proxy-support";
import { z } from "zod";

/** A single piece of text to be embedded. */
type EmbeddingsInput = string;
/** A single embedding vector, represented as a plain array of numbers. */
type Embeddings = number[];
/**
 * Parameters that every embeddings provider in this module must accept.
 */
export type EmbeddingParameters = {
// Either "query" or "document".
// NOTE(review): presumably distinguishes search queries from stored documents — confirm against the provider docs.
inputType: "query" | "document";
};

/**
 * Contract for a service that turns text into embedding vectors.
 *
 * `SupportedModels` is the set of model identifiers the provider accepts and
 * `SupportedEmbeddingParameters` is the provider-specific parameter bag, which
 * must at least include the fields of `EmbeddingParameters`.
 */
export interface EmbeddingsProvider<
SupportedModels extends string,
SupportedEmbeddingParameters extends EmbeddingParameters,
> {
/**
 * Embeds a batch of inputs with the given model.
 *
 * @param modelId - Identifier of the model to use.
 * @param content - The texts to embed.
 * @param parameters - Provider-specific embedding parameters.
 * @returns A promise resolving to an array of embedding vectors.
 */
embed(
modelId: SupportedModels,
content: EmbeddingsInput[],
parameters: SupportedEmbeddingParameters
): Promise<Embeddings[]>;
}

/**
 * Zod schema for the Voyage model identifiers accepted by this module.
 * When the parsed value is `undefined`, it falls back to "voyage-3-large".
 */
export const zVoyageModels = z
.enum(["voyage-3-large", "voyage-3.5", "voyage-3.5-lite", "voyage-code-3"])
.default("voyage-3-large");

/**
 * Zod schema for the Voyage-specific embedding parameters.
 * Both fields may be omitted; omitted fields are filled with the defaults below.
 */
export const zVoyageEmbeddingParameters = z.object({
// Allowed values: 256, 512, 1024, 2048 or 4096. Defaults to 1024.
outputDimension: z
.union([z.literal(256), z.literal(512), z.literal(1024), z.literal(2048), z.literal(4096)])
.optional()
.default(1024),
// Allowed values: "float", "int8", "uint8", "binary" or "ubinary". Defaults to "float".
outputDType: z.enum(["float", "int8", "uint8", "binary", "ubinary"]).optional().default("float"),
});

/** Model identifier type inferred from `zVoyageModels`. */
type VoyageModels = z.infer<typeof zVoyageModels>;
/** Parsed Voyage parameters combined with the provider-agnostic `EmbeddingParameters`. */
type VoyageEmbeddingParameters = z.infer<typeof zVoyageEmbeddingParameters> & EmbeddingParameters;

/**
 * Embeddings provider backed by VoyageAI, accessed through the
 * `voyage-ai-provider` client and the `embedMany` helper of the `ai` package.
 */
class VoyageEmbeddingsProvider implements EmbeddingsProvider<VoyageModels, VoyageEmbeddingParameters> {
    private readonly voyage: VoyageProvider;

    /**
     * Creates the Voyage client from the user configuration.
     *
     * Only `voyageApiKey` is read from the configuration. `providedFetch` is an
     * optional fetch implementation used instead of the proxy-aware default.
     *
     * @throws An assertion error when `voyageApiKey` is missing or empty.
     */
    constructor({ voyageApiKey }: UserConfig, providedFetch?: typeof fetch) {
        assert(voyageApiKey, "The VoyageAI API Key does not exist. This is likely a bug.");

        // We should always use, by default, any enterprise proxy that the user has configured.
        // Direct requests to VoyageAI might get blocked by the network if they don't go through
        // the provided proxy.
        const customFetch: typeof fetch = (providedFetch ??
            createFetch({ useEnvironmentVariableProxies: true })) as unknown as typeof fetch;

        this.voyage = createVoyage({ apiKey: voyageApiKey, fetch: customFetch });
    }

    /**
     * Reports whether the configuration carries a non-empty VoyageAI API key.
     */
    static isConfiguredIn({ voyageApiKey }: UserConfig): boolean {
        return !!voyageApiKey;
    }

    /**
     * Embeds a batch of texts with the requested Voyage model.
     *
     * @param modelId - Voyage model identifier.
     * @param content - Texts to embed. An empty array resolves to an empty
     *   result without contacting the remote service.
     * @param parameters - Forwarded as-is to the provider under the `voyage`
     *   provider-options key.
     * @returns The embeddings produced by `embedMany`.
     */
    async embed<Model extends VoyageModels>(
        modelId: Model,
        content: EmbeddingsInput[],
        parameters: VoyageEmbeddingParameters
    ): Promise<Embeddings[]> {
        // Nothing to embed: avoid a pointless remote request.
        if (content.length === 0) {
            return [];
        }

        const model = this.voyage.textEmbeddingModel(modelId);
        const { embeddings } = await embedMany({
            model,
            values: content,
            providerOptions: { voyage: parameters },
        });

        return embeddings;
    }
}

/**
 * Selects the embeddings provider that the given user configuration enables.
 *
 * @param userConfig - The user configuration to inspect.
 * @returns A Voyage-backed provider when `VoyageEmbeddingsProvider.isConfiguredIn`
 *   accepts the configuration, otherwise `undefined`.
 */
export function getEmbeddingsProvider(
    userConfig: UserConfig
): EmbeddingsProvider<VoyageModels, VoyageEmbeddingParameters> | undefined {
    const voyageIsConfigured = VoyageEmbeddingsProvider.isConfiguredIn(userConfig);

    return voyageIsConfigured ? new VoyageEmbeddingsProvider(userConfig) : undefined;
}

/**
 * Zod schema for the embedding parameters: the Voyage parameters
 * extended with a `model` field validated by `zVoyageModels`.
 */
export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels });
/** Parsed (output) type of `zSupportedEmbeddingParameters`. */
export type SupportedEmbeddingParameters = z.infer<typeof zSupportedEmbeddingParameters>;
Loading
Loading