-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cosmosdbnosql: Add Semantic Cache Integration #7033
base: main
Are you sure you want to change the base?
Changes from all commits
ea34aa7
ceb00af
0ee6a31
d306cf9
75f496b
72e790d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,7 +68,7 @@ export interface AzureCosmosDBNoSQLInitOptions { | |
*/ | ||
export interface AzureCosmosDBNoSQLConfig | ||
extends AzureCosmosDBNoSQLInitOptions { | ||
readonly client?: CosmosClient; | ||
client?: CosmosClient; | ||
readonly connectionString?: string; | ||
readonly endpoint?: string; | ||
readonly credentials?: TokenCredential; | ||
|
@@ -78,7 +78,7 @@ export interface AzureCosmosDBNoSQLConfig | |
readonly metadataKey?: string; | ||
} | ||
|
||
// User agent identifiers sent with every Cosmos client request so traffic
// from this LangChain integration is attributable in Azure telemetry.
// NOTE(review): after this diff only USER_AGENT_SUFFIX is passed to
// CosmosClient; USER_AGENT_PREFIX may be unused — confirm against the
// rest of the file before removing.
const USER_AGENT_PREFIX = "langchainjs-azure-cosmosdb-nosql"; | ||
const USER_AGENT_SUFFIX = "LangChain-CDBNoSQL-VectorStore-JavaScript"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you explain why you made this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sinedied we are following a user agent pattern across all AI integrations, and we came up with "LangChain-CDBNoSQL-VectorStore-JavaScript" |
||
|
||
/** | ||
* Azure Cosmos DB for NoSQL vCore vector store. | ||
|
@@ -151,14 +151,14 @@ export class AzureCosmosDBNoSQLVectorStore extends VectorStore { | |
this.client = new CosmosClient({ | ||
endpoint, | ||
key, | ||
userAgentSuffix: USER_AGENT_PREFIX, | ||
userAgentSuffix: USER_AGENT_SUFFIX, | ||
}); | ||
} else { | ||
// Use managed identity | ||
this.client = new CosmosClient({ | ||
endpoint, | ||
aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(), | ||
userAgentSuffix: USER_AGENT_PREFIX, | ||
userAgentSuffix: USER_AGENT_SUFFIX, | ||
} as CosmosClientOptions); | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
import { | ||
BaseCache, | ||
deserializeStoredGeneration, | ||
getCacheKey, | ||
serializeGeneration, | ||
} from "@langchain/core/caches"; | ||
import { Generation } from "@langchain/core/outputs"; | ||
import { Document } from "@langchain/core/documents"; | ||
import { EmbeddingsInterface } from "@langchain/core/embeddings"; | ||
import { CosmosClient, CosmosClientOptions } from "@azure/cosmos"; | ||
import { DefaultAzureCredential } from "@azure/identity"; | ||
import { getEnvironmentVariable } from "@langchain/core/utils/env"; | ||
import { | ||
AzureCosmosDBNoSQLConfig, | ||
AzureCosmosDBNoSQLVectorStore, | ||
} from "./azure_cosmosdb_nosql.js"; | ||
|
||
const USER_AGENT_SUFFIX = "LangChain-CDBNoSQL-SemanticCache-JavaScript"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If possible, it would be preferable to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sinedied we are following a user agent pattern across all AI integrations, and we came up with "LangChain-CDBNoSQL-VectorStore-JavaScript" |
||
|
||
// Create a new object based on dbConfig, and modify the 'client' property with user agent. | ||
function updateDbConfig( | ||
dbConfig: AzureCosmosDBNoSQLConfig, | ||
client: CosmosClient | ||
) { | ||
const updatedDbConfig = { | ||
...dbConfig, | ||
client, | ||
}; | ||
|
||
return updatedDbConfig; | ||
} | ||
|
||
/** | ||
* Represents a Semantic Cache that uses CosmosDB NoSQL backend as the underlying | ||
* storage system. | ||
* | ||
* @example | ||
* ```typescript | ||
* const embeddings = new OpenAIEmbeddings(); | ||
* const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, { | ||
* databaseName: DATABASE_NAME, | ||
* containerName: CONTAINER_NAME | ||
* }); | ||
* const model = new ChatOpenAI({cache}); | ||
* | ||
* // Invoke the model to perform an action | ||
* const response = await model.invoke("Do something random!"); | ||
* console.log(response); | ||
* ``` | ||
*/ | ||
export class AzureCosmosDBNoSQLSemanticCache extends BaseCache { | ||
private embeddings: EmbeddingsInterface; | ||
|
||
private config: AzureCosmosDBNoSQLConfig; | ||
|
||
private cacheDict: { [key: string]: AzureCosmosDBNoSQLVectorStore } = {}; | ||
|
||
constructor( | ||
embeddings: EmbeddingsInterface, | ||
dbConfig: AzureCosmosDBNoSQLConfig | ||
) { | ||
super(); | ||
let client: CosmosClient; | ||
|
||
const connectionString = | ||
dbConfig.connectionString ?? | ||
getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING"); | ||
|
||
const endpoint = | ||
dbConfig.endpoint ?? | ||
getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_ENDPOINT"); | ||
|
||
if (!dbConfig.client) { | ||
if (connectionString) { | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
let [endpoint, key] = connectionString!.split(";"); | ||
[, endpoint] = endpoint.split("="); | ||
[, key] = key.split("="); | ||
|
||
client = new CosmosClient({ | ||
endpoint, | ||
key, | ||
userAgentSuffix: USER_AGENT_SUFFIX, | ||
}); | ||
} else { | ||
// Use managed identity | ||
client = new CosmosClient({ | ||
endpoint, | ||
aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(), | ||
userAgentSuffix: USER_AGENT_SUFFIX, | ||
} as CosmosClientOptions); | ||
} | ||
this.config = updateDbConfig(dbConfig, client); | ||
} else { | ||
this.config = dbConfig; | ||
} | ||
this.embeddings = embeddings; | ||
} | ||
|
||
private getLlmCache(llmKey: string) { | ||
const key = getCacheKey(llmKey); | ||
if (!this.cacheDict[key]) { | ||
this.cacheDict[key] = new AzureCosmosDBNoSQLVectorStore( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the current state of this implementation, this use the same default container name as for the VectorStore, which can be problematic: For example, if a user uses default values and have a vector store for RAG and semantic cache, the results will get mixed up. I suggest 2 changes:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer adding a different name for the semantic caching container. We are doing the same in LangChain python semantic cache as well. Let's keep the vector search and semantic cache container different. |
||
this.embeddings, | ||
this.config | ||
); | ||
} | ||
return this.cacheDict[key]; | ||
} | ||
|
||
/** | ||
* Retrieves data from the cache. | ||
* | ||
* @param prompt The prompt for lookup. | ||
* @param llmKey The LLM key used to construct the cache key. | ||
* @returns An array of Generations if found, null otherwise. | ||
*/ | ||
public async lookup(prompt: string, llmKey: string) { | ||
const llmCache = this.getLlmCache(llmKey); | ||
|
||
const results = await llmCache.similaritySearch(prompt, 1); | ||
if (!results.length) return null; | ||
|
||
const generations = results.flatMap((result) => | ||
result.metadata.return_value.map((gen: string) => | ||
deserializeStoredGeneration(JSON.parse(gen)) | ||
) | ||
); | ||
|
||
return generations.length > 0 ? generations : null; | ||
} | ||
|
||
/** | ||
* Updates the cache with new data. | ||
* | ||
* @param prompt The prompt for update. | ||
* @param llmKey The LLM key used to construct the cache key. | ||
* @param value The value to be stored in the cache. | ||
*/ | ||
public async update( | ||
prompt: string, | ||
llmKey: string, | ||
returnValue: Generation[] | ||
) { | ||
const serializedGenerations = returnValue.map((generation) => | ||
JSON.stringify(serializeGeneration(generation)) | ||
); | ||
const llmCache = this.getLlmCache(llmKey); | ||
const metadata = { | ||
llm_string: llmKey, | ||
prompt, | ||
return_value: serializedGenerations, | ||
}; | ||
const doc = new Document({ | ||
pageContent: prompt, | ||
metadata, | ||
}); | ||
await llmCache.addDocuments([doc]); | ||
} | ||
|
||
/** | ||
* deletes the semantic cache for a given llmKey | ||
* @param llmKey | ||
*/ | ||
public async clear(llmKey: string) { | ||
const key = getCacheKey(llmKey); | ||
if (this.cacheDict[key]) { | ||
await this.cacheDict[key].delete(); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from "./azure_cosmosdb_mongodb.js"; | ||
export * from "./azure_cosmosdb_nosql.js"; | ||
export * from "./caches.js"; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
/* eslint-disable no-process-env */ | ||
/* eslint-disable @typescript-eslint/no-explicit-any */ | ||
|
||
import { IndexingMode, VectorEmbeddingPolicy } from "@azure/cosmos"; | ||
import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing"; | ||
import { AzureCosmosDBNoSQLSemanticCache } from "../caches.js"; | ||
|
||
// Names of the throwaway database/container the integration tests create.
const DATABASE_NAME = "langchainTestDB"; | ||
const CONTAINER_NAME = "testContainer"; | ||
|
||
function indexingPolicy(indexType: any) { | ||
return { | ||
indexingMode: IndexingMode.consistent, | ||
includedPaths: [{ path: "/*" }], | ||
excludedPaths: [{ path: '/"_etag"/?' }], | ||
vectorIndexes: [{ path: "/embedding", type: indexType }], | ||
}; | ||
} | ||
|
||
function vectorEmbeddingPolicy(distanceFunction: any): VectorEmbeddingPolicy { | ||
return { | ||
vectorEmbeddings: [ | ||
{ | ||
path: "/embedding", | ||
dataType: "float32", | ||
distanceFunction, | ||
dimensions: 1536, | ||
}, | ||
], | ||
}; | ||
} | ||
|
||
function initializeCache( | ||
indexType: any, | ||
distanceFunction: any | ||
): AzureCosmosDBNoSQLSemanticCache { | ||
let cache: AzureCosmosDBNoSQLSemanticCache; | ||
if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) { | ||
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), { | ||
databaseName: DATABASE_NAME, | ||
containerName: CONTAINER_NAME, | ||
connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING, | ||
indexingPolicy: indexingPolicy(indexType), | ||
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction), | ||
}); | ||
} else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) { | ||
cache = new AzureCosmosDBNoSQLSemanticCache(new FakeEmbeddings(), { | ||
databaseName: DATABASE_NAME, | ||
containerName: CONTAINER_NAME, | ||
endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT, | ||
indexingPolicy: indexingPolicy(indexType), | ||
vectorEmbeddingPolicy: vectorEmbeddingPolicy(distanceFunction), | ||
}); | ||
} else { | ||
throw new Error( | ||
"Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT" | ||
); | ||
} | ||
return cache; | ||
} | ||
|
||
/* | ||
* To run this test, you need to have an Azure Cosmos DB for NoSQL instance | ||
* running. You can deploy a free version on Azure Portal without any cost, | ||
* following this guide: | ||
* https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search | ||
* | ||
* You do not need to create a database or collection, it will be created | ||
* automatically by the test. | ||
* | ||
* Once you have the instance running, you need to set the following environment | ||
* variables before running the test: | ||
* - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT | ||
*/ | ||
describe("Azure CosmosDB NoSQL Semantic Cache", () => { | ||
it("test AzureCosmosDBNoSqlSemanticCache with cosine quantizedFlat", async () => { | ||
const cache = initializeCache("quantizedFlat", "cosine"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "fizz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "fizz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
it("test AzureCosmosDBNoSqlSemanticCache with cosine flat", async () => { | ||
const cache = initializeCache("flat", "cosine"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "Buzz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "Buzz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
it("test AzureCosmosDBNoSqlSemanticCache with dotProduct quantizedFlat", async () => { | ||
const cache = initializeCache("quantizedFlat", "dotProduct"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "fizz" }, { text: "Buzz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "fizz" }, { text: "Buzz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
it("test AzureCosmosDBNoSqlSemanticCache with dotProduct flat", async () => { | ||
const cache = initializeCache("flat", "dotProduct"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "fizz" }, { text: "Buzz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "fizz" }, { text: "Buzz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
it("test AzureCosmosDBNoSqlSemanticCache with euclidean quantizedFlat", async () => { | ||
const cache = initializeCache("quantizedFlat", "euclidean"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "fizz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "fizz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
it("test AzureCosmosDBNoSqlSemanticCache with euclidean flat", async () => { | ||
const cache = initializeCache("flat", "euclidean"); | ||
const llm = new FakeLLM({}); | ||
const llmString = JSON.stringify(llm._identifyingParams()); | ||
await cache.update("foo", llmString, [{ text: "fizz" }, { text: "Buzz" }]); | ||
const cacheOutput = await cache.lookup("bar", llmString); | ||
expect(cacheOutput).toEqual([{ text: "fizz" }, { text: "Buzz" }]); | ||
await cache.clear(llmString); | ||
}); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The client should not be overridable by the user aside from the constructor, please keep it read-only. When creating the new client in the semantic cache, you can use the constructor to use it.