cosmosdbnosql: Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyter notebook #24424

Open · wants to merge 60 commits into base: master

Changes from 12 commits (60 commits total)

Commits
80936bf
Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyt…
gsa9989 Jul 19, 2024
01b7bd1
Merged latest master from upstream and resolved all conflicts
gsa9989 Jul 19, 2024
e891cd9
Removed openai_api_key parameter
gsa9989 Jul 19, 2024
676d762
Removed unnecessary space changes
gsa9989 Jul 19, 2024
7f00b8a
Merge branch 'users/garagundi/cosmosdbnosql' into users/akataria/rebase
aayush3011 Aug 7, 2024
66289ed
Merge pull request #1 from gsa9989/users/akataria/rebase
aayush3011 Aug 7, 2024
bb55a4b
Merge branch 'langchain-ai:master' into users/garagundi/cosmosdbnosql
aayush3011 Aug 7, 2024
2184d8e
Rebase from master
gsa9989 Aug 7, 2024
d9f684b
Merge branch 'users/garagundi/cosmosdbnosql' of https://github.com/gs…
gsa9989 Aug 7, 2024
4701fb7
test updates
gsa9989 Aug 8, 2024
d23bcd6
format
ccurme Aug 27, 2024
043f2bf
Merge branch 'master' into users/garagundi/cosmosdbnosql
ccurme Aug 27, 2024
4f58256
lint
ccurme Aug 27, 2024
cd84f02
Merge branch 'langchain-ai:master' into users/garagundi/cosmosdbnosql
aayush3011 Aug 27, 2024
aa1e846
linting
aayush3011 Aug 27, 2024
636415b
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Aug 27, 2024
cd744f0
linting
aayush3011 Aug 27, 2024
059be6a
Merge branch 'users/garagundi/cosmosdbnosql' of github.com:gsa9989/la…
aayush3011 Aug 27, 2024
c625e7d
linting
aayush3011 Aug 27, 2024
31b7c1b
linting
aayush3011 Aug 27, 2024
98d9d7e
linting
aayush3011 Aug 27, 2024
2c4d2d3
linting
aayush3011 Aug 27, 2024
04ac7ae
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
2c4f281
Linting
aayush3011 Sep 3, 2024
0caa7cd
Linting
aayush3011 Sep 3, 2024
9766154
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
8891976
Linting
aayush3011 Sep 3, 2024
eafbcb8
Merge branch 'users/garagundi/cosmosdbnosql' of github.com:gsa9989/la…
aayush3011 Sep 3, 2024
dbe1504
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
9ffa53f
Linting
aayush3011 Sep 3, 2024
eb1f8bf
Linting
aayush3011 Sep 3, 2024
3599624
Linting
aayush3011 Sep 3, 2024
f18a5d3
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
f2f1ff5
Linting
aayush3011 Sep 3, 2024
1e818da
Linting
aayush3011 Sep 3, 2024
0441131
Linting
aayush3011 Sep 3, 2024
810dd00
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
c3d0917
Linting
aayush3011 Sep 3, 2024
a748a58
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 3, 2024
81887e5
Merge branch 'langchain-ai:master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 25, 2024
860b0c0
Adding notebook sample
aayush3011 Sep 25, 2024
3378365
linting
aayush3011 Sep 25, 2024
fe08580
linting
aayush3011 Sep 25, 2024
cb02b1a
linting
aayush3011 Sep 25, 2024
bc8ee2d
linting
aayush3011 Sep 25, 2024
a5eebd9
linting
aayush3011 Sep 25, 2024
5f2c91f
linting
aayush3011 Sep 26, 2024
cce4959
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 26, 2024
0441fa7
linting
aayush3011 Sep 26, 2024
d003423
Merge branch 'users/garagundi/cosmosdbnosql' of github.com:gsa9989/la…
aayush3011 Sep 26, 2024
05fd438
linting
aayush3011 Sep 26, 2024
9e7838c
linting
aayush3011 Sep 26, 2024
f4250ac
linting
aayush3011 Sep 26, 2024
c13660c
linting
aayush3011 Sep 26, 2024
3f23135
linting
aayush3011 Sep 26, 2024
7fc0f59
Adding support for managed identity for cosmosdb nosql VS
aayush3011 Sep 26, 2024
224393c
linting
aayush3011 Sep 26, 2024
f87f18d
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Sep 26, 2024
cbd6f2b
Adding user agent for vector store
aayush3011 Oct 10, 2024
1ce7bbf
Merge branch 'master' into users/garagundi/cosmosdbnosql
aayush3011 Oct 10, 2024
2 changes: 1 addition & 1 deletion docs/docs/integrations/llm_caching.ipynb
@@ -2806,4 +2806,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
102 changes: 101 additions & 1 deletion libs/community/langchain_community/cache.py
@@ -60,6 +60,8 @@
)
from langchain_community.vectorstores.utils import DistanceStrategy

# from libs.community.langchain_community.vectorstores.azure_cosmos_db_no_sql import AzureCosmosDBNoSqlVectorSearch

Check failure on line 63 in libs/community/langchain_community/cache.py — GitHub Actions / cd libs/community / make lint — Ruff (E501): langchain_community/cache.py:63:89: Line too long (115 > 88) (reported by both the #3.8 and #3.12 jobs)

try:
from sqlalchemy.orm import declarative_base
except ImportError:
@@ -80,7 +82,10 @@
from langchain_community.utilities.astradb import (
_AstraDBCollectionEnvironment,
)
from langchain_community.vectorstores import AzureCosmosDBVectorSearch
from langchain_community.vectorstores import (
AzureCosmosDBNoSqlVectorSearch,
AzureCosmosDBVectorSearch,
)
from langchain_community.vectorstores import (
OpenSearchVectorSearch as OpenSearchVectorStore,
)
@@ -2275,6 +2280,101 @@
raise ValueError(f"Invalid enum value: {value}. Expected {enum_type}.")


class AzureCosmosDBNoSqlSemanticCache(BaseCache):
"""Cache that uses Cosmos DB NoSQL backend"""

def __init__(
self,
embedding: Embeddings,
cosmos_client: Optional[Any] = None,
database_name: str = "CosmosNoSqlCacheDB",
container_name: str = "CosmosNoSqlCacheContainer",
*,
vector_embedding_policy: Optional[Dict[str, Any]] = None,
indexing_policy: Optional[Dict[str, Any]] = None,
cosmos_container_properties: Dict[str, Any],
cosmos_database_properties: Dict[str, Any],
):
self.cosmos_client = cosmos_client
self.database_name = database_name
self.container_name = container_name
self.embedding = embedding
self.vector_embedding_policy = vector_embedding_policy
self.indexing_policy = indexing_policy
self.cosmos_container_properties = cosmos_container_properties
self.cosmos_database_properties = cosmos_database_properties
self._cache_: Optional[AzureCosmosDBNoSqlVectorSearch] = None

def _create_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch:
# create new vectorstore client to create the cache
if self.cosmos_client:
self._cache_ = AzureCosmosDBNoSqlVectorSearch(
cosmos_client=self.cosmos_client,
embedding=self.embedding,
vector_embedding_policy=self.vector_embedding_policy,
indexing_policy=self.indexing_policy,
cosmos_container_properties=self.cosmos_container_properties,
cosmos_database_properties=self.cosmos_database_properties,
database_name=self.database_name,
container_name=self.container_name,
)

return self._cache_

def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt."""
if not self._cache_:
self._cache_ = self._create_llm_cache(llm_string)
llm_cache = self._cache_
generations: List = []
# Retrieve the nearest cached entry from the vector store
results = llm_cache.similarity_search(
query=prompt,
k=1,
)
if results:
for document in results:
try:
generations.extend(loads(document.metadata["return_val"]))
except Exception:
logger.warning(
"Retrieving a cache value that could not be deserialized "
"properly. This is likely due to the cache being in an "
"older format. Please recreate your cache to avoid this "
"error."
)
# In a previous life we stored the raw text directly
# in the table, so assume it's in that format.
generations.extend(
_load_generations_from_json(document.metadata["return_val"])
)
return generations if generations else None

def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
for gen in return_val:
if not isinstance(gen, Generation):
raise ValueError(
"AzureCosmosDBNoSqlSemanticCache only supports caching of "
f"normal LLM generations, got {type(gen)}"
)
if not self._cache_:
self._cache_ = self._create_llm_cache(llm_string)
llm_cache = self._cache_
metadata = {
"llm_string": llm_string,
"prompt": prompt,
"return_val": dumps([g for g in return_val]),
}
llm_cache.add_texts(texts=[prompt], metadatas=[metadata])

def clear(self, **kwargs: Any) -> None:
"""Clear semantic cache for a given llm_string."""
database = self.cosmos_client.get_database_client(self.database_name)
container = database.get_container_client(self.container_name)

Check failure on line 2374 in libs/community/langchain_community/cache.py — GitHub Actions / cd libs/community / make lint — Ruff (F841): langchain_community/cache.py:2374:9: Local variable `container` is assigned to but never used (reported by both the #3.8 and #3.12 jobs)
database.delete_container(self.container_name)
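
The round trip that `update()` and `lookup()` implement above — serialize the generations into document metadata, then recover them from the nearest neighbour — can be sketched with an in-memory stand-in for the vector store. `FakeVectorStore` below is hypothetical and exists only for illustration; the real class delegates to `AzureCosmosDBNoSqlVectorSearch`:

```python
import json


class FakeVectorStore:
    """Hypothetical stand-in for AzureCosmosDBNoSqlVectorSearch:
    stores (text, metadata) pairs instead of embeddings."""

    def __init__(self):
        self.docs = []

    def add_texts(self, texts, metadatas):
        for text, metadata in zip(texts, metadatas):
            self.docs.append((text, metadata))

    def similarity_search(self, query, k=1):
        # A real store ranks by embedding distance; here every doc "matches".
        return [metadata for _, metadata in self.docs[:k]]


store = FakeVectorStore()

# update(): the cached generations travel inside the document metadata.
store.add_texts(
    texts=["foo"],
    metadatas=[{
        "llm_string": "fake-llm",
        "prompt": "foo",
        "return_val": json.dumps(["fizz"]),
    }],
)

# lookup(): the nearest neighbour's metadata carries the cached value back out.
results = store.similarity_search("bar", k=1)
cached = json.loads(results[0]["return_val"])
print(cached)  # ['fizz']
```

A semantically similar prompt ("bar") therefore returns the value cached for "foo", which is exactly what the integration tests below rely on with `FakeEmbeddings`.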


class OpenSearchSemanticCache(BaseCache):
"""Cache that uses OpenSearch vector store backend"""

@@ -0,0 +1,209 @@
"""Test Azure CosmosDB NoSql cache functionality."""

import os
import uuid

import pytest
from azure.cosmos import CosmosClient, PartitionKey
from langchain.globals import get_llm_cache, set_llm_cache
from langchain_core.outputs import Generation
from libs.community.tests.integration_tests.cache.fake_embeddings import (
FakeEmbeddings,
)
from libs.community.tests.unit_tests.llms.fake_llm import FakeLLM

from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
from langchain_community.vectorstores import AzureCosmosDBNoSqlVectorSearch

URI = os.environ["COSMOS_DB_URI"]
KEY = os.environ["COSMOS_DB_KEY"]
test_client = CosmosClient(URI, credential=KEY)


# cosine, euclidean, innerproduct
def indexing_policy(index_type: str):
return {
"indexingMode": "consistent",
"includedPaths": [{"path": "/*"}],
"excludedPaths": [{"path": '/"_etag"/?'}],
"vectorIndexes": [{"path": "/embedding", "type": index_type}],
}


def vector_embedding_policy(distance_function: str):
return {
"vectorEmbeddings": [
{
"path": "/embedding",
"dataType": "float32",
"distanceFunction": distance_function,
"dimensions": 1536,
}
]
}


partition_key = PartitionKey(path="/id")
cosmos_container_properties_test = {"partition_key": partition_key}
cosmos_database_properties_test = {}


# @pytest.fixture(scope="session")
def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("cosine"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)


def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("cosine"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)


def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("dotProduct"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)


def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("dotProduct"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)


def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("euclidean"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)


def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat() -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=test_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("euclidean"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)

llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]

# clear the cache
get_llm_cache().clear(llm_string=llm_string)
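
The six integration tests above differ only in their (distance function, index type) pair. Under pytest they could be driven from a single parametrized matrix; a minimal sketch of that matrix (the `@pytest.mark.parametrize` wiring itself is left out, and is not part of this PR):

```python
from itertools import product

# Combinations exercised above: 3 distance functions x 2 index types.
distance_functions = ["cosine", "dotProduct", "euclidean"]
index_types = ["quantizedFlat", "flat"]

combos = list(product(distance_functions, index_types))
print(len(combos))  # 6 parametrized cases instead of six hand-written tests
```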