1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -5,6 +5,7 @@
### New Features
- specify `embed_model="local"` to use default local embeddings in the service context (#6806)
- Add async `acall` endpoint to `BasePydanticProgram` (defaults to sync version). Implement for `OpenAIPydanticProgram`
- Add support for chroma v0.4.0 (#6937)

### Bug Fixes / Nits
- fix null metadata for searching existing vector dbs (#6912)
4 changes: 2 additions & 2 deletions docs/community/integrations/vector_stores.md
@@ -207,8 +207,8 @@ import chromadb
from llama_index.vector_stores import ChromaVectorStore

# Creating a Chroma client
# By default, Chroma will operate purely in-memory.
chroma_client = chromadb.Client()
# EphemeralClient operates purely in-memory, PersistentClient will also save to disk
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("quickstart")

# construct vector store
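The updated comment above notes that `PersistentClient` will also save to disk, but the snippet only shows `EphemeralClient`. A minimal sketch of the on-disk variant, mirroring the notebook changes further down in this PR (the `./chroma_db` path is just an example):

```python
import chromadb
from llama_index.vector_stores import ChromaVectorStore

# PersistentClient keeps the collection on disk at the given path (example path)
persistent_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = persistent_client.get_or_create_collection("quickstart")

# construct vector store backed by the persistent collection
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
```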
102 changes: 30 additions & 72 deletions docs/examples/vector_stores/ChromaIndexDemo.ipynb
@@ -70,20 +70,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "b3df0b97",
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-index\n",
"!pip install chromadb\n",
"!pip install sentence-transformers\n",
"!pip install pydantic==1.10.11"
"# !pip install llama-index chromadb --quiet\n",
"# !pip install chromadb\n",
"# !pip install sentence-transformers\n",
"# !pip install pydantic==1.10.11"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"id": "d48af8e1",
"metadata": {},
"outputs": [],
@@ -100,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "374a148b",
"metadata": {},
"outputs": [],
@@ -117,10 +117,18 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 4,
"id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jeff/.pyenv/versions/3.10.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"data": {
"text/markdown": [
@@ -137,7 +145,7 @@
],
"source": [
"# create client and a new collection\n",
"chroma_client = chromadb.Client()\n",
"chroma_client = chromadb.EphemeralClient()\n",
"chroma_collection = chroma_client.create_collection(\"quickstart\")\n",
"\n",
"# define embedding function\n",
@@ -174,22 +182,20 @@
"\n",
"Extending the previous example, if you want to save to disk, simply initialize the Chroma client and pass the directory where you want the data to be saved to. \n",
"\n",
"`Caution`: Chroma makes a best-effort to automatically save data to disk, however multiple in-memory clients can stomp each other's work. As a best practice, only have one client per path running at any given time.\n",
"\n",
"`Protip`: Sometimes you can call `db.persist()` to force a save. "
"`Caution`: Chroma makes a best-effort to automatically save data to disk, however multiple in-memory clients can stomp each other's work. As a best practice, only have one client per path running at any given time."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 5,
"id": "9c3a56a5",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>\n",
"Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
"The author grew up taking painting classes at Harvard and freelancing as a Lisp hacker. He also wrote a book on Lisp and moved to New York to be closer to his teacher, Idelle Weber.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -201,24 +207,18 @@
],
"source": [
"# save to disk\n",
"from chromadb.config import Settings\n",
"\n",
"db = chromadb.Client(\n",
" Settings(chroma_db_impl=\"duckdb+parquet\", persist_directory=\"./chroma_db\")\n",
")\n",
"db = chromadb.PersistentClient(path=\"./chroma_db\")\n",
"chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
"index = VectorStoreIndex.from_documents(\n",
" documents, storage_context=storage_context, service_context=service_context\n",
")\n",
"db.persist()\n",
"\n",
"# load from disk\n",
"db2 = chromadb.Client(\n",
" Settings(chroma_db_impl=\"duckdb+parquet\", persist_directory=\"./chroma_db\")\n",
")\n",
"db2 = chromadb.PersistentClient(path=\"./chroma_db\")\n",
"chroma_collection = db2.get_or_create_collection(\"quickstart\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
@@ -253,39 +253,15 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 6,
"id": "d6c9bd64",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n"
]
}
],
"outputs": [],
"source": [
"# create the chroma client and add our data\n",
"import chromadb\n",
"from chromadb.config import Settings\n",
"\n",
"remote_db = chromadb.Client(\n",
" Settings(\n",
" chroma_api_impl=\"rest\",\n",
" chroma_server_host=\"localhost\",\n",
" chroma_server_http_port=\"8000\",\n",
" )\n",
")\n",
"remote_db.reset() # resets the database\n",
"remote_db = chromadb.HttpClient()\n",
"chroma_collection = remote_db.get_or_create_collection(\"quickstart\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
@@ -297,33 +273,15 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 7,
"id": "88e10c26",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1874 tokens\n",
"> [get_response] Total LLM token usage: 1874 tokens\n",
"> [get_response] Total LLM token usage: 1874 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n"
]
},
{
"data": {
"text/markdown": [
"<b>\n",
"The author grew up writing essays, learning Italian, exploring Florence, painting people, working with computers, studying at RISD, living in a rent-controlled apartment, building an online store builder, editing code, publishing essays online, writing essays, working on spam filters, cooking for groups, buying a building, and attending parties.</b>"
"Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -357,15 +315,15 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 8,
"id": "d9411826",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'node_info': '{\"start\": 0, \"end\": 4040, \"_node_type\": \"1\"}', 'relationships': '{\"1\": \"a0294b91-ff5f-45fe-b249-5596a18cc952\", \"3\": \"95771df1-9ec9-4128-9a11-ac92b768e2e3\"}', 'document_id': 'a0294b91-ff5f-45fe-b249-5596a18cc952', 'doc_id': 'a0294b91-ff5f-45fe-b249-5596a18cc952', 'ref_doc_id': 'a0294b91-ff5f-45fe-b249-5596a18cc952', 'author': 'Paul Graham'}\n",
"{'_node_content': '{\"id_\": \"be08c8bc-f43e-4a71-ba64-e525921a8319\", \"embedding\": null, \"metadata\": {}, \"excluded_embed_metadata_keys\": [], \"excluded_llm_metadata_keys\": [], \"relationships\": {\"1\": {\"node_id\": \"2cbecdbb-0840-48b2-8151-00119da0995b\", \"node_type\": null, \"metadata\": {}, \"hash\": \"4c702b4df575421e1d1af4b1fd50511b226e0c9863dbfffeccb8b689b8448f35\"}, \"3\": {\"node_id\": \"6a75604a-fa76-4193-8f52-c72a7b18b154\", \"node_type\": null, \"metadata\": {}, \"hash\": \"d6c408ee1fbca650fb669214e6f32ffe363b658201d31c204e85a72edb71772f\"}}, \"hash\": \"b4d0b960aa09e693f9dc0d50ef46a3d0bf5a8fb3ac9f3e4bcf438e326d17e0d8\", \"text\": \"\", \"start_char_idx\": 0, \"end_char_idx\": 4050, \"text_template\": \"{metadata_str}\\\\n\\\\n{content}\", \"metadata_template\": \"{key}: {value}\", \"metadata_seperator\": \"\\\\n\"}', 'author': 'Paul Graham', 'doc_id': '2cbecdbb-0840-48b2-8151-00119da0995b', 'document_id': '2cbecdbb-0840-48b2-8151-00119da0995b', 'ref_doc_id': '2cbecdbb-0840-48b2-8151-00119da0995b'}\n",
"count before 20\n",
"count after 19\n"
]
@@ -406,7 +364,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.10"
},
"vscode": {
"interpreter": {
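The hunks above hide most of the remote-server and metadata-editing cells, so the following is a hedged sketch of what code along those lines might look like under chroma v0.4.0, not the notebook's exact hidden cells. The `host`/`port` values and the `author` metadata key are assumptions taken from the old example's localhost:8000 config and the printed output; `chromadb.HttpClient`, `get_or_create_collection`, and the collection-level `get`/`update`/`delete`/`count` calls are standard chromadb API. It assumes a Chroma server is already running at that address.

```python
# Hedged sketch, not the notebook's exact hidden code.
import chromadb

# Remote server: HttpClient replaces the old Settings(chroma_api_impl="rest", ...) style.
# host/port here are assumptions matching the old example's localhost:8000.
remote_db = chromadb.HttpClient(host="localhost", port="8000")
chroma_collection = remote_db.get_or_create_collection("quickstart")

# Update metadata and delete a document directly on the underlying collection,
# roughly what the "count before/after" output above suggests.
doc = chroma_collection.get(limit=1)
doc_id = doc["ids"][0]
chroma_collection.update(ids=[doc_id], metadatas=[{"author": "Paul Graham"}])

print("count before", chroma_collection.count())
chroma_collection.delete(ids=[doc_id])
print("count after", chroma_collection.count())
```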