From 0ce3f0757c6ff2c5a7648ca27135a625ff2a546e Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 23 Aug 2024 17:13:39 -0700 Subject: [PATCH] modify reference --- .../pages/getting-started.md | 24 ++++++++++++++++++- docs/docs.trychroma.com/pages/guides/index.md | 13 +++------- .../pages/guides/multimodal.md | 3 --- .../pages/reference/cheatsheet.md | 6 ----- .../pages/reference/js-collection.md | 3 +-- .../pages/reference/py-collection.md | 8 +++---- 6 files changed, 31 insertions(+), 26 deletions(-) diff --git a/docs/docs.trychroma.com/pages/getting-started.md b/docs/docs.trychroma.com/pages/getting-started.md index 4d59e168174..db69e2868fb 100644 --- a/docs/docs.trychroma.com/pages/getting-started.md +++ b/docs/docs.trychroma.com/pages/getting-started.md @@ -143,12 +143,23 @@ const collection = await client.createCollection({ ### 4. Add some text documents to the collection -Chroma will store your text and handle embedding and indexing automatically. You can also customize the embedding model. +Chroma will store your text and handle embedding and indexing automatically. You can also customize the embedding model. When you add documents, IDs are optional. If you don't provide `ids`, Chroma will automatically generate `ids` using uuid v4 for you. 
{% tabs group="code-lang" hideTabs=true %} {% tab label="Python" %} ```python +# Add docs without IDs +result = collection.add( + documents=[ + "This is a document about pineapple", + "This is a document about oranges" + ] +) + +ids = result["ids"] + +# Add docs with IDs collection.add( documents=[ "This is a document about pineapple", @@ -162,6 +173,17 @@ collection.add( {% tab label="Javascript" %} ```js +// Add docs without Ids +result = await client.addRecords(collection, { + documents: [ + "This is a document about pineapple", + "This is a document about oranges", + ] +}); + +ids = result.ids + +// Add docs with Ids await client.addRecords(collection, { documents: [ "This is a document about pineapple", diff --git a/docs/docs.trychroma.com/pages/guides/index.md b/docs/docs.trychroma.com/pages/guides/index.md index 72c7c72ae68..23d0e9c5f6c 100644 --- a/docs/docs.trychroma.com/pages/guides/index.md +++ b/docs/docs.trychroma.com/pages/guides/index.md @@ -112,8 +112,7 @@ async def main(): collection = await client.create_collection(name="my_collection") await collection.add( - documents=["hello world"], - ids=["id1"] + documents=["hello world"] ) asyncio.run(main()) @@ -358,8 +357,7 @@ Raw documents: ```python collection.add( documents=["lorem ipsum...", "doc2", "doc3", ...], - metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] + metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...] 
) ``` @@ -372,7 +370,6 @@ Raw documents: ```javascript await client.addRecords(collection, { - ids: ["id1", "id2", "id3", ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], documents: ["lorem ipsum...", "doc2", "doc3", ...], }) @@ -401,7 +398,6 @@ collection.add( documents=["doc1", "doc2", "doc3", ...], embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] ) ``` @@ -410,7 +406,6 @@ collection.add( ```javascript await client.addRecords(collection, { - ids: ["id1", "id2", "id3", ...], embeddings: [[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], documents: ["lorem ipsum...", "doc2", "doc3", ...], @@ -432,8 +427,7 @@ You can also store documents elsewhere, and just supply a list of `embeddings` a ```python collection.add( embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], - metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] + metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...] 
) ``` @@ -442,7 +436,6 @@ collection.add( ```javascript await client.addRecords(collection, { - ids: ["id1", "id2", "id3", ...], embeddings: [[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], }) diff --git a/docs/docs.trychroma.com/pages/guides/multimodal.md b/docs/docs.trychroma.com/pages/guides/multimodal.md index 76ff0955f3e..f777d526739 100644 --- a/docs/docs.trychroma.com/pages/guides/multimodal.md +++ b/docs/docs.trychroma.com/pages/guides/multimodal.md @@ -65,7 +65,6 @@ You can add data to a multi-modal collection by specifying the data modality. Fo ```python collection.add( - ids=['id1', 'id2', 'id3'], images=[...] # A list of numpy arrays representing images ) ``` @@ -76,7 +75,6 @@ However, you can use Chroma in combination with data stored elsewhere, by adding ```python collection.add( - ids=['id1', 'id2', 'id3'], uris=[...] # A list of strings representing URIs to data ) ``` @@ -85,7 +83,6 @@ Since the embedding function is multi-modal, you can also add text to the same c ```python collection.add( - ids=['id4', 'id5', 'id6'], texts=["This is a document", "This is another document", "This is a third document"] ) ``` diff --git a/docs/docs.trychroma.com/pages/reference/cheatsheet.md b/docs/docs.trychroma.com/pages/reference/cheatsheet.md index 13c3bfe7fc1..fd5c6bc88b8 100644 --- a/docs/docs.trychroma.com/pages/reference/cheatsheet.md +++ b/docs/docs.trychroma.com/pages/reference/cheatsheet.md @@ -101,18 +101,15 @@ collection.add( embeddings=[1.5, 2.9, 3.4], metadatas={"uri": "img9.png", "style": "style1"}, documents="doc1000101", - ids="uri9", ) # or many, up to 100k+! 
collection.add( embeddings=[[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]], metadatas=[{"style": "style1"}, {"style": "style2"}], - ids=["uri9", "uri10"], ) collection.add( documents=["doc1000101", "doc288822"], metadatas=[{"style": "style1"}, {"style": "style2"}], - ids=["uri9", "uri10"], ) # update items in a collection @@ -200,21 +197,18 @@ await collection.count() // add new items to a collection // either one at a time await collection.add({ - ids: "id1", embeddings: [1.5, 2.9, 3.4], metadatas: {"source": "my_source"}, documents: "This is a document", }) // or many, up to 100k+! await collection.add({ - ids: ["uri9", "uri10"], embeddings: [[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]], metadatas: [{"style": "style1"}, {"style": "style2"}], documents: ["This is a document", 'that is a document'] }) // including just documents await collection.add({ - ids: ["uri9", "uri10"], metadatas: [{"style": "style1"}, {"style": "style2"}], documents: ["doc1000101", "doc288822"], }) diff --git a/docs/docs.trychroma.com/pages/reference/js-collection.md b/docs/docs.trychroma.com/pages/reference/js-collection.md index f4fb3287bfa..766cf4dc8de 100644 --- a/docs/docs.trychroma.com/pages/reference/js-collection.md +++ b/docs/docs.trychroma.com/pages/reference/js-collection.md @@ -34,7 +34,6 @@ Add items to the collection ```javascript const response = await collection.add({ - ids: ["id1", "id2"], embeddings: [ [1, 2, 3], [4, 5, 6], @@ -51,7 +50,7 @@ const response = await collection.add({ | `params` | `Object` | The parameters for the query. | | `params.documents?` | `string` \| `Documents` | Optional documents of the items to add. | | `params.embeddings?` | `Embedding` \| `Embeddings` | Optional embeddings of the items to add. | -| `params.ids` | `string` \| `IDs` | IDs of the items to add. | +| `params.ids?` | `string` \| `IDs` | Optional IDs of the items to add. | | `params.metadatas?` | `Metadata` \| `Metadatas` | Optional metadata of the items to add. 
| #### Returns diff --git a/docs/docs.trychroma.com/pages/reference/py-collection.md b/docs/docs.trychroma.com/pages/reference/py-collection.md index 7b90fc525e3..489aac18bc0 100644 --- a/docs/docs.trychroma.com/pages/reference/py-collection.md +++ b/docs/docs.trychroma.com/pages/reference/py-collection.md @@ -23,17 +23,17 @@ The total number of embeddings added to the database ### add ```python -def add(ids: OneOrMany[ID], +def add(ids: Optional[OneOrMany[ID]] = None, embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None) -> None + documents: Optional[OneOrMany[Document]] = None) -> AddResult ``` Add embeddings to the data store. **Arguments**: -- `ids` - The ids of the embeddings you wish to add +- `ids` - The ids of the embeddings you wish to add. If None, Chroma will generate the ids using uuid v4. Optional. - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. @@ -41,7 +41,7 @@ Add embeddings to the data store. **Returns**: - None + - `AddResult` - An AddResult object containing IDs. **Raises**: