diff --git a/docs/docs.trychroma.com/pages/getting-started.md b/docs/docs.trychroma.com/pages/getting-started.md index dd65ba1d595..68dac4b1bc7 100644 --- a/docs/docs.trychroma.com/pages/getting-started.md +++ b/docs/docs.trychroma.com/pages/getting-started.md @@ -143,12 +143,23 @@ const collection = await client.createCollection({ ### 4. Add some text documents to the collection -Chroma will store your text and handle embedding and indexing automatically. You can also customize the embedding model. +Chroma will store your text and handle embedding and indexing automatically. You can also customize the embedding model. When you add documents, IDs are optional. If you don't provide `ids`, Chroma will automatically generate `ids` for you. {% tabs group="code-lang" hideTabs=true %} {% tab label="Python" %} ```python +# Add docs without IDs +result = collection.add( + documents=[ + "This is a document about pineapple", + "This is a document about oranges" + ] +) + +ids = result["ids"] + +# Add docs with IDs collection.add( documents=[ "This is a document about pineapple", @@ -162,6 +173,17 @@ collection.add( {% tab label="Javascript" %} ```js +// Add docs without IDs +const result = await collection.add({ + documents: [ + "This is a document about pineapple", + "This is a document about oranges", + ] +}); + +const ids = result.ids; + +// Add docs with IDs await collection.add({ documents: [ "This is a document about pineapple", diff --git a/docs/docs.trychroma.com/pages/guides/index.md b/docs/docs.trychroma.com/pages/guides/index.md index 2bc435fa0e2..6350fcc9d97 100644 --- a/docs/docs.trychroma.com/pages/guides/index.md +++ b/docs/docs.trychroma.com/pages/guides/index.md @@ -112,8 +112,7 @@ async def main(): collection = await client.create_collection(name="my_collection") await collection.add( - documents=["hello world"], - ids=["id1"] + documents=["hello world"] ) asyncio.run(main()) @@ -358,26 +357,24 @@ Raw documents: ```python collection.add( documents=["lorem 
ipsum...", "doc2", "doc3", ...], - metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] + metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...] ) ``` {% /tab %} {% tab label="Javascript" %} -Add data to Chroma with `.addRecords`. +Add data to Chroma with `.add`. Raw documents: ```javascript await collection.add({ - ids: ["id1", "id2", "id3", ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], documents: ["lorem ipsum...", "doc2", "doc3", ...], }) // input order -// ids - required +// ids - optional // embeddings - optional // metadata - optional // documents - optional @@ -400,8 +397,7 @@ Alternatively, you can supply a list of document-associated `embeddings` directl collection.add( documents=["doc1", "doc2", "doc3", ...], embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], - metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] + metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...] ) ``` @@ -410,7 +406,6 @@ collection.add( ```javascript await collection.add({ - ids: ["id1", "id2", "id3", ...], embeddings: [[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], documents: ["lorem ipsum...", "doc2", "doc3", ...], @@ -432,8 +427,7 @@ You can also store documents elsewhere, and just supply a list of `embeddings` a ```python collection.add( embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], - metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], - ids=["id1", "id2", "id3", ...] 
+ metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...] ) ``` @@ -442,7 +436,6 @@ collection.add( ```javascript await collection.add({ - ids: ["id1", "id2", "id3", ...], embeddings: [[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...], metadatas: [{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...], }) diff --git a/docs/docs.trychroma.com/pages/guides/multimodal.md b/docs/docs.trychroma.com/pages/guides/multimodal.md index 76ff0955f3e..f777d526739 100644 --- a/docs/docs.trychroma.com/pages/guides/multimodal.md +++ b/docs/docs.trychroma.com/pages/guides/multimodal.md @@ -65,7 +65,6 @@ You can add data to a multi-modal collection by specifying the data modality. Fo ```python collection.add( - ids=['id1', 'id2', 'id3'], images=[...] # A list of numpy arrays representing images ) ``` @@ -76,7 +75,6 @@ However, you can use Chroma in combination with data stored elsewhere, by adding ```python collection.add( - ids=['id1', 'id2', 'id3'], uris=[...] # A list of strings representing URIs to data ) ``` @@ -85,7 +83,6 @@ Since the embedding function is multi-modal, you can also add text to the same c ```python collection.add( - ids=['id4', 'id5', 'id6'], texts=["This is a document", "This is another document", "This is a third document"] ) ``` diff --git a/docs/docs.trychroma.com/pages/reference/cheatsheet.md b/docs/docs.trychroma.com/pages/reference/cheatsheet.md index 13c3bfe7fc1..74dc21750c4 100644 --- a/docs/docs.trychroma.com/pages/reference/cheatsheet.md +++ b/docs/docs.trychroma.com/pages/reference/cheatsheet.md @@ -98,21 +98,17 @@ collection.count() # add new items to a collection # either one at a time collection.add( - embeddings=[1.5, 2.9, 3.4], metadatas={"uri": "img9.png", "style": "style1"}, documents="doc1000101", - ids="uri9", ) # or many, up to 100k+! 
collection.add( embeddings=[[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]], - metadatas=[{"style": "style1"}, {"style": "style2"}], - ids=["uri9", "uri10"], + metadatas=[{"style": "style1"}, {"style": "style2"}] ) collection.add( documents=["doc1000101", "doc288822"], - metadatas=[{"style": "style1"}, {"style": "style2"}], - ids=["uri9", "uri10"], + metadatas=[{"style": "style1"}, {"style": "style2"}] ) # update items in a collection @@ -200,21 +196,18 @@ await collection.count() // add new items to a collection // either one at a time await collection.add({ - ids: "id1", embeddings: [1.5, 2.9, 3.4], metadatas: {"source": "my_source"}, documents: "This is a document", }) // or many, up to 100k+! await collection.add({ - ids: ["uri9", "uri10"], embeddings: [[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]], metadatas: [{"style": "style1"}, {"style": "style2"}], documents: ["This is a document", 'that is a document'] }) // including just documents await collection.add({ - ids: ["uri9", "uri10"], metadatas: [{"style": "style1"}, {"style": "style2"}], documents: ["doc1000101", "doc288822"], }) diff --git a/docs/docs.trychroma.com/pages/reference/js-collection.md b/docs/docs.trychroma.com/pages/reference/js-collection.md index 6533c04490f..1747aa309bb 100644 --- a/docs/docs.trychroma.com/pages/reference/js-collection.md +++ b/docs/docs.trychroma.com/pages/reference/js-collection.md @@ -28,7 +28,7 @@ ___ ### add -▸ **add**(`params`): `Promise`\<`void`\> +▸ **add**(`params`): `Promise`\<`AddResponse`\> Add items to the collection @@ -40,7 +40,7 @@ Add items to the collection #### Returns -`Promise`\<`void`\> +`Promise`\<`AddResponse`\> - The response from the API. True if successful. @@ -294,7 +294,7 @@ Upsert items to the collection | Name | Type | Description | | :------ | :------ | :------ | -| `params` | `AddRecordsParams` | The parameters for the query. | +| `params` | `UpsertRecordsParams` | The parameters for the query. 
| #### Returns diff --git a/docs/docs.trychroma.com/pages/reference/py-client.md b/docs/docs.trychroma.com/pages/reference/py-client.md index f8b5d46941b..cf025688f70 100644 --- a/docs/docs.trychroma.com/pages/reference/py-client.md +++ b/docs/docs.trychroma.com/pages/reference/py-client.md @@ -3,7 +3,7 @@ title: Client --- -## configure +#### configure ```python def configure(**kwargs) -> None @@ -11,7 +11,7 @@ def configure(**kwargs) -> None Override Chroma's default settings, environment variables or .env files -## EphemeralClient +#### EphemeralClient ```python def EphemeralClient(settings: Optional[Settings] = None, @@ -27,7 +27,7 @@ development, but not recommended for production use. - `tenant` - The tenant to use for this client. Defaults to the default tenant. - `database` - The database to use for this client. Defaults to the default database. -## PersistentClient +#### PersistentClient ```python def PersistentClient(path: str = "./chroma", @@ -45,7 +45,7 @@ testing and development, but not recommended for production use. - `tenant` - The tenant to use for this client. Defaults to the default tenant. - `database` - The database to use for this client. Defaults to the default database. -## HttpClient +#### HttpClient ```python def HttpClient(host: str = "localhost", @@ -71,7 +71,7 @@ use Chroma in production. - `tenant` - The tenant to use for this client. Defaults to the default tenant. - `database` - The database to use for this client. Defaults to the default database. -## AsyncHttpClient +#### AsyncHttpClient ```python async def AsyncHttpClient(host: str = "localhost", @@ -97,7 +97,7 @@ use Chroma in production. - `tenant` - The tenant to use for this client. Defaults to the default tenant. - `database` - The database to use for this client. Defaults to the default database. -## CloudClient +#### CloudClient ```python def CloudClient(tenant: str, @@ -118,7 +118,7 @@ Creates a client to connect to a tennant and database on the Chroma cloud. 
- `database` - The database to use for this client. - `api_key` - The api key to use for this client. -## Client +#### Client ```python def Client(settings: Settings = __settings, @@ -131,7 +131,7 @@ Return a running chroma.API instance tenant: The tenant to use for this client. Defaults to the default tenant. database: The database to use for this client. Defaults to the default database. -## AdminClient +#### AdminClient ```python def AdminClient(settings: Settings = Settings()) -> AdminAPI @@ -140,15 +140,16 @@ def AdminClient(settings: Settings = Settings()) -> AdminAPI Creates an admin client that can be used to create tenants and databases. -# BaseClient Methods +## BaseAPI Objects ```python class BaseAPI(ABC) ``` -## heartbeat +#### heartbeat ```python +@abstractmethod def heartbeat() -> int ``` @@ -159,9 +160,10 @@ Used to check if the server is alive. - `int` - The current time in nanoseconds since epoch -## count\_collections +#### count\_collections ```python +@abstractmethod def count_collections() -> int ``` @@ -179,9 +181,10 @@ Count the number of collections. # 1 ``` -## delete\_collection +#### delete\_collection ```python +@abstractmethod def delete_collection(name: str) -> None ``` @@ -203,9 +206,10 @@ Delete a collection with the given name. client.delete_collection("my_collection") ``` -## reset +#### reset ```python +@abstractmethod def reset() -> bool ``` @@ -215,9 +219,10 @@ Resets the database. This will delete all collections and entries. - `bool` - True if the database was reset successfully. -## get\_version +#### get\_version ```python +@abstractmethod def get_version() -> str ``` @@ -227,9 +232,10 @@ Get the version of Chroma. - `str` - The version of Chroma -## get\_settings +#### get\_settings ```python +@abstractmethod def get_settings() -> Settings ``` @@ -239,23 +245,25 @@ Get the settings used to initialize. - `Settings` - The settings used to initialize. 
-## get\_max\_batch\_size +#### get\_max\_batch\_size ```python +@abstractmethod def get_max_batch_size() -> int ``` Return the maximum number of records that can be created or mutated in a single call. -# ClientClient Methods +## ClientAPI Objects ```python class ClientAPI(BaseAPI, ABC) ``` -## list\_collections +#### list\_collections ```python +@abstractmethod def list_collections(limit: Optional[int] = None, offset: Optional[int] = None) -> Sequence[Collection] ``` @@ -280,9 +288,10 @@ List all collections. # [collection(name="my_collection", metadata={})] ``` -## create\_collection +#### create\_collection ```python +@abstractmethod def create_collection(name: str, configuration: Optional[CollectionConfiguration] = None, metadata: Optional[CollectionMetadata] = None, @@ -325,9 +334,10 @@ Create a new collection with the given name and metadata. # collection(name="my_collection", metadata={"foo": "bar"}) ``` -## get\_collection +#### get\_collection ```python +@abstractmethod def get_collection( name: str, id: Optional[UUID] = None, @@ -364,9 +374,10 @@ Get a collection with the given name. # collection(name="my_collection", metadata={}) ``` -## get\_or\_create\_collection +#### get\_or\_create\_collection ```python +@abstractmethod def get_or_create_collection( name: str, configuration: Optional[CollectionConfiguration] = None, @@ -401,9 +412,10 @@ Get or create a collection with the given name and metadata. # collection(name="my_collection", metadata={}) ``` -## set\_tenant +#### set\_tenant ```python +@abstractmethod def set_tenant(tenant: str, database: str = DEFAULT_DATABASE) -> None ``` @@ -415,9 +427,10 @@ database does not exist. - `tenant` - The tenant to set. - `database` - The database to set. -## set\_database +#### set\_database ```python +@abstractmethod def set_database(database: str) -> None ``` @@ -427,25 +440,27 @@ Set the database for the client. Raises an error if the database does not exist. - `database` - The database to set. 
-## clear\_system\_cache +#### clear\_system\_cache ```python @staticmethod +@abstractmethod def clear_system_cache() -> None ``` Clear the system cache so that new systems can be created for an existing path. This should only be used for testing purposes. -# AdminClient Methods +## AdminAPI Objects ```python class AdminAPI(ABC) ``` -## create\_database +#### create\_database ```python +@abstractmethod def create_database(name: str, tenant: str = DEFAULT_TENANT) -> None ``` @@ -455,9 +470,10 @@ Create a new database. Raises an error if the database already exists. - `database` - The name of the database to create. -## get\_database +#### get\_database ```python +@abstractmethod def get_database(name: str, tenant: str = DEFAULT_TENANT) -> Database ``` @@ -468,9 +484,10 @@ Get a database. Raises an error if the database does not exist. - `database` - The name of the database to get. - `tenant` - The tenant of the database to get. -## create\_tenant +#### create\_tenant ```python +@abstractmethod def create_tenant(name: str) -> None ``` @@ -480,9 +497,10 @@ Create a new tenant. Raises an error if the tenant already exists. - `tenant` - The name of the tenant to create. -## get\_tenant +#### get\_tenant ```python +@abstractmethod def get_tenant(name: str) -> Tenant ``` @@ -492,7 +510,7 @@ Get a tenant. Raises an error if the tenant does not exist. - `tenant` - The name of the tenant to get. 
-# ServerClient Methods +## ServerAPI Objects ```python class ServerAPI(BaseAPI, AdminAPI, Component) diff --git a/docs/docs.trychroma.com/pages/reference/py-collection.md b/docs/docs.trychroma.com/pages/reference/py-collection.md index 3202689e504..15150a890b9 100644 --- a/docs/docs.trychroma.com/pages/reference/py-collection.md +++ b/docs/docs.trychroma.com/pages/reference/py-collection.md @@ -2,13 +2,13 @@ title: Collection --- -# Collection Objects +## Collection Objects ```python -class Collection(BaseModel) +class Collection(CollectionCommon["ServerAPI"]) ``` -# count +#### count ```python def count() -> int @@ -20,13 +20,20 @@ The total number of embeddings added to the database - `int` - The total number of embeddings added to the database -# add +#### add ```python -def add(ids: OneOrMany[ID], - embeddings: Optional[OneOrMany[Embedding]] = None, +def add( + ids: Optional[OneOrMany[ID]] = None, + embeddings: Optional[ # type: ignore[type-arg] + Union[ + OneOrMany[Embedding], + OneOrMany[np.ndarray], + ]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None) -> None + documents: Optional[OneOrMany[Document]] = None, + images: Optional[OneOrMany[Image]] = None, + uris: Optional[OneOrMany[URI]] = None) -> AddResult ``` Add embeddings to the data store. @@ -34,9 +41,11 @@ Add embeddings to the data store. **Arguments**: - `ids` - The ids of the embeddings you wish to add -- `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. +- `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional. - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. 
+- `images` - The images to associate with the embeddings. Optional. +- `uris` - The uris of the images to associate with the embeddings. Optional. **Returns**: @@ -49,9 +58,10 @@ Add embeddings to the data store. - `ValueError` - If you don't provide either embeddings or documents - `ValueError` - If the length of ids, embeddings, metadatas, or documents don't match - `ValueError` - If you don't provide an embedding function and don't provide embeddings -- `DuplicateIDError` - If you provide an id that already exists +- `ValueError` - If you provide both embeddings and documents +- `ValueError` - If you provide an id that already exists -# get +#### get ```python def get(ids: Optional[OneOrMany[ID]] = None, @@ -68,7 +78,7 @@ all embeddings up to limit starting at offset. **Arguments**: - `ids` - The ids of the embeddings to get. Optional. -- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional. +- `where` - A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional. - `limit` - The number of documents to return. Optional. - `offset` - The offset to start returning results from. Useful for paging results with limit. Optional. - `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional. @@ -79,7 +89,7 @@ all embeddings up to limit starting at offset. - `GetResult` - A GetResult object containing the results. -# peek +#### peek ```python def peek(limit: int = 10) -> GetResult @@ -96,12 +106,18 @@ Get the first few results in the database up to limit - `GetResult` - A GetResult object containing the results. 
-# query +#### query ```python def query( - query_embeddings: Optional[OneOrMany[Embedding]] = None, + query_embeddings: Optional[ # type: ignore[type-arg] + Union[ + OneOrMany[Embedding], + OneOrMany[np.ndarray], + ]] = None, query_texts: Optional[OneOrMany[Document]] = None, + query_images: Optional[OneOrMany[Image]] = None, + query_uris: Optional[OneOrMany[URI]] = None, n_results: int = 10, where: Optional[Where] = None, where_document: Optional[WhereDocument] = None, @@ -115,8 +131,9 @@ Get the n_results nearest neighbor embeddings for provided query_embeddings or q - `query_embeddings` - The embeddings to get the closes neighbors of. Optional. - `query_texts` - The document texts to get the closes neighbors of. Optional. +- `query_images` - The images to get the closest neighbors of. Optional. - `n_results` - The number of neighbors to return for each query_embedding or query_texts. Optional. -- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional. +- `where` - A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional. - `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional. - `include` - A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`. Ids are always included. Defaults to `["metadatas", "documents", "distances"]`. Optional. 
@@ -128,10 +145,12 @@ Get the n_results nearest neighbor embeddings for provided query_embeddings or q **Raises**: -- `ValueError` - If you don't provide either query_embeddings or query_texts +- `ValueError` - If you don't provide either query_embeddings, query_texts, or query_images - `ValueError` - If you provide both query_embeddings and query_texts +- `ValueError` - If you provide both query_embeddings and query_images +- `ValueError` - If you provide both query_texts and query_images -# modify +#### modify ```python def modify(name: Optional[str] = None, @@ -150,13 +169,20 @@ Modify the collection name or metadata None -# update +#### update ```python -def update(ids: OneOrMany[ID], - embeddings: Optional[OneOrMany[Embedding]] = None, - metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None) -> None +def update( + ids: OneOrMany[ID], + embeddings: Optional[ # type: ignore[type-arg] + Union[ + OneOrMany[Embedding], + OneOrMany[np.ndarray], + ]] = None, + metadatas: Optional[OneOrMany[Metadata]] = None, + documents: Optional[OneOrMany[Document]] = None, + images: Optional[OneOrMany[Image]] = None, + uris: Optional[OneOrMany[URI]] = None) -> None ``` Update the embeddings, metadatas or documents for provided ids. @@ -164,22 +190,29 @@ Update the embeddings, metadatas or documents for provided ids. **Arguments**: - `ids` - The ids of the embeddings to update -- `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. +- `embeddings` - The embeddings to update. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional. - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. 
- +- `images` - The images to associate with the embeddings. Optional. **Returns**: None -# upsert +#### upsert ```python -def upsert(ids: OneOrMany[ID], - embeddings: Optional[OneOrMany[Embedding]] = None, - metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None) -> None +def upsert( + ids: OneOrMany[ID], + embeddings: Optional[ # type: ignore[type-arg] + Union[ + OneOrMany[Embedding], + OneOrMany[np.ndarray], + ]] = None, + metadatas: Optional[OneOrMany[Metadata]] = None, + documents: Optional[OneOrMany[Document]] = None, + images: Optional[OneOrMany[Image]] = None, + uris: Optional[OneOrMany[URI]] = None) -> None ``` Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. @@ -196,7 +229,7 @@ Update the embeddings, metadatas or documents for provided ids, or create them i None -# delete +#### delete ```python def delete(ids: Optional[IDs] = None, @@ -209,10 +242,15 @@ Delete the embeddings based on ids and/or a where filter **Arguments**: - `ids` - The ids of the embeddings to delete -- `where` - A Where type dict used to filter the delection by. E.g. `{"color" : "red", "price": 4.20}`. Optional. +- `where` - A Where type dict used to filter the deletion by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional. - `where_document` - A WhereDocument type dict used to filter the deletion by the document content. E.g. `{$contains: {"text": "hello"}}`. Optional. **Returns**: None + + +**Raises**: + +- `ValueError` - If you don't provide either ids, where, or where_document