Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved component model #629

Merged
merged 38 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
da6f4e3
Introduce new component model
levand May 27, 2023
a40e9ee
change supertype of all components + fix issues
levand May 28, 2023
8e6e5a6
add graphlib backport for old python versions
levand May 28, 2023
ec4bdd3
typing tweaks to workaround Overrides+py3.7 incompatibilites
levand May 28, 2023
eee234f
increase reliability & visibility of server fixture
levand May 28, 2023
e8941d8
allow adding components to a running system
levand May 28, 2023
6f5a6f4
Fix error in tests
levand May 28, 2023
ae1a95e
add graphlib_backport to pyproject.toml
levand May 31, 2023
345aa31
add debug via ssh for github runner
HammadB May 31, 2023
02a3961
I suspect that the port is causing issues, add this for debugging on …
HammadB May 31, 2023
4c52d0e
I suspect that the port is causing issues, add this for debugging on …
HammadB May 31, 2023
e88adcb
Testing if the issue is python version-dependent
HammadB May 31, 2023
4b05aca
Testing if the issue is python version-dependent
HammadB May 31, 2023
86a592c
Move directory out of function definition
HammadB May 31, 2023
aa49ccb
make persist random (shouldn't matter). redirect stdout for process
HammadB May 31, 2023
413e445
Handle generic exception
HammadB Jun 1, 2023
9cd28a7
handle minimal repro
HammadB Jun 1, 2023
93b3a38
Fix stdout redirect
HammadB Jun 1, 2023
ba88c99
Make uvicorn verbose
HammadB Jun 1, 2023
7047051
revert fixture
HammadB Jun 1, 2023
5ac159a
address PR comments
HammadB Jun 1, 2023
6cbf2e8
Only run fastapi, add pdb, preserve stdin for pdb
HammadB Jun 1, 2023
cdacec8
Hide pdb for a moment to see issue in only test_api.py
HammadB Jun 1, 2023
2812401
Add fixtures back in, somehow removing them made tests pass
HammadB Jun 1, 2023
91646c8
Try just [duckdb, fastapi]
HammadB Jun 1, 2023
0a06fab
Try just [duckdb_parquet, fastapi]
HammadB Jun 1, 2023
c38fab1
Try all [duckdb,duckdb_parquet, fastapi]
HammadB Jun 1, 2023
3cd8ba7
Try closing duckdb conns
HammadB Jun 1, 2023
8502d32
throwing paint at the wall -> try unique persist_dirs
HammadB Jun 1, 2023
0cfc023
Try removing uuid call..will this work?
HammadB Jun 1, 2023
893ed36
fixed uuid
HammadB Jun 1, 2023
e5eb07b
I know nothing.
HammadB Jun 1, 2023
8eb18e1
I truly know nothing.
HammadB Jun 1, 2023
a25ee39
spawn it
HammadB Jun 1, 2023
fde3eb6
actually spawn in
HammadB Jun 1, 2023
6d4f210
Revert debugging
HammadB Jun 1, 2023
7130974
Use spawn instead of fork
HammadB Jun 1, 2023
4b482be
merge in main
HammadB Jun 1, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ repos:
rev: 'v1.2.0'
hooks:
- id: mypy
args: [--strict, --ignore-missing-imports, --follow-imports=silent]
args: [--strict, --ignore-missing-imports, --follow-imports=silent, --disable-error-code=type-abstract]
additional_dependencies: ["types-requests", "pydantic", "overrides", "hypothesis", "pytest"]
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"--ignore-missing-imports",
"--show-column-numbers",
"--no-pretty",
"--strict"
"--strict",
"--disable-error-code=type-abstract"
]
}
11 changes: 7 additions & 4 deletions chromadb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import chromadb.config
import logging
from chromadb.telemetry.events import ClientStartEvent
from chromadb.telemetry import Telemetry
from chromadb.config import Settings, System
from chromadb.api import API

Expand All @@ -22,14 +23,16 @@ def get_settings() -> Settings:


def Client(settings: Settings = __settings) -> API:
"""Return a chroma.API instance based on the provided or environmental
settings, optionally overriding the DB instance."""
"""Return a running chroma.API instance"""

system = System(settings)

telemetry_client = system.get_telemetry()
telemetry_client = system.instance(Telemetry)
api = system.instance(API)

system.start()

# Submit event for client start
telemetry_client.capture(ClientStartEvent())

return system.get_api()
return api
8 changes: 2 additions & 6 deletions chromadb/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,11 @@
GetResult,
WhereDocument,
)
from chromadb.config import Component
import chromadb.utils.embedding_functions as ef
from chromadb.telemetry import Telemetry


class API(ABC):
@abstractmethod
def __init__(self, telemetry_client: Telemetry):
pass

class API(Component, ABC):
@abstractmethod
def heartbeat(self) -> int:
"""Returns the current server time in nanoseconds to check if the server is alive
Expand Down
29 changes: 26 additions & 3 deletions chromadb/api/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,27 @@
from chromadb.api.models.Collection import Collection
import chromadb.errors as errors
from uuid import UUID
from chromadb.telemetry import Telemetry
from overrides import override


class FastAPI(API):
def __init__(self, system: System):
super().__init__(system)
url_prefix = "https" if system.settings.chroma_server_ssl_enabled else "http"
system.settings.require("chroma_server_host")
system.settings.require("chroma_server_http_port")
self._api_url = f"{url_prefix}://{system.settings.chroma_server_host}:{system.settings.chroma_server_http_port}/api/v1"
self._telemetry_client = system.get_telemetry()
self._telemetry_client = self.require(Telemetry)

@override
def heartbeat(self) -> int:
"""Returns the current server time in nanoseconds to check if the server is alive"""
resp = requests.get(self._api_url)
raise_chroma_error(resp)
return int(resp.json()["nanosecond heartbeat"])

@override
def list_collections(self) -> Sequence[Collection]:
"""Returns a list of all collections"""
resp = requests.get(self._api_url + "/collections")
Expand All @@ -49,6 +54,7 @@ def list_collections(self) -> Sequence[Collection]:

return collections

@override
def create_collection(
self,
name: str,
Expand All @@ -73,6 +79,7 @@ def create_collection(
metadata=resp_json["metadata"],
)

@override
def get_collection(
self,
name: str,
Expand All @@ -90,6 +97,7 @@ def get_collection(
metadata=resp_json["metadata"],
)

@override
def get_or_create_collection(
self,
name: str,
Expand All @@ -102,6 +110,7 @@ def get_or_create_collection(
name, metadata, embedding_function, get_or_create=True
)

@override
def _modify(
self,
id: UUID,
Expand All @@ -115,11 +124,13 @@ def _modify(
)
raise_chroma_error(resp)

@override
def delete_collection(self, name: str) -> None:
"""Deletes a collection"""
resp = requests.delete(self._api_url + "/collections/" + name)
raise_chroma_error(resp)

@override
def _count(self, collection_id: UUID) -> int:
"""Returns the number of embeddings in the database"""
resp = requests.get(
Expand All @@ -128,13 +139,15 @@ def _count(self, collection_id: UUID) -> int:
raise_chroma_error(resp)
return cast(int, resp.json())

def _peek(self, collection_id: UUID, limit: int = 10) -> GetResult:
@override
def _peek(self, collection_id: UUID, n: int = 10) -> GetResult:
return self._get(
collection_id,
limit=limit,
limit=n,
include=["embeddings", "documents", "metadatas"],
)

@override
def _get(
self,
collection_id: UUID,
Expand Down Expand Up @@ -177,6 +190,7 @@ def _get(
documents=body.get("documents", None),
)

@override
def _delete(
self,
collection_id: UUID,
Expand All @@ -196,6 +210,7 @@ def _delete(
raise_chroma_error(resp)
return cast(IDs, resp.json())

@override
def _add(
self,
ids: IDs,
Expand Down Expand Up @@ -227,6 +242,7 @@ def _add(
raise_chroma_error(resp)
return True

@override
def _update(
self,
collection_id: UUID,
Expand Down Expand Up @@ -255,6 +271,7 @@ def _update(
resp.raise_for_status()
return True

@override
def _upsert(
self,
collection_id: UUID,
Expand Down Expand Up @@ -285,6 +302,7 @@ def _upsert(
resp.raise_for_status()
return True

@override
def _query(
self,
collection_id: UUID,
Expand Down Expand Up @@ -320,18 +338,21 @@ def _query(
documents=body.get("documents", None),
)

@override
def reset(self) -> bool:
"""Resets the database"""
resp = requests.post(self._api_url + "/reset")
raise_chroma_error(resp)
return cast(bool, resp.json())

@override
def persist(self) -> bool:
"""Persists the database"""
resp = requests.post(self._api_url + "/persist")
raise_chroma_error(resp)
return cast(bool, resp.json())

@override
def raw_sql(self, sql: str) -> pd.DataFrame:
"""Runs a raw SQL query against the database"""
resp = requests.post(
Expand All @@ -340,6 +361,7 @@ def raw_sql(self, sql: str) -> pd.DataFrame:
raise_chroma_error(resp)
return pd.DataFrame.from_dict(resp.json())

@override
def create_index(self, collection_name: str) -> bool:
"""Creates an index for the given space key"""
resp = requests.post(
Expand All @@ -348,6 +370,7 @@ def create_index(self, collection_name: str) -> bool:
raise_chroma_error(resp)
return cast(bool, resp.json())

@override
def get_version(self) -> str:
"""Returns the version of the server"""
resp = requests.get(self._api_url + "/version")
Expand Down
Loading