From 6cd05f8b30ec90cd1e458955572a13315dfae00d Mon Sep 17 00:00:00 2001
From: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>
Date: Mon, 27 Mar 2023 16:17:17 +0800
Subject: [PATCH] feat: implement push/pull interface from JAC, file and s3 (#1182)

* refactor: move streaming serialization into separate method

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: add binary io like protocol definition

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: ported push pull to JAC

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: protocol is not in 3.7 typing

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: make mypy happy

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: patch missing waterfall

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: jit import backends

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: implement cache in jinaai pull

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: add hubble dependency to jina group

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: better division of concerns

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: add concept of namespace

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: ignore missing hubble stubs

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: streaming protocol stubs

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: make more general buffered caching reader

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: add tests for hubble pushpull

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: add tests for file backend

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: remove hubble dependency from jina group

This reverts commit b3044213d58517becb9d71194af34f3833560ebc.

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: implement push pull for local filesystem

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: test concurrent pushes and pulls in file protocol

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: resolve concurrent pushes and pulls correctly

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: rename text to textdoc

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: added some logging

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: s3 tests

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: s3 pushpull

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: add smart open dependency

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: add smart opens silly python bound

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: update hubble tests (failing)

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: fix delete return in hubble pushpull

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* Revert "fix: add smart open dependency"

This reverts commit cf78c6cc6d2b367501d2358c18773a456426a448.
This reverts commit eb0e52b4c521f2b638bf5de850701546a4996bc3.

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: add hubble and smart open dependencies

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: mypy fixes

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* ci: allow tests to see jina auth token

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: add progress bars for streaming

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* style: blacken

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: buffer writes to s3

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: mypy no like sequence

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: make progress bar quieter when disabled

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: skip failing tests

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: add tables when listing

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: add jina auth token to uncaped test

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: mock s3 tests with minio container

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: silly error that cost me 2 hours of life

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: use tolerance ratio in file tests

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: add caching to s3 pull

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: add log messages for unused parameters

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: take out unneeded buffering

smart open already buffers

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: pick fastest protocol compression configuration for s3

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: bump tolerance ratio for s3 test

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: reduce code duplication

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: put reader chunk size constant at top of file

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: reduce reader chunk size for memory tests

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: multipart uploads get stuck frequently

lets just do big uploads for now...
Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* docs: add docstrings to mixin and file backend

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* docs: add docstring for s3 and hubble backends

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* test: remove unused test

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: use literal in protocol

Co-authored-by: samsja <55492238+samsja@users.noreply.github.com>
Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: protocols dont need to be inherited

Co-authored-by: samsja <55492238+samsja@users.noreply.github.com>
Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: add make mypy happy with the literals

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: literals not in 3.7

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: move mixin out of init file

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: move cache path resolution to utils

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* feat: cache path is only evaluated once

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: loading backends makes more sense as debug log

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* tests: add slow and internet marks

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: pin image tag

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* refactor: use abc instead of protocol for typing backends

Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com>

* fix: revert - add hubble and smart open dependencies

This reverts commit 1d1d2eeaf2b51be6ef00e6ab6ee5b9fd1bcf1d92.
Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add hubble and aws dependencies Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: change all push pull mixin methods to class methods Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: misstyped class method self reference Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: rename pushpull to docstore and use more classmethods Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: separate remote backend implementations from mixin Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: missed import refactor Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: change submodule name to store Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: remove list and delete from mixin Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * tests: clear all the garbage in ci account Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * tests: skip test that is broken on ci Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: standardize naming to jac Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> --------- Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> --- .github/workflows/ci.yml | 6 +- docarray/array/array/array.py | 3 +- docarray/array/array/io.py | 128 ++-- docarray/array/array/pushpull.py | 180 ++++++ docarray/store/__init__.py | 5 + docarray/store/abstract_doc_store.py | 104 +++ docarray/store/exceptions.py | 4 + docarray/store/file.py | 194 ++++++ docarray/store/helpers.py | 207 ++++++ docarray/store/jac.py | 360 +++++++++++ docarray/store/s3.py | 239 +++++++ docarray/utils/cache.py | 17 + docarray/utils/progress_bar.py | 23 +- poetry.lock | 659 +++++++++++++++++++- pyproject.toml | 22 +- tests/integrations/store/__init__.py | 32 + tests/integrations/store/docker-compose.yml | 8 + tests/integrations/store/test_file.py | 263 ++++++++ tests/integrations/store/test_jac.py | 254 ++++++++ tests/integrations/store/test_s3.py | 265 ++++++++ 20 files changed, 2897 insertions(+), 76 deletions(-) create mode 100644 docarray/array/array/pushpull.py create mode 100644 docarray/store/__init__.py create mode 100644 docarray/store/abstract_doc_store.py create mode 100644 docarray/store/exceptions.py create mode 100644 docarray/store/file.py create mode 100644 docarray/store/helpers.py create mode 100644 docarray/store/jac.py create mode 100644 docarray/store/s3.py create mode 100644 docarray/utils/cache.py create mode 100644 tests/integrations/store/__init__.py create mode 100644 tests/integrations/store/docker-compose.yml create mode 100644 tests/integrations/store/test_file.py create mode 100644 tests/integrations/store/test_jac.py create mode 100644 tests/integrations/store/test_s3.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 722755aa6a0..d39a6bec70d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,8 +119,8 @@ jobs: run: | poetry run pytest -m "not (tensorflow or benchmark or index)" ${{ matrix.test-path }} timeout-minutes: 30 -# env: -# JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" + env: + JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" # - name: Check codecov file 
# id: check_files # uses: andstor/file-existence-action@v1 @@ -164,6 +164,8 @@ jobs: run: | poetry run pytest -m "not (tensorflow or benchmark)" ${{ matrix.test-path }} timeout-minutes: 30 + env: + JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" docarray-test-proto3: diff --git a/docarray/array/array/array.py b/docarray/array/array/array.py index 4d5a720421e..a699859b595 100644 --- a/docarray/array/array/array.py +++ b/docarray/array/array/array.py @@ -19,6 +19,7 @@ from docarray.array.abstract_array import AnyDocumentArray from docarray.array.array.io import IOMixinArray +from docarray.array.array.pushpull import PushPullMixin from docarray.array.array.sequence_indexing_mixin import ( IndexingSequenceMixin, IndexIterType, @@ -57,7 +58,7 @@ def _delegate_meth(self, *args, **kwargs): class DocumentArray( - IndexingSequenceMixin[T_doc], IOMixinArray, AnyDocumentArray[T_doc] + IndexingSequenceMixin[T_doc], PushPullMixin, IOMixinArray, AnyDocumentArray[T_doc] ): """ DocumentArray is a container of Documents. diff --git a/docarray/array/array/io.py b/docarray/array/array/io.py index ef674639ab3..22b3321810b 100644 --- a/docarray/array/array/io.py +++ b/docarray/array/array/io.py @@ -16,6 +16,7 @@ Dict, Generator, Iterable, + Iterator, List, Optional, Tuple, @@ -177,37 +178,60 @@ def _write_bytes( elif protocol == 'pickle-array': f.write(pickle.dumps(self)) elif protocol in SINGLE_PROTOCOLS: - from rich import filesize - - from docarray.utils.progress_bar import _get_progressbar - - pbar, t = _get_progressbar( - 'Serializing', disable=not show_progress, total=len(self) - ) - - f.write(self._stream_header) - - with pbar: - _total_size = 0 - pbar.start_task(t) - for doc in self: - doc_bytes = doc.to_bytes(protocol=protocol, compress=compress) - len_doc_as_bytes = len(doc_bytes).to_bytes( - 4, 'big', signed=False - ) - all_bytes = len_doc_as_bytes + doc_bytes - f.write(all_bytes) - _total_size += len(all_bytes) - pbar.update( - t, - advance=1, - total_size=str(filesize.decimal(_total_size)), + f.write( + b''.join( + self.to_binary_stream( + protocol=protocol, + compress=compress, + show_progress=show_progress, ) + ) + ) else: raise ValueError( f'protocol={protocol} is not supported. Can be only {ALLOWED_PROTOCOLS}.' 
) + def to_binary_stream( + self, + protocol: str = 'protobuf', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> Iterator[bytes]: + from rich import filesize + + if show_progress: + from docarray.utils.progress_bar import _get_progressbar + + pbar, t = _get_progressbar( + 'Serializing', disable=not show_progress, total=len(self) + ) + else: + from contextlib import nullcontext + + pbar = nullcontext() + + yield self._stream_header + + with pbar: + if show_progress: + _total_size = 0 + pbar.start_task(t) + for doc in self: + doc_bytes = doc.to_bytes(protocol=protocol, compress=compress) + len_doc_as_bytes = len(doc_bytes).to_bytes(4, 'big', signed=False) + all_bytes = len_doc_as_bytes + doc_bytes + + yield all_bytes + + if show_progress: + _total_size += len(all_bytes) + pbar.update( + t, + advance=1, + total_size=str(filesize.decimal(_total_size)), + ) + def to_bytes( self, protocol: str = 'protobuf-array', @@ -584,7 +608,7 @@ def _load_binary_all( def _load_binary_stream( cls: Type[T], file_ctx: ContextManager[io.BufferedReader], - protocol: Optional[str] = None, + protocol: str = 'protobuf', compress: Optional[str] = None, show_progress: bool = False, ) -> Generator['BaseDocument', None, None]: @@ -598,37 +622,43 @@ def _load_binary_stream( from rich import filesize - from docarray import BaseDocument - from docarray.utils.progress_bar import _get_progressbar - with file_ctx as f: version_numdocs_lendoc0 = f.read(9) # 1 byte (uint8) # 8 bytes (uint64) num_docs = int.from_bytes(version_numdocs_lendoc0[1:9], 'big', signed=False) - pbar, t = _get_progressbar( - 'Deserializing', disable=not show_progress, total=num_docs - ) + if show_progress: + from docarray.utils.progress_bar import _get_progressbar + + pbar, t = _get_progressbar( + 'Deserializing', disable=not show_progress, total=num_docs + ) + else: + from contextlib import nullcontext + + pbar = nullcontext() with pbar: - _total_size = 0 - pbar.start_task(t) + if show_progress: + _total_size = 0 + pbar.start_task(t) for _ in range(num_docs): # 4 bytes (uint32) len_current_doc_in_bytes = int.from_bytes( f.read(4), 'big', signed=False ) - _total_size += len_current_doc_in_bytes - load_protocol: str = protocol or 'protobuf' - yield BaseDocument.from_bytes( + load_protocol: str = protocol + yield cls.document_type.from_bytes( f.read(len_current_doc_in_bytes), protocol=load_protocol, compress=compress, ) - pbar.update( - t, advance=1, total_size=str(filesize.decimal(_total_size)) - ) + if show_progress: + _total_size += len_current_doc_in_bytes + pbar.update( + t, advance=1, total_size=str(filesize.decimal(_total_size)) + ) @classmethod def load_binary( @@ -670,12 +700,18 @@ def load_binary( else: raise FileNotFoundError(f'cannot find file {file}') if streaming: - return cls._load_binary_stream( - file_ctx, - protocol=load_protocol, - compress=load_compress, - show_progress=show_progress, - ) + if load_protocol not in SINGLE_PROTOCOLS: + raise ValueError( + f'`streaming` is only available when using {" or ".join(map(lambda x: f"`{x}`", SINGLE_PROTOCOLS))} as protocol, ' + f'got {load_protocol}' + ) + else: + return cls._load_binary_stream( + file_ctx, + protocol=load_protocol, + compress=load_compress, + show_progress=show_progress, + ) else: return cls._load_binary_all( file_ctx, load_protocol, load_compress, show_progress diff --git a/docarray/array/array/pushpull.py b/docarray/array/array/pushpull.py new file mode 100644 index 00000000000..e93fc4afec1 --- /dev/null +++ b/docarray/array/array/pushpull.py @@ -0,0 
+1,180 @@ +import logging +from abc import abstractmethod +from typing import ( + TYPE_CHECKING, + Dict, + Iterable, + Iterator, + Optional, + Tuple, + Type, + TypeVar, + cast, +) + +from typing_extensions import Literal +from typing_inspect import get_args + +PUSH_PULL_PROTOCOL = Literal['jac', 's3', 'file'] +SUPPORTED_PUSH_PULL_PROTOCOLS = get_args(PUSH_PULL_PROTOCOL) + +if TYPE_CHECKING: # pragma: no cover + from docarray import BaseDocument, DocumentArray + from docarray.store.abstract_doc_store import AbstractDocStore + + +SelfPushPullMixin = TypeVar('SelfPushPullMixin', bound='PushPullMixin') + + +class PushPullMixin(Iterable['BaseDocument']): + """Mixin class for push/pull functionality.""" + + __backends__: Dict[str, Type['AbstractDocStore']] = {} + document_type: Type['BaseDocument'] + + @abstractmethod + def __len__(self) -> int: + ... + + @staticmethod + def resolve_url(url: str) -> Tuple[PUSH_PULL_PROTOCOL, str]: + """Resolve the URL to the correct protocol and name.""" + protocol, name = url.split('://', 2) + if protocol in SUPPORTED_PUSH_PULL_PROTOCOLS: + protocol = cast(PUSH_PULL_PROTOCOL, protocol) + return protocol, name + else: + raise ValueError(f'Unsupported protocol {protocol}') + + @classmethod + def get_pushpull_backend( + cls: Type[SelfPushPullMixin], protocol: PUSH_PULL_PROTOCOL + ) -> Type['AbstractDocStore']: + """ + Get the backend for the given protocol. + + :param protocol: the protocol to use, e.g. 'jac', 'file', 's3' + :return: the backend class + """ + if protocol in cls.__backends__: + return cls.__backends__[protocol] + + if protocol == 'jac': + from docarray.store.jac import JACDocStore + + cls.__backends__[protocol] = JACDocStore + logging.debug('Loaded Jina AI Cloud backend') + elif protocol == 'file': + from docarray.store.file import FileDocStore + + cls.__backends__[protocol] = FileDocStore + logging.debug('Loaded Local Filesystem backend') + elif protocol == 's3': + from docarray.store.s3 import S3DocStore + + cls.__backends__[protocol] = S3DocStore + logging.debug('Loaded S3 backend') + else: + raise NotImplementedError(f'protocol {protocol} not supported') + + return cls.__backends__[protocol] + + def push( + self, + url: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push this DocumentArray object to the specified url. + + :param url: url specifying the protocol and save name of the DocumentArray. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param public: Only used by ``jac`` protocol. If true, anyone can pull a DocumentArray if they know its name. + Setting this to false will restrict access to only the creator. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"} + """ + logging.info(f'Pushing {len(self)} docs to {url}') + protocol, name = self.__class__.resolve_url(url) + return self.__class__.get_pushpull_backend(protocol).push( + self, name, public, show_progress, branding # type: ignore + ) + + @classmethod + def push_stream( + cls: Type[SelfPushPullMixin], + docs: Iterator['BaseDocument'], + url: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push a stream of documents to the specified url. 
+ + :param docs: a stream of documents + :param url: url specifying the protocol and save name of the DocumentArray. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param public: Only used by ``jac`` protocol. If true, anyone can pull a DocumentArray if they know its name. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"} + """ + logging.info(f'Pushing stream to {url}') + protocol, name = cls.resolve_url(url) + return cls.get_pushpull_backend(protocol).push_stream( + docs, name, public, show_progress, branding + ) + + @classmethod + def pull( + cls: Type[SelfPushPullMixin], + url: str, + show_progress: bool = False, + local_cache: bool = True, + ) -> 'DocumentArray': + """Pull a :class:`DocumentArray` from the specified url. + + :param url: url specifying the protocol and save name of the DocumentArray. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local folder + :return: a :class:`DocumentArray` object + """ + from docarray.base_document import AnyDocument + + if cls.document_type == AnyDocument: + raise TypeError( + 'There is no document schema defined. ' + 'Please specify the DocumentArray\'s Document type using `DocumentArray[MyDoc]`.' + ) + + logging.info(f'Pulling {url}') + protocol, name = cls.resolve_url(url) + return cls.get_pushpull_backend(protocol).pull( + cls, name, show_progress, local_cache # type: ignore + ) + + @classmethod + def pull_stream( + cls: Type[SelfPushPullMixin], + url: str, + show_progress: bool = False, + local_cache: bool = False, + ) -> Iterator['BaseDocument']: + """Pull a stream of Documents from the specified url. + + :param url: url specifying the protocol and save name of the DocumentArray. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local folder + :return: Iterator of Documents + """ + from docarray.base_document import AnyDocument + + if cls.document_type == AnyDocument: + raise TypeError( + 'There is no document schema defined. ' + 'Please specify the DocumentArray\'s Document type using `DocumentArray[MyDoc]`.' 
+ ) + + logging.info(f'Pulling Document stream from {url}') + protocol, name = cls.resolve_url(url) + return cls.get_pushpull_backend(protocol).pull_stream( + cls, name, show_progress, local_cache # type: ignore + ) diff --git a/docarray/store/__init__.py b/docarray/store/__init__.py new file mode 100644 index 00000000000..20eb3af1a51 --- /dev/null +++ b/docarray/store/__init__.py @@ -0,0 +1,5 @@ +from docarray.store.file import FileDocStore +from docarray.store.jac import JACDocStore +from docarray.store.s3 import S3DocStore + +__all__ = ['JACDocStore', 'FileDocStore', 'S3DocStore'] diff --git a/docarray/store/abstract_doc_store.py b/docarray/store/abstract_doc_store.py new file mode 100644 index 00000000000..cf212ceada4 --- /dev/null +++ b/docarray/store/abstract_doc_store.py @@ -0,0 +1,104 @@ +from abc import ABC, abstractmethod +from typing import Dict, Iterator, List, Optional, Type + +from typing_extensions import TYPE_CHECKING + +if TYPE_CHECKING: + from docarray import BaseDocument, DocumentArray + + +class AbstractDocStore(ABC): + @staticmethod + @abstractmethod + def list(namespace: str, show_table: bool) -> List[str]: + """List all DocumentArrays in the specified backend at the namespace. + + :param namespace: The namespace to list + :param show_table: If true, a table is printed to the console + :return: A list of DocumentArray names + """ + ... + + @staticmethod + @abstractmethod + def delete(name: str, missing_ok: bool) -> bool: + """Delete the DocumentArray object at the specified name + + :param name: The name of the DocumentArray to delete + :param missing_ok: If true, no error will be raised if the DocumentArray does not exist. + :return: True if the DocumentArray was deleted, False if it did not exist. + """ + ... + + @staticmethod + @abstractmethod + def push( + da: 'DocumentArray', + name: str, + public: bool, + show_progress: bool, + branding: Optional[Dict], + ) -> Dict: + """Push this DocumentArray to the specified name. + + :param da: The DocumentArray to push + :param name: The name to push to + :param public: Whether the DocumentArray should be publicly accessible + :param show_progress: If true, a progress bar will be displayed. + :param branding: Branding information to be stored with the DocumentArray + """ + ... + + @staticmethod + @abstractmethod + def push_stream( + docs: Iterator['BaseDocument'], + url: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push a stream of documents to the specified name. + + :param docs: a stream of documents + :param url: The name to push to + :param public: Whether the DocumentArray should be publicly accessible + :param show_progress: If true, a progress bar will be displayed. + :param branding: Branding information to be stored with the DocumentArray + """ + ... + + @staticmethod + @abstractmethod + def pull( + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool, + local_cache: bool, + ) -> 'DocumentArray': + """Pull a DocumentArray from the specified name. + + :param da_cls: The DocumentArray class to instantiate + :param name: The name to pull from + :param show_progress: If true, a progress bar will be displayed. + :param local_cache: If true, the DocumentArray will be cached locally + :return: A DocumentArray + """ + ... 
+ + @staticmethod + @abstractmethod + def pull_stream( + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool, + local_cache: bool, + ) -> Iterator['BaseDocument']: + """Pull a stream of documents from the specified name. + + :param da_cls: The DocumentArray class to instantiate + :param name: The name to pull from + :param show_progress: If true, a progress bar will be displayed. + :param local_cache: If true, the DocumentArray will be cached locally + :return: An iterator of documents""" + ... diff --git a/docarray/store/exceptions.py b/docarray/store/exceptions.py new file mode 100644 index 00000000000..9caf0d8a167 --- /dev/null +++ b/docarray/store/exceptions.py @@ -0,0 +1,4 @@ +class ConcurrentPushException(Exception): + """Exception raised when a concurrent push is detected.""" + + pass diff --git a/docarray/store/file.py b/docarray/store/file.py new file mode 100644 index 00000000000..287acbd8cc3 --- /dev/null +++ b/docarray/store/file.py @@ -0,0 +1,194 @@ +import logging +from pathlib import Path +from typing import Dict, Iterator, List, Optional, Type, TypeVar + +from typing_extensions import TYPE_CHECKING + +from docarray.store.abstract_doc_store import AbstractDocStore +from docarray.store.exceptions import ConcurrentPushException +from docarray.store.helpers import _from_binary_stream, _to_binary_stream +from docarray.utils.cache import get_cache_path + +if TYPE_CHECKING: + from docarray import BaseDocument, DocumentArray + +SelfFileDocStore = TypeVar('SelfFileDocStore', bound='FileDocStore') + + +class FileDocStore(AbstractDocStore): + @staticmethod + def _abs_filepath(name: str) -> Path: + """Resolve a name to an absolute path. + If it is not a path, the cache directoty is prepended. + If it is a path, it is resolved to an absolute path. + """ + if not (name.startswith('/') or name.startswith('~') or name.startswith('.')): + name = str(get_cache_path() / name) + if name.startswith('~'): + name = str(Path.home() / name[2:]) + return Path(name).resolve() + + @classmethod + def list( + cls: Type[SelfFileDocStore], namespace: str, show_table: bool + ) -> List[str]: + """List all DocumentArrays in a directory. + + :param namespace: The directory to list. + :param show_table: If True, print a table of the files in the directory. + :return: A list of the names of the DocumentArrays in the directory. + """ + namespace_dir = cls._abs_filepath(namespace) + if not namespace_dir.exists(): + raise FileNotFoundError(f'Directory {namespace} does not exist') + da_files = [dafile for dafile in namespace_dir.glob('*.da')] + + if show_table: + from datetime import datetime + + from rich import box, filesize + from rich.console import Console + from rich.table import Table + + table = Table( + title=f'You have {len(da_files)} DocumentArrays in file://{namespace_dir}', + box=box.SIMPLE, + highlight=True, + ) + table.add_column('Name') + table.add_column('Last Modified', justify='center') + table.add_column('Size') + + for da_file in da_files: + table.add_row( + da_file.stem, + str(datetime.fromtimestamp(int(da_file.stat().st_ctime))), + str(filesize.decimal(da_file.stat().st_size)), + ) + + Console().print(table) + + return [dafile.stem for dafile in da_files] + + @classmethod + def delete( + cls: Type[SelfFileDocStore], name: str, missing_ok: bool = False + ) -> bool: + """Delete a DocumentArray from the local filesystem. + + :param name: The name of the DocumentArray to delete. + :param missing_ok: If True, do not raise an exception if the file does not exist. Defaults to False. 
+ :return: True if the file was deleted, False if it did not exist. + """ + path = cls._abs_filepath(name) + try: + path.with_suffix('.da').unlink() + return True + except FileNotFoundError: + if not missing_ok: + raise + return False + + @classmethod + def push( + cls: Type[SelfFileDocStore], + da: 'DocumentArray', + name: str, + public: bool, + show_progress: bool, + branding: Optional[Dict], + ) -> Dict: + """Push this DocumentArray object to the specified file path. + + :param name: The file path to push to. + :param public: Not used by the ``file`` protocol. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Not used by the ``file`` protocol. + """ + return cls.push_stream(iter(da), name, public, show_progress, branding) + + @classmethod + def push_stream( + cls: Type[SelfFileDocStore], + docs: Iterator['BaseDocument'], + name: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push a stream of documents to the specified file path. + + :param docs: a stream of documents + :param name: The file path to push to. + :param public: Not used by the ``file`` protocol. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Not used by the ``file`` protocol. + """ + if branding is not None: + logging.warning('branding is not supported for "file" protocol') + + source = _to_binary_stream( + docs, protocol='protobuf', compress='gzip', show_progress=show_progress + ) + path = cls._abs_filepath(name).with_suffix('.da.tmp') + if path.exists(): + raise ConcurrentPushException(f'File {path} already exists.') + with open(path, 'wb') as f: + while True: + try: + f.write(next(source)) + except StopIteration: + break + path.rename(path.with_suffix('')) + return {} + + @classmethod + def pull( + cls: Type[SelfFileDocStore], + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool, + local_cache: bool, + ) -> 'DocumentArray': + """Pull a :class:`DocumentArray` from the specified url. + + :param name: The file path to pull from. + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local folder + :return: a :class:`DocumentArray` object + """ + + return da_cls( + cls.pull_stream( + da_cls, name, show_progress=show_progress, local_cache=local_cache + ) + ) + + @classmethod + def pull_stream( + cls: Type[SelfFileDocStore], + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool, + local_cache: bool, + ) -> Iterator['BaseDocument']: + """Pull a stream of Documents from the specified file. + + :param name: The file path to pull from. + :param show_progress: if true, display a progress bar. + :param local_cache: Not used by the ``file`` protocol. + :return: Iterator of Documents + """ + + if local_cache: + logging.warning('local_cache is not supported for "file" protocol') + + path = cls._abs_filepath(name).with_suffix('.da') + source = open(path, 'rb') + return _from_binary_stream( + da_cls.document_type, + source, + protocol='protobuf', + compress='gzip', + show_progress=show_progress, + ) diff --git a/docarray/store/helpers.py b/docarray/store/helpers.py new file mode 100644 index 00000000000..25e40991e68 --- /dev/null +++ b/docarray/store/helpers.py @@ -0,0 +1,207 @@ +# It is usually a bad idea to have a helper file because it means we don't know where to put the code (or haven't put much thought into it). +# With that said, rules are meant to be broken, we will live with this for now. 
+from contextlib import nullcontext +from typing import Dict, Iterable, Iterator, NoReturn, Optional, Sequence, Type, TypeVar + +from rich import filesize +from typing_extensions import TYPE_CHECKING, Protocol + +from docarray.utils.progress_bar import _get_progressbar + +if TYPE_CHECKING: + from pathlib import Path + + import requests + +CACHING_REQUEST_READER_CHUNK_SIZE = 2**20 + + +def get_version_info() -> Dict: + """ + Get the version of libraries used in Jina and environment variables. + + :return: Version information and environment variables + """ + import platform + from uuid import getnode + + import google.protobuf + from google.protobuf.internal import api_implementation + + from docarray import __version__ + + return { + 'docarray': __version__, + 'protobuf': google.protobuf.__version__, + 'proto-backend': api_implementation.Type(), + 'python': platform.python_version(), + 'platform': platform.system(), + 'platform-release': platform.release(), + 'platform-version': platform.version(), + 'architecture': platform.machine(), + 'processor': platform.processor(), + 'uid': getnode(), + } + + +def ibatch(iterable: Sequence, batch_size: int = 32) -> Iterable: + """Get an iterator of batched items from Sequence.""" + seq_len = len(iterable) + for offset in range(0, seq_len, batch_size): + yield iterable[offset : min(offset + batch_size, seq_len)] + + +class _BufferedCachingReader: + """A buffered reader that writes to a cache file while reading.""" + + def __init__( + self, iter_bytes: Iterator[bytes], cache_path: Optional['Path'] = None + ): + self._data = iter_bytes + self._chunk: bytes = b'' + self._seek = 0 + self._chunk_len = 0 + + self._cache = open(cache_path, 'wb') if cache_path else None + + def read(self, size: int = -1) -> bytes: + if size == -1: + return b''.join(self._data) + + if self._seek + size > self._chunk_len: + _bytes = self._chunk[self._seek : self._chunk_len] + size -= self._chunk_len - self._seek + + self._chunk = next(self._data) + self._seek = 0 + self._chunk_len = len(self._chunk) + if self._cache: + self._cache.write(self._chunk) + + _bytes += self._chunk[self._seek : self._seek + size] + self._seek += size + return _bytes + else: + _bytes = self._chunk[self._seek : self._seek + size] + self._seek += size + return _bytes + + def __del__(self): + if self._cache: + self._cache.close() + + +class _BufferedCachingRequestReader(_BufferedCachingReader): + """A buffered reader for requests.Response that writes to a cache file while reading.""" + + def __init__(self, r: 'requests.Response', cache_path: Optional['Path'] = None): + super().__init__( + r.iter_content(chunk_size=CACHING_REQUEST_READER_CHUNK_SIZE), cache_path + ) + + +def raise_req_error(resp: 'requests.Response') -> NoReturn: + """Definitely raise an error from a response.""" + resp.raise_for_status() + raise ValueError(f'Unexpected response status: {resp.status_code}') + + +T_Elem = TypeVar('T_Elem') + + +class Streamable(Protocol): + """A protocol for streamable objects.""" + + def to_bytes(self, protocol: str, compress: Optional[str]) -> bytes: + ... + + @classmethod + def from_bytes( + cls: Type[T_Elem], bytes: bytes, protocol: str, compress: Optional[str] + ) -> 'T_Elem': + ... + + +class ReadableBytes(Protocol): + def read(self, size: int = -1) -> bytes: + ... + + def close(self): + ... 
+ + +def _to_binary_stream( + iterator: Iterator['Streamable'], + total: Optional[int] = None, + protocol: str = 'protobuf', + compress: Optional[str] = None, + show_progress: bool = False, +) -> Iterator[bytes]: + + if show_progress: + pbar, t = _get_progressbar( + 'Serializing', disable=not show_progress, total=total + ) + else: + pbar = nullcontext() + + with pbar: + if show_progress: + _total_size = 0 + count = 0 + pbar.start_task(t) + for item in iterator: + item_bytes = item.to_bytes(protocol=protocol, compress=compress) + len_item_as_bytes = len(item_bytes).to_bytes(4, 'big', signed=False) + all_bytes = len_item_as_bytes + item_bytes + yield all_bytes + + if show_progress: + _total_size += len(all_bytes) + count += 1 + pbar.update(t, advance=1, total_size=str(filesize.decimal(_total_size))) + + yield int(0).to_bytes(4, 'big', signed=False) + + +T = TypeVar('T', bound=Streamable) + + +def _from_binary_stream( + cls: Type[T], + stream: ReadableBytes, + total: Optional[int] = None, + protocol: str = 'protobuf', + compress: Optional[str] = None, + show_progress: bool = False, +) -> Iterator['T']: + + if show_progress: + pbar, t = _get_progressbar( + 'Deserializing', disable=not show_progress, total=total + ) + else: + pbar = nullcontext() + + with pbar: + if show_progress: + _total_size = 0 + pbar.start_task(t) + while True: + len_bytes = stream.read(4) + if len(len_bytes) < 4: + raise ValueError('Unexpected end of stream') + len_item = int.from_bytes(len_bytes, 'big', signed=False) + if len_item == 0: + break + item_bytes = stream.read(len_item) + if len(item_bytes) < len_item: + raise ValueError('Unexpected end of stream') + item = cls.from_bytes(item_bytes, protocol=protocol, compress=compress) + + yield item + + if show_progress: + _total_size += len_item + 4 + pbar.update(t, advance=1, total_size=str(filesize.decimal(_total_size))) + stream.close() diff --git a/docarray/store/jac.py b/docarray/store/jac.py new file mode 100644 index 00000000000..285e65e41f9 --- /dev/null +++ b/docarray/store/jac.py @@ -0,0 +1,360 @@ +import json +import logging +import os +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterator, + List, + Optional, + Type, + TypeVar, + Union, +) + +import hubble +from hubble import Client as HubbleClient +from hubble.client.endpoints import EndpointsV2 + +from docarray.store.abstract_doc_store import AbstractDocStore +from docarray.store.helpers import ( + _BufferedCachingRequestReader, + get_version_info, + raise_req_error, +) +from docarray.utils.cache import get_cache_path + +if TYPE_CHECKING: # pragma: no cover + import io + + from docarray import BaseDocument, DocumentArray + + +def _get_length_from_summary(summary: List[Dict]) -> Optional[int]: + """Get the length from summary.""" + for item in summary: + if 'Length' == item['name']: + return item['value'] + raise ValueError('Length not found in summary') + + +def _get_raw_summary(self: 'DocumentArray') -> List[Dict[str, Any]]: + items: List[Dict[str, Any]] = [ + dict( + name='Type', + value=self.__class__.__name__, + description='The type of the DocumentArray', + ), + dict( + name='Length', + value=len(self), + description='The length of the DocumentArray', + ), + dict( + name='Homogenous Documents', + value=True, + description='Whether all documents are of the same structure, attributes', + ), + dict( + name='Fields', + value=tuple(self[0].__class__.__fields__.keys()), + description='The fields of the Document', + ), + dict( + name='Multimodal dataclass', + value=True, + 
description='Whether all documents are multimodal', + ), + ] + + return items + + +SelfJACDocStore = TypeVar('SelfJACDocStore', bound='JACDocStore') + + +class JACDocStore(AbstractDocStore): + """Class to push and pull DocumentArray to and from Jina AI Cloud.""" + + @staticmethod + @hubble.login_required + def list(namespace: str = '', show_table: bool = False) -> List[str]: + """List all available arrays in the cloud. + + :param namespace: Not supported for Jina AI Cloud. + :param show_table: if true, show the table of the arrays. + :returns: List of available DocumentArray's names. + """ + if len(namespace) > 0: + logging.warning('Namespace is not supported for Jina AI Cloud.') + from rich import print + + result = [] + from rich import box + from rich.table import Table + + resp = HubbleClient(jsonify=True).list_artifacts( + filter={'type': 'documentArray'}, sort={'createdAt': 1} + ) + + table = Table( + title=f'You have {resp["meta"]["total"]} DocumentArray on the cloud', + box=box.SIMPLE, + highlight=True, + ) + table.add_column('Name') + table.add_column('Length') + table.add_column('Access') + table.add_column('Created at', justify='center') + table.add_column('Updated at', justify='center') + + for da in resp['data']: + result.append(da['name']) + + table.add_row( + da['name'], + str(_get_length_from_summary(da['metaData'].get('summary', []))), + da['visibility'], + da['createdAt'], + da['updatedAt'], + ) + + if show_table: + print(table) + return result + + @staticmethod + @hubble.login_required + def delete(name: str, missing_ok: bool = True) -> bool: + """ + Delete a DocumentArray from the cloud. + :param name: the name of the DocumentArray to delete. + :param missing_ok: if true, do not raise an error if the DocumentArray does not exist. + :return: True if the DocumentArray was deleted, False if it did not exist. + """ + try: + HubbleClient(jsonify=True).delete_artifact(name=name) + except hubble.excepts.RequestedEntityNotFoundError: + if missing_ok: + return False + else: + raise + return True + + @staticmethod + @hubble.login_required + def push( + da: 'DocumentArray', + name: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push this DocumentArray object to Jina AI Cloud + + .. note:: + - Push with the same ``name`` will override the existing content. + - Kinda like a public clipboard where everyone can override anyone's content. + So to make your content survive longer, you may want to use longer & more complicated name. + - The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for + persistence. Only use this full temporary transmission/storage/clipboard. + + :param name: A name that can later be used to retrieve this :class:`DocumentArray`. + :param public: By default, anyone can pull a DocumentArray if they know its name. + Setting this to false will restrict access to only the creator. + :param show_progress: If true, a progress bar will be displayed. + :param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. 
{"icon": "emoji", "background": "#fff"} + """ + import requests + import urllib3 + + delimiter = os.urandom(32) + + data, ctype = urllib3.filepost.encode_multipart_formdata( + { + 'file': ( + 'DocumentArray', + delimiter, + ), + 'name': name, + 'type': 'documentArray', + 'public': public, + 'metaData': json.dumps( + { + 'summary': _get_raw_summary(da), + 'branding': branding, + 'version': get_version_info(), + }, + sort_keys=True, + ), + } + ) + + headers = { + 'Content-Type': ctype, + } + + auth_token = hubble.get_token() + if auth_token: + headers['Authorization'] = f'token {auth_token}' + + _head, _tail = data.split(delimiter) + + def gen(): + yield _head + binary_stream = da.to_binary_stream( + protocol='protobuf', compress='gzip', show_progress=show_progress + ) + while True: + try: + yield next(binary_stream) + except StopIteration: + break + yield _tail + + response = requests.post( + HubbleClient()._base_url + EndpointsV2.upload_artifact, + data=gen(), + headers=headers, + ) + + if response.ok: + return response.json()['data'] + else: + if response.status_code >= 400 and 'readableMessage' in response.json(): + response.reason = response.json()['readableMessage'] + raise_req_error(response) + + @classmethod + @hubble.login_required + def push_stream( + cls: Type[SelfJACDocStore], + docs: Iterator['BaseDocument'], + name: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push a stream of documents to Jina AI Cloud + + .. note:: + - Push with the same ``name`` will override the existing content. + - Kinda like a public clipboard where everyone can override anyone's content. + So to make your content survive longer, you may want to use longer & more complicated name. + - The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for + persistence. Only use this full temporary transmission/storage/clipboard. + + :param name: A name that can later be used to retrieve this :class:`DocumentArray`. + :param public: By default, anyone can pull a DocumentArray if they know its name. + Setting this to false will restrict access to only the creator. + :param show_progress: If true, a progress bar will be displayed. + :param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. {"icon": "emoji", "background": "#fff"} + """ + from docarray import DocumentArray + + # This is a temporary solution to push a stream of documents + # The memory footprint is not ideal + # But it must be done this way for now because Hubble expects to know the length of the DocumentArray + # before it starts receiving the documents + first_doc = next(docs) + da = DocumentArray[first_doc.__class__]([first_doc]) # type: ignore + for doc in docs: + da.append(doc) + return cls.push(da, name, public, show_progress, branding) + + @staticmethod + @hubble.login_required + def pull( + cls: Type['DocumentArray'], + name: str, + show_progress: bool = False, + local_cache: bool = True, + ) -> 'DocumentArray': + """Pull a :class:`DocumentArray` from Jina AI Cloud to local. + + :param name: the upload name set during :meth:`.push` + :param show_progress: if true, display a progress bar. 
+ :param local_cache: store the downloaded DocumentArray to local folder + :return: a :class:`DocumentArray` object + """ + from docarray import DocumentArray + + return DocumentArray[cls.document_type]( # type: ignore + JACDocStore.pull_stream(cls, name, show_progress, local_cache) + ) + + @staticmethod + @hubble.login_required + def pull_stream( + cls: Type['DocumentArray'], + name: str, + show_progress: bool = False, + local_cache: bool = False, + ) -> Iterator['BaseDocument']: + """Pull a :class:`DocumentArray` from Jina AI Cloud to local. + + :param name: the upload name set during :meth:`.push` + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local folder + :return: An iterator of Documents + """ + import requests + + headers = {} + + auth_token = hubble.get_token() + + if auth_token: + headers['Authorization'] = f'token {auth_token}' + + url = HubbleClient()._base_url + EndpointsV2.download_artifact + f'?name={name}' + response = requests.get(url, headers=headers) + + if response.ok: + url = response.json()['data']['download'] + else: + response.raise_for_status() + + with requests.get( + url, + stream=True, + ) as r: + from contextlib import nullcontext + + r.raise_for_status() + save_name = name.replace('/', '_') + + tmp_cache_file = Path(f'/tmp/{save_name}.da') + _source: Union[ + _BufferedCachingRequestReader, io.BufferedReader + ] = _BufferedCachingRequestReader(r, tmp_cache_file) + + cache_file = get_cache_path() / f'{save_name}.da' + if local_cache and cache_file.exists(): + _cache_len = cache_file.stat().st_size + if _cache_len == int(r.headers['Content-length']): + if show_progress: + print(f'Loading from local cache {cache_file}') + _source = open(cache_file, 'rb') + r.close() + + docs = cls._load_binary_stream( + nullcontext(_source), # type: ignore + protocol='protobuf', + compress='gzip', + show_progress=show_progress, + ) + try: + while True: + yield next(docs) + except StopIteration: + pass + + if local_cache: + if isinstance(_source, _BufferedCachingRequestReader): + Path(get_cache_path()).mkdir(parents=True, exist_ok=True) + tmp_cache_file.rename(cache_file) + else: + _source.close() diff --git a/docarray/store/s3.py b/docarray/store/s3.py new file mode 100644 index 00000000000..89f5b5b1310 --- /dev/null +++ b/docarray/store/s3.py @@ -0,0 +1,239 @@ +import io +import logging +from pathlib import Path +from typing import Dict, Iterator, List, Optional, Type, TypeVar + +import boto3 +import botocore +from smart_open import open +from typing_extensions import TYPE_CHECKING + +from docarray.store.abstract_doc_store import AbstractDocStore +from docarray.store.helpers import _from_binary_stream, _to_binary_stream +from docarray.utils.cache import get_cache_path + +if TYPE_CHECKING: # pragma: no cover + from docarray import BaseDocument, DocumentArray + +SelfS3DocStore = TypeVar('SelfS3DocStore', bound='S3DocStore') + + +class _BufferedCachingReader: + """A buffered reader that writes to a cache file while reading.""" + + def __init__( + self, iter_bytes: io.BufferedReader, cache_path: Optional['Path'] = None + ): + self._data = iter_bytes + self._cache = None + if cache_path: + self._cache_path = cache_path.with_suffix('.tmp') + self._cache = open(self._cache_path, 'wb') + self.closed = False + + def read(self, size: Optional[int] = -1) -> bytes: + bytes = self._data.read(size) + if self._cache: + self._cache.write(bytes) + return bytes + + def close(self): + if not self.closed and self._cache: + 
self._cache_path.rename(self._cache_path.with_suffix('.da')) + self._cache.close() + + +class S3DocStore(AbstractDocStore): + """Class to push and pull DocumentArray to and from S3.""" + + @staticmethod + def list(namespace: str, show_table: bool = False) -> List[str]: + """List all DocumentArrays in the specified bucket and namespace. + + :param namespace: The bucket and namespace to list. e.g. my_bucket/my_namespace + :param show_table: If true, a rich table will be printed to the console. + :return: A list of DocumentArray names. + """ + bucket, namespace = namespace.split('/', 1) + s3 = boto3.resource('s3') + s3_bucket = s3.Bucket(bucket) + da_files = [ + obj + for obj in s3_bucket.objects.all() + if obj.key.startswith(namespace) and obj.key.endswith('.da') + ] + da_names = [f.key.split('/')[-1].split('.')[0] for f in da_files] + + if show_table: + from rich import box, filesize + from rich.console import Console + from rich.table import Table + + table = Table( + title=f'You have {len(da_files)} DocumentArrays in bucket s3://{bucket} under the namespace "{namespace}"', + box=box.SIMPLE, + highlight=True, + ) + table.add_column('Name') + table.add_column('Last Modified', justify='center') + table.add_column('Size') + + for da_name, da_file in zip(da_names, da_files): + table.add_row( + da_name, + str(da_file.last_modified), + str(filesize.decimal(da_file.size)), + ) + + Console().print(table) + return da_names + + @staticmethod + def delete(name: str, missing_ok: bool = True) -> bool: + """Delete the DocumentArray object at the specified bucket and key. + + :param name: The bucket and key to delete. e.g. my_bucket/my_key + :param missing_ok: If true, no error will be raised if the object does not exist. + :return: True if the object was deleted, False if it did not exist. + """ + bucket, name = name.split('/', 1) + s3 = boto3.resource('s3') + object = s3.Object(bucket, name + '.da') + try: + object.load() + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == "404": + if missing_ok: + return False + else: + raise ValueError(f'Object {name} does not exist') + else: + raise + object.delete() + return True + + @classmethod + def push( + cls: Type[SelfS3DocStore], + da: 'DocumentArray', + name: str, + public: bool = False, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push this DocumentArray object to the specified bucket and key. + + :param da: The DocumentArray to push. + :param name: The bucket and key to push to. e.g. my_bucket/my_key + :param public: Not used by the ``s3`` protocol. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Not used by the ``s3`` protocol. + """ + return cls.push_stream(iter(da), name, public, show_progress, branding) + + @staticmethod + def push_stream( + docs: Iterator['BaseDocument'], + name: str, + public: bool = True, + show_progress: bool = False, + branding: Optional[Dict] = None, + ) -> Dict: + """Push a stream of documents to the specified bucket and key. + + :param docs: a stream of documents + :param name: The bucket and key to push to. e.g. my_bucket/my_key + :param public: Not used by the ``s3`` protocol. + :param show_progress: If true, a progress bar will be displayed. + :param branding: Not used by the ``s3`` protocol. 
+ """ + if branding is not None: + logging.warning("Branding is not supported for S3 push") + + bucket, name = name.split('/', 1) + binary_stream = _to_binary_stream( + docs, protocol='pickle', compress=None, show_progress=show_progress + ) + + # Upload to S3 + with open( + f"s3://{bucket}/{name}.da", + 'wb', + compression='.gz', + transport_params={'multipart_upload': False}, + ) as fout: + while True: + try: + fout.write(next(binary_stream)) + except StopIteration: + break + + return {} + + @classmethod + def pull( + cls: Type[SelfS3DocStore], + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool = False, + local_cache: bool = False, + ) -> 'DocumentArray': + """Pull a :class:`DocumentArray` from the specified bucket and key. + + :param name: The bucket and key to pull from. e.g. my_bucket/my_key + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local cache + :return: a :class:`DocumentArray` object + """ + da = da_cls( # type: ignore + cls.pull_stream( + da_cls, name, show_progress=show_progress, local_cache=local_cache + ) + ) + return da + + @classmethod + def pull_stream( + cls: Type[SelfS3DocStore], + da_cls: Type['DocumentArray'], + name: str, + show_progress: bool, + local_cache: bool, + ) -> Iterator['BaseDocument']: + """Pull a stream of Documents from the specified name. + Name is expected to be in the format of bucket/key. + + :param name: The bucket and key to pull from. e.g. my_bucket/my_key + :param show_progress: if true, display a progress bar. + :param local_cache: store the downloaded DocumentArray to local cache + :return: An iterator of Documents + """ + + bucket, name = name.split('/', 1) + + save_name = name.replace('/', '_') + cache_path = get_cache_path() / f'{save_name}.da' + + source = _BufferedCachingReader( + open(f"s3://{bucket}/{name}.da", 'rb', compression='.gz'), + cache_path=cache_path if local_cache else None, + ) + + if local_cache: + if cache_path.exists(): + object_header = boto3.client('s3').head_object( + Bucket=bucket, Key=name + '.da' + ) + if cache_path.stat().st_size == object_header['ContentLength']: + logging.info( + f'Using cached file for {name} (size: {cache_path.stat().st_size})' + ) + source = open(cache_path, 'rb') + + return _from_binary_stream( + da_cls.document_type, + source, + protocol='pickle', + compress=None, + show_progress=show_progress, + ) diff --git a/docarray/utils/cache.py b/docarray/utils/cache.py new file mode 100644 index 00000000000..4df305414b3 --- /dev/null +++ b/docarray/utils/cache.py @@ -0,0 +1,17 @@ +import os +from functools import lru_cache +from pathlib import Path + + +@lru_cache(maxsize=None) +def get_cache_path() -> Path: + """ + Get the path to the cache directory. + + :return: The path to the cache directory. 
+ """ + cache_path = Path.home() / '.cache' / 'docarray' + if "DOCARRAY_CACHE" in os.environ: + cache_path = Path(os.environ["DOCARRAY_CACHE"]) + cache_path.mkdir(parents=True, exist_ok=True) + return cache_path diff --git a/docarray/utils/progress_bar.py b/docarray/utils/progress_bar.py index 01f32cec2ae..ad7bb3dcd5d 100644 --- a/docarray/utils/progress_bar.py +++ b/docarray/utils/progress_bar.py @@ -1,11 +1,14 @@ +from typing import Optional + from rich.progress import ( - Progress, BarColumn, - SpinnerColumn, MofNCompleteColumn, + Progress, + SpinnerColumn, + Text, TextColumn, + TimeElapsedColumn, TimeRemainingColumn, - Text, ) @@ -24,8 +27,8 @@ def render(self, task) -> Text: return text -def _get_pbar(disable): - return Progress( +def _get_pbar(disable: bool, total: Optional[int] = None): + columns = ( SpinnerColumn(), TextColumn('[bold]{task.description}'), BarColumn(), @@ -33,19 +36,23 @@ def _get_pbar(disable): '•', QPSColumn('{task.speed} QPS', justify='right', style='progress.data.speed'), '•', - TimeRemainingColumn(), + TimeRemainingColumn() if total else TimeElapsedColumn(), '•', TextColumn( '[bold blue]{task.fields[total_size]}', justify='right', style='progress.filesize', ), + ) + + return Progress( + *columns, transient=False, disable=disable, ) -def _get_progressbar(description, disable, total): - progress = _get_pbar(disable) +def _get_progressbar(description: str, disable: bool, total: Optional[int]): + progress = _get_pbar(disable, total) task = progress.add_task(description, total=total, start=False, total_size=0) return progress, task diff --git a/poetry.lock b/poetry.lock index d1889e68e3c..6b31ed3d618 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,130 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. 
+ +[[package]] +name = "aiohttp" +version = "3.8.4" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ce45967538fb747370308d3145aa68a074bdecb4f3a300869590f725ced69c1"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b744c33b6f14ca26b7544e8d8aadff6b765a80ad6164fb1a430bbadd593dfb1a"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a45865451439eb320784918617ba54b7a377e3501fb70402ab84d38c2cd891b"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86d42d7cba1cec432d47ab13b6637bee393a10f664c425ea7b305d1301ca1a3"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee3c36df21b5714d49fc4580247947aa64bcbe2939d1b77b4c8dcb8f6c9faecc"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:176a64b24c0935869d5bbc4c96e82f89f643bcdf08ec947701b9dbb3c956b7dd"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c844fd628851c0bc309f3c801b3a3d58ce430b2ce5b359cd918a5a76d0b20cb5"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5393fb786a9e23e4799fec788e7e735de18052f83682ce2dfcabaf1c00c2c08e"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e4b09863aae0dc965c3ef36500d891a3ff495a2ea9ae9171e4519963c12ceefd"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:adfbc22e87365a6e564c804c58fc44ff7727deea782d175c33602737b7feadb6"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:147ae376f14b55f4f3c2b118b95be50a369b89b38a971e80a17c3fd623f280c9"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:eafb3e874816ebe2a92f5e155f17260034c8c341dad1df25672fb710627c6949"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6cc15d58053c76eacac5fa9152d7d84b8d67b3fde92709195cb984cfb3475ea"}, + {file = "aiohttp-3.8.4-cp310-cp310-win32.whl", hash = "sha256:59f029a5f6e2d679296db7bee982bb3d20c088e52a2977e3175faf31d6fb75d1"}, + {file = "aiohttp-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:fe7ba4a51f33ab275515f66b0a236bcde4fb5561498fe8f898d4e549b2e4509f"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d8ef1a630519a26d6760bc695842579cb09e373c5f227a21b67dc3eb16cfea4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b3f2e06a512e94722886c0827bee9807c86a9f698fac6b3aee841fab49bbfb4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a80464982d41b1fbfe3154e440ba4904b71c1a53e9cd584098cd41efdb188ef"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b631e26df63e52f7cce0cce6507b7a7f1bc9b0c501fcde69742130b32e8782f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f43255086fe25e36fd5ed8f2ee47477408a73ef00e804cb2b5cba4bf2ac7f5e"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d347a172f866cd1d93126d9b239fcbe682acb39b48ee0873c73c933dd23bd0f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a3fec6a4cb5551721cdd70473eb009d90935b4063acc5f40905d40ecfea23e05"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80a37fe8f7c1e6ce8f2d9c411676e4bc633a8462844e38f46156d07a7d401654"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d1e6a862b76f34395a985b3cd39a0d949ca80a70b6ebdea37d3ab39ceea6698a"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd468460eefef601ece4428d3cf4562459157c0f6523db89365202c31b6daebb"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:618c901dd3aad4ace71dfa0f5e82e88b46ef57e3239fc7027773cb6d4ed53531"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:652b1bff4f15f6287550b4670546a2947f2a4575b6c6dff7760eafb22eacbf0b"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80575ba9377c5171407a06d0196b2310b679dc752d02a1fcaa2bc20b235dbf24"}, + {file = "aiohttp-3.8.4-cp311-cp311-win32.whl", hash = "sha256:bbcf1a76cf6f6dacf2c7f4d2ebd411438c275faa1dc0c68e46eb84eebd05dd7d"}, + {file = "aiohttp-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:6e74dd54f7239fcffe07913ff8b964e28b712f09846e20de78676ce2a3dc0bfc"}, + {file = "aiohttp-3.8.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:880e15bb6dad90549b43f796b391cfffd7af373f4646784795e20d92606b7a51"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb96fa6b56bb536c42d6a4a87dfca570ff8e52de2d63cabebfd6fb67049c34b6"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a6cadebe132e90cefa77e45f2d2f1a4b2ce5c6b1bfc1656c1ddafcfe4ba8131"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f352b62b45dff37b55ddd7b9c0c8672c4dd2eb9c0f9c11d395075a84e2c40f75"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ab43061a0c81198d88f39aaf90dae9a7744620978f7ef3e3708339b8ed2ef01"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9cb1565a7ad52e096a6988e2ee0397f72fe056dadf75d17fa6b5aebaea05622"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:1b3ea7edd2d24538959c1c1abf97c744d879d4e541d38305f9bd7d9b10c9ec41"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:7c7837fe8037e96b6dd5cfcf47263c1620a9d332a87ec06a6ca4564e56bd0f36"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3b90467ebc3d9fa5b0f9b6489dfb2c304a1db7b9946fa92aa76a831b9d587e99"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:cab9401de3ea52b4b4c6971db5fb5c999bd4260898af972bf23de1c6b5dd9d71"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d1f9282c5f2b5e241034a009779e7b2a1aa045f667ff521e7948ea9b56e0c5ff"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win32.whl", hash = "sha256:5e14f25765a578a0a634d5f0cd1e2c3f53964553a00347998dfdf96b8137f777"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win_amd64.whl", hash = "sha256:4c745b109057e7e5f1848c689ee4fb3a016c8d4d92da52b312f8a509f83aa05e"}, + {file = "aiohttp-3.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:aede4df4eeb926c8fa70de46c340a1bc2c6079e1c40ccf7b0eae1313ffd33519"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4ddaae3f3d32fc2cb4c53fab020b69a05c8ab1f02e0e59665c6f7a0d3a5be54f"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4eb3b82ca349cf6fadcdc7abcc8b3a50ab74a62e9113ab7a8ebc268aad35bb9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bcb89336efa095ea21b30f9e686763f2be4478f1b0a616969551982c4ee4c3b"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c08e8ed6fa3d477e501ec9db169bfac8140e830aa372d77e4a43084d8dd91ab"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6cd05ea06daca6ad6a4ca3ba7fe7dc5b5de063ff4daec6170ec0f9979f6c332"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7a00a9ed8d6e725b55ef98b1b35c88013245f35f68b1b12c5cd4100dddac333"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:de04b491d0e5007ee1b63a309956eaed959a49f5bb4e84b26c8f5d49de140fa9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:40653609b3bf50611356e6b6554e3a331f6879fa7116f3959b20e3528783e699"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dbf3a08a06b3f433013c143ebd72c15cac33d2914b8ea4bea7ac2c23578815d6"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854f422ac44af92bfe172d8e73229c270dc09b96535e8a548f99c84f82dde241"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win32.whl", hash = "sha256:aeb29c84bb53a84b1a81c6c09d24cf33bb8432cc5c39979021cc0f98c1292a1a"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:db3fc6120bce9f446d13b1b834ea5b15341ca9ff3f335e4a951a6ead31105480"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fabb87dd8850ef0f7fe2b366d44b77d7e6fa2ea87861ab3844da99291e81e60f"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91f6d540163f90bbaef9387e65f18f73ffd7c79f5225ac3d3f61df7b0d01ad15"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d265f09a75a79a788237d7f9054f929ced2e69eb0bb79de3798c468d8a90f945"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d89efa095ca7d442a6d0cbc755f9e08190ba40069b235c9886a8763b03785da"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dac314662f4e2aa5009977b652d9b8db7121b46c38f2073bfeed9f4049732cd"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe11310ae1e4cd560035598c3f29d86cef39a83d244c7466f95c27ae04850f10"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ddb2a2026c3f6a68c3998a6c47ab6795e4127315d2e35a09997da21865757f8"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e75b89ac3bd27d2d043b234aa7b734c38ba1b0e43f07787130a0ecac1e12228a"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6e601588f2b502c93c30cd5a45bfc665faaf37bbe835b7cfd461753068232074"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a5d794d1ae64e7753e405ba58e08fcfa73e3fad93ef9b7e31112ef3c9a0efb52"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a1f4689c9a1462f3df0a1f7e797791cd6b124ddbee2b570d34e7f38ade0e2c71"}, + {file = 
"aiohttp-3.8.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3032dcb1c35bc330134a5b8a5d4f68c1a87252dfc6e1262c65a7e30e62298275"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8189c56eb0ddbb95bfadb8f60ea1b22fcfa659396ea36f6adcc521213cd7b44d"}, + {file = "aiohttp-3.8.4-cp38-cp38-win32.whl", hash = "sha256:33587f26dcee66efb2fff3c177547bd0449ab7edf1b73a7f5dea1e38609a0c54"}, + {file = "aiohttp-3.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:e595432ac259af2d4630008bf638873d69346372d38255774c0e286951e8b79f"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a7bdf9e57126dc345b683c3632e8ba317c31d2a41acd5800c10640387d193ed"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:22f6eab15b6db242499a16de87939a342f5a950ad0abaf1532038e2ce7d31567"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7235604476a76ef249bd64cb8274ed24ccf6995c4a8b51a237005ee7a57e8643"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea9eb976ffdd79d0e893869cfe179a8f60f152d42cb64622fca418cd9b18dc2a"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92c0cea74a2a81c4c76b62ea1cac163ecb20fb3ba3a75c909b9fa71b4ad493cf"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493f5bc2f8307286b7799c6d899d388bbaa7dfa6c4caf4f97ef7521b9cb13719"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a63f03189a6fa7c900226e3ef5ba4d3bd047e18f445e69adbd65af433add5a2"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10c8cefcff98fd9168cdd86c4da8b84baaa90bf2da2269c6161984e6737bf23e"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bca5f24726e2919de94f047739d0a4fc01372801a3672708260546aa2601bf57"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:03baa76b730e4e15a45f81dfe29a8d910314143414e528737f8589ec60cf7391"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8c29c77cc57e40f84acef9bfb904373a4e89a4e8b74e71aa8075c021ec9078c2"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:03543dcf98a6619254b409be2d22b51f21ec66272be4ebda7b04e6412e4b2e14"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17b79c2963db82086229012cff93ea55196ed31f6493bb1ccd2c62f1724324e4"}, + {file = "aiohttp-3.8.4-cp39-cp39-win32.whl", hash = "sha256:34ce9f93a4a68d1272d26030655dd1b58ff727b3ed2a33d80ec433561b03d67a"}, + {file = "aiohttp-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:41a86a69bb63bb2fc3dc9ad5ea9f10f1c9c8e282b471931be0268ddd09430b04"}, + {file = "aiohttp-3.8.4.tar.gz", hash = "sha256:bf2e1a9162c1e441bf805a1fd166e249d574ca04e03b34f97e2928769e91ab5c"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +asynctest = {version = "0.13.0", markers = "python_version < \"3.8\""} +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" +optional = true 
+python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" [[package]] name = "anyio" @@ -93,11 +219,38 @@ cffi = ">=1.0.1" dev = ["cogapp", "pre-commit", "pytest", "wheel"] tests = ["pytest"] +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + +[package.dependencies] +typing-extensions = {version = ">=3.6.5", markers = "python_version < \"3.8\""} + +[[package]] +name = "asynctest" +version = "0.13.0" +description = "Enhance the standard unittest package with features for testing asyncio libraries" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "asynctest-0.13.0-py3-none-any.whl", hash = "sha256:5da6118a7e6d6b54d83a8f7197769d046922a44d2a99c21382f0a6e4fadae676"}, + {file = "asynctest-0.13.0.tar.gz", hash = "sha256:c27862842d15d83e6a34eb0b2866c323880eb3a75e4485b079ea11748fd77fac"}, +] + [[package]] name = "attrs" version = "22.1.0" description = "Classes Without Boilerplate" -category = "dev" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -277,6 +430,46 @@ webencodings = "*" css = ["tinycss2 (>=1.1.0,<1.2)"] dev = ["Sphinx (==4.3.2)", "black (==22.3.0)", "build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "mypy (==0.961)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)"] +[[package]] +name = "boto3" +version = "1.26.95" +description = "The AWS SDK for Python" +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.26.95-py3-none-any.whl", hash = "sha256:2f07523d45da7a970d18037676e1fb78401ce3f44f4cc26a6a991be8c519b62b"}, + {file = "boto3-1.26.95.tar.gz", hash = "sha256:945d32fa9bbbb5fc775378bc2c19278797b54ad57e24bd4dbd46ab27f0938152"}, +] + +[package.dependencies] +botocore = ">=1.29.95,<1.30.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.29.95" +description = "Low-level, data-driven core of boto 3." +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.29.95-py3-none-any.whl", hash = "sha256:5f5f1c8125f8c331f561ca0a7a892f709df206714c306c48fd907eee469926cb"}, + {file = "botocore-1.29.95.tar.gz", hash = "sha256:e9ffd4a2fc415c313eda03713c212e3121084fdcb21e20aac5b15924a4ae5a9d"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.16.9)"] + [[package]] name = "cached-property" version = "1.5.2" @@ -293,7 +486,7 @@ files = [ name = "certifi" version = "2022.9.24" description = "Python package for providing Mozilla's CA Bundle." 
-category = "dev" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -394,7 +587,7 @@ files = [ name = "charset-normalizer" version = "2.0.12" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "dev" +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -512,6 +705,47 @@ files = [ {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"}, ] +[[package]] +name = "docker" +version = "6.0.1" +description = "A Python library for the Docker Engine API." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "docker-6.0.1-py3-none-any.whl", hash = "sha256:dbcb3bd2fa80dca0788ed908218bf43972772009b881ed1e20dfc29a65e49782"}, + {file = "docker-6.0.1.tar.gz", hash = "sha256:896c4282e5c7af5c45e8b683b0b0c33932974fe6e50fc6906a0a83616ab3da97"}, +] + +[package.dependencies] +packaging = ">=14.0" +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" +websocket-client = ">=0.32.0" + +[package.extras] +ssh = ["paramiko (>=2.4.3)"] + +[[package]] +name = "ecdsa" +version = "0.18.0" +description = "ECDSA cryptographic signature library (pure python)" +category = "main" +optional = true +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "ecdsa-0.18.0-py2.py3-none-any.whl", hash = "sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd"}, + {file = "ecdsa-0.18.0.tar.gz", hash = "sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49"}, +] + +[package.dependencies] +six = ">=1.9.0" + +[package.extras] +gmpy = ["gmpy"] +gmpy2 = ["gmpy2"] + [[package]] name = "entrypoints" version = "0.4" @@ -580,7 +814,7 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc name = "filelock" version = "3.8.0" description = "A platform independent file lock." 
-category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -592,6 +826,90 @@ files = [ docs = ["furo (>=2022.6.21)", "sphinx (>=5.1.1)", "sphinx-autodoc-typehints (>=1.19.1)"] testing = ["covdefaults (>=2.2)", "coverage (>=6.4.2)", "pytest (>=7.1.2)", "pytest-cov (>=3)", "pytest-timeout (>=2.1)"] +[[package]] +name = "frozenlist" +version = "1.3.3" +description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, + {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, + {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, + {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, + {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, + {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, + {file = 
"frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, + {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, + {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, + {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, + {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, + {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, +] + [[package]] name = "ghp-import" version = "2.1.0" @@ -735,7 +1053,7 @@ files = [ name = "importlib-metadata" version = "5.0.0" description = "Read metadata from Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -898,6 +1216,32 @@ parso = ">=0.8.0,<0.9.0" qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] +[[package]] +name = "jina-hubble-sdk" +version = "0.34.0" +description = "SDK for Hubble API at Jina AI." 
+category = "main" +optional = true +python-versions = ">=3.7.0" +files = [ + {file = "jina-hubble-sdk-0.34.0.tar.gz", hash = "sha256:d52e1c3b90262a779dd6be66c687d5d824698cc0e17f79e9f6b94d03e86b10c1"}, + {file = "jina_hubble_sdk-0.34.0-py3-none-any.whl", hash = "sha256:d52b5d0756b710192453858a6d9056280616b9c1ab35fbf061fe6ee66cdf77a5"}, +] + +[package.dependencies] +aiohttp = "*" +docker = "*" +filelock = "*" +importlib-metadata = "*" +pathspec = "*" +python-jose = "*" +pyyaml = "*" +requests = "*" +rich = "*" + +[package.extras] +full = ["aiohttp", "black (==22.3.0)", "docker", "filelock", "flake8 (==4.0.1)", "importlib-metadata", "isort (==5.10.1)", "mock (==4.0.3)", "pathspec", "pytest (==7.0.0)", "pytest-asyncio (==0.19.0)", "pytest-cov (==3.0.0)", "pytest-mock (==3.7.0)", "python-jose", "pyyaml", "requests", "rich"] + [[package]] name = "jinja2" version = "3.1.2" @@ -916,6 +1260,18 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "json5" version = "0.9.10" @@ -1379,6 +1735,90 @@ files = [ griffe = ">=0.24" mkdocstrings = ">=0.19" +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = 
"sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = 
"multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + 
{file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + [[package]] name = "mypy" version = "1.0.0" @@ -1804,7 +2244,7 @@ files = [ name = "packaging" version = "21.3" description = "Core utilities for Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1881,7 +2321,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.10.2" description = "Utility library for gitignore style pattern matching of file paths." 
-category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2163,6 +2603,18 @@ files = [ {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] + [[package]] name = "pycparser" version = "2.21" @@ -2275,7 +2727,7 @@ pyyaml = "*" name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -2377,6 +2829,28 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-jose" +version = "3.3.0" +description = "JOSE implementation in Python" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"}, + {file = "python_jose-3.3.0-py2.py3-none-any.whl", hash = "sha256:9b1376b023f8b298536eedd47ae1089bcdb848f1535ab30555cd92002d78923a"}, +] + +[package.dependencies] +ecdsa = "!=0.15" +pyasn1 = "*" +rsa = "*" + +[package.extras] +cryptography = ["cryptography (>=3.4.0)"] +pycrypto = ["pyasn1", "pycrypto (>=2.6.0,<2.7.0)"] +pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"] + [[package]] name = "pytz" version = "2022.6" @@ -2393,7 +2867,7 @@ files = [ name = "pywin32" version = "305" description = "Python for Window Extensions" -category = "dev" +category = "main" optional = false python-versions = "*" files = [ @@ -2433,7 +2907,7 @@ files = [ name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2684,7 +3158,7 @@ files = [ name = "requests" version = "2.27.1" description = "Python HTTP for Humans." 
-category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2740,6 +3214,21 @@ typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9 [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +category = "main" +optional = true +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + [[package]] name = "ruff" version = "0.0.243" @@ -2766,6 +3255,24 @@ files = [ {file = "ruff-0.0.243.tar.gz", hash = "sha256:d5847e75038b51801f45b31a93c3526114d3aac59acea3493bb06ebc7783b004"}, ] +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, + {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + [[package]] name = "send2trash" version = "1.8.0" @@ -2812,6 +3319,31 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smart-open" +version = "6.3.0" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +category = "main" +optional = true +python-versions = ">=3.6,<4.0" +files = [ + {file = "smart_open-6.3.0-py3-none-any.whl", hash = "sha256:b4c9ae193ad6d3e7add50944b86afa0d150bd821ab8ec21edb26d9a06b66f6a8"}, + {file = "smart_open-6.3.0.tar.gz", hash = "sha256:d5238825fe9a9340645fac3d75b287c08fbb99fb2b422477de781c9f5f09e019"}, +] + +[package.dependencies] +boto3 = {version = "*", optional = true, markers = "extra == \"s3\""} + +[package.extras] +all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests"] +azure = ["azure-common", "azure-core", "azure-storage-blob"] +gcs = ["google-cloud-storage (>=2.6.0)"] +http = ["requests"] +s3 = ["boto3"] +ssh = ["paramiko"] +test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", "responses"] +webhdfs = ["requests"] + [[package]] name = "sniffio" version = "1.3.0" @@ -3134,7 +3666,7 @@ typing-extensions = ">=3.7.4" name = "urllib3" version = "1.26.14" description = "HTTP library with thread-safe connection pooling, file post, and more." 
-category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3258,7 +3790,7 @@ files = [ name = "websocket-client" version = "1.4.2" description = "WebSocket client for Python with low level API options" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3286,11 +3818,100 @@ files = [ [package.extras] test = ["pytest (>=3.0.0)"] +[[package]] +name = "yarl" +version = "1.8.2" +description = "Yet another URL library" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, + {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, + {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, + {file = 
"yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, + {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, + {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, + {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, + {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, + {file = 
"yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, + {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, + {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, + {file = 
"yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, + {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, + {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, + {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} + [[package]] name = "zipp" version = "3.10.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3304,9 +3925,11 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [extras] audio = ["pydub"] -common = ["protobuf", "lz4"] +aws = ["smart-open"] +common = ["lz4", "protobuf"] hnswlib = ["hnswlib"] image = ["pillow", "types-pillow"] +jac = ["jina-hubble-sdk"] mesh = ["trimesh"] pandas = ["pandas"] torch = ["torch"] @@ -3315,5 +3938,5 @@ web = ["fastapi"] [metadata] lock-version = "2.0" -python-versions = ">=3.7" -content-hash = "60dc7dedebd775c6fe3f45ddd2869a07df2c28bbc83420e875eb61e118b064b2" +python-versions = ">=3.7,<4.0" +content-hash = "0872bd8654de67d349699a227cd2dc1708c6fa5066c84e1f295d630184ee5ac4" diff --git a/pyproject.toml b/pyproject.toml index af53260c24f..61b10f7f298 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors=['DocArray'] license='Apache 2.0' [tool.poetry.dependencies] -python = ">=3.7" +python = ">=3.7,<4.0" pydantic = ">=1.10.2" numpy = ">=1.17.3" protobuf = { version = ">=3.19.0", optional = true } @@ -24,6 +24,8 @@ hnswlib = {version = ">=0.6.2", optional = true } lz4 = {version= ">=1.0.0", optional = true} pydub = {version = "^0.25.1", optional = true } pandas = {version = ">=1.1.0", optional = true } +smart-open = {version = ">=6.3.0", extras = ["s3"], optional = true} +jina-hubble-sdk = {version = ">=0.34.0", optional = true} [tool.poetry.extras] common = ["protobuf", "lz4"] @@ -35,6 +37,8 @@ mesh = ["trimesh"] web = ["fastapi"] hnswlib = ["hnswlib"] pandas = ["pandas"] +jac = ["jina-hubble-sdk"] +aws = ["smart-open"] [tool.poetry.dev-dependencies] pytest = ">=7.0" @@ -89,6 +93,22 @@ ignore_missing_imports = true module = "IPython.display" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "hubble.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "smart_open" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "boto3" +ignore_missing_imports = true + 
+[[tool.mypy.overrides]] +module = "botocore" +ignore_missing_imports = true + [tool.black] skip-string-normalization = true # equivalent to black -S exclude = 'docarray/proto/pb*/*' diff --git a/tests/integrations/store/__init__.py b/tests/integrations/store/__init__.py new file mode 100644 index 00000000000..51d9e298a4d --- /dev/null +++ b/tests/integrations/store/__init__.py @@ -0,0 +1,32 @@ +import tracemalloc +from functools import wraps + +from docarray import DocumentArray +from docarray.documents import TextDoc + + +def get_test_da(n: int): + return DocumentArray[TextDoc](gen_text_docs(n)) + + +def gen_text_docs(n: int): + for i in range(n): + yield TextDoc(text=f'text {i}') + + +def profile_memory(func): + """Decorator to profile memory usage of a function. + + Returns: + original function return value, (current memory usage, peak memory usage) + """ + + @wraps(func) + def _inner(*args, **kwargs): + tracemalloc.start() + ret = func(*args, **kwargs) + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + return ret, (current, peak) + + return _inner diff --git a/tests/integrations/store/docker-compose.yml b/tests/integrations/store/docker-compose.yml new file mode 100644 index 00000000000..c9d0638ad14 --- /dev/null +++ b/tests/integrations/store/docker-compose.yml @@ -0,0 +1,8 @@ +version: "3" +services: + minio: + container_name: minio + image: "minio/minio:RELEASE.2023-03-13T19-46-17Z" + ports: + - "9005:9000" + command: server /data diff --git a/tests/integrations/store/test_file.py b/tests/integrations/store/test_file.py new file mode 100644 index 00000000000..fc96ed7e686 --- /dev/null +++ b/tests/integrations/store/test_file.py @@ -0,0 +1,263 @@ +import multiprocessing as mp +from pathlib import Path + +import pytest + +from docarray import DocumentArray +from docarray.documents import TextDoc +from docarray.store.file import ConcurrentPushException, FileDocStore +from docarray.utils.cache import get_cache_path +from tests.integrations.store import gen_text_docs, get_test_da, profile_memory + +DA_LEN: int = 2**10 +TOLERANCE_RATIO = 0.1 # Percentage of difference allowed in stream vs non-stream test + + +def test_path_resolution(): + assert FileDocStore._abs_filepath('meow') == get_cache_path() / 'meow' + assert FileDocStore._abs_filepath('/meow') == Path('/meow') + assert FileDocStore._abs_filepath('~/meow') == Path.home() / 'meow' + assert FileDocStore._abs_filepath('./meow') == Path.cwd() / 'meow' + assert FileDocStore._abs_filepath('../meow') == Path.cwd().parent / 'meow' + + +def test_pushpull_correct(capsys, tmp_path: Path): + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = tmp_path + da1 = get_test_da(DA_LEN) + + # Verbose + da1.push(f'file://{namespace_dir}/meow', show_progress=True) + da2 = DocumentArray[TextDoc].pull( + f'file://{namespace_dir}/meow', show_progress=True + ) + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet + da2.push(f'file://{namespace_dir}/meow') + da1 = DocumentArray[TextDoc].pull(f'file://{namespace_dir}/meow') + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert len(captured.out) == 0 + assert len(captured.err) == 0 + + +def test_pushpull_stream_correct(capsys, tmp_path: 
Path): + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = tmp_path + da1 = get_test_da(DA_LEN) + + # Verbosity and correctness + DocumentArray[TextDoc].push_stream( + iter(da1), f'file://{namespace_dir}/meow', show_progress=True + ) + doc_stream2 = DocumentArray[TextDoc].pull_stream( + f'file://{namespace_dir}/meow', show_progress=True + ) + + assert all(d1.id == d2.id for d1, d2 in zip(da1, doc_stream2)) + with pytest.raises(StopIteration): + next(doc_stream2) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet and chained + doc_stream = DocumentArray[TextDoc].pull_stream( + f'file://{namespace_dir}/meow', show_progress=False + ) + DocumentArray[TextDoc].push_stream( + doc_stream, f'file://{namespace_dir}/meow2', show_progress=False + ) + + captured = capsys.readouterr() + assert len(captured.out) == 0 + assert len(captured.err) == 0 + + +@pytest.mark.slow +def test_pull_stream_vs_pull_full(tmp_path: Path): + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = tmp_path + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 1), + f'file://{namespace_dir}/meow-short', + show_progress=False, + ) + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 4), + f'file://{namespace_dir}/meow-long', + show_progress=False, + ) + + @profile_memory + def get_total_stream(url: str): + return sum( + len(d.text) + for d in DocumentArray[TextDoc].pull_stream(url, show_progress=False) + ) + + @profile_memory + def get_total_full(url: str): + return sum( + len(d.text) for d in DocumentArray[TextDoc].pull(url, show_progress=False) + ) + + # A warmup is needed to get accurate memory usage comparison + _ = get_total_stream(f'file://{namespace_dir}/meow-short') + short_total_stream, (_, short_stream_peak) = get_total_stream( + f'file://{namespace_dir}/meow-short' + ) + long_total_stream, (_, long_stream_peak) = get_total_stream( + f'file://{namespace_dir}/meow-long' + ) + + _ = get_total_full(f'file://{namespace_dir}/meow-short') + short_total_full, (_, short_full_peak) = get_total_full( + f'file://{namespace_dir}/meow-short' + ) + long_total_full, (_, long_full_peak) = get_total_full( + f'file://{namespace_dir}/meow-long' + ) + + assert ( + short_total_stream == short_total_full + ), 'Streamed and non-streamed pull should have similar statistics' + assert ( + long_total_stream == long_total_full + ), 'Streamed and non-streamed pull should have similar statistics' + + assert ( + abs(long_stream_peak - short_stream_peak) / short_stream_peak < TOLERANCE_RATIO + ), 'Streamed memory usage should not be dependent on the size of the data' + assert ( + abs(long_full_peak - short_full_peak) / short_full_peak > TOLERANCE_RATIO + ), 'Full pull memory usage should be dependent on the size of the data' + + +def test_list_and_delete(tmp_path: Path): + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = str(tmp_path) + + da_names = FileDocStore.list(namespace_dir, show_table=False) + assert len(da_names) == 0 + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), f'file://{namespace_dir}/meow', show_progress=False + ) + da_names = FileDocStore.list(namespace_dir, show_table=False) + assert set(da_names) == {'meow'} + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), f'file://{namespace_dir}/woof', show_progress=False + ) + da_names = FileDocStore.list(namespace_dir, show_table=False) + assert set(da_names) == {'meow', 'woof'} + + assert FileDocStore.delete( + f'{namespace_dir}/meow' + ), 'Deleting an 
existing DA should return True' + da_names = FileDocStore.list(namespace_dir, show_table=False) + assert set(da_names) == {'woof'} + + with pytest.raises( + FileNotFoundError + ): # Deleting a non-existent DA without safety should raise an error + FileDocStore.delete(f'{namespace_dir}/meow', missing_ok=False) + + assert not FileDocStore.delete( + f'{namespace_dir}/meow', missing_ok=True + ), 'Deleting a non-existent DA should return False' + + +def test_concurrent_push_pull(tmp_path: Path): + # Push to DA that is being pulled should not mess up the pull + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = tmp_path + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'file://{namespace_dir}/da0', + show_progress=False, + ) + + global _task + + def _task(choice: str): + if choice == 'push': + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'file://{namespace_dir}/da0', + show_progress=False, + ) + elif choice == 'pull': + pull_len = sum( + 1 + for _ in DocumentArray[TextDoc].pull_stream( + f'file://{namespace_dir}/da0' + ) + ) + assert pull_len == DA_LEN + else: + raise ValueError(f'Unknown choice {choice}') + + with mp.get_context('fork').Pool(3) as p: + p.map(_task, ['pull', 'push', 'pull']) + + +@pytest.mark.slow +def test_concurrent_push(tmp_path: Path): + # Double push should fail the second push + import time + + tmp_path.mkdir(parents=True, exist_ok=True) + namespace_dir = tmp_path + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'file://{namespace_dir}/da0', + show_progress=False, + ) + + def _slowdown_iterator(iterator): + for i, e in enumerate(iterator): + yield e + if i % (DA_LEN // 100) == 0: + time.sleep(0.01) + + global _push + + def _push(choice: str): + if choice == 'slow': + DocumentArray[TextDoc].push_stream( + _slowdown_iterator(gen_text_docs(DA_LEN)), + f'file://{namespace_dir}/da0', + show_progress=False, + ) + return True + elif choice == 'cold_start': + try: + time.sleep(0.1) + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'file://{namespace_dir}/da0', + show_progress=False, + ) + return True + except ConcurrentPushException: + return False + else: + raise ValueError(f'Unknown choice {choice}') + + with mp.get_context('fork').Pool(3) as p: + results = p.map(_push, ['cold_start', 'slow', 'cold_start']) + assert results == [False, True, False] diff --git a/tests/integrations/store/test_jac.py b/tests/integrations/store/test_jac.py new file mode 100644 index 00000000000..aa1763298b9 --- /dev/null +++ b/tests/integrations/store/test_jac.py @@ -0,0 +1,254 @@ +import multiprocessing as mp +import uuid + +import hubble +import pytest + +from docarray import DocumentArray +from docarray.documents import TextDoc +from docarray.store import JACDocStore +from tests.integrations.store import gen_text_docs, get_test_da, profile_memory + +DA_LEN: int = 2**10 +TOLERANCE_RATIO = 0.5 # Percentage of difference allowed in stream vs non-stream test +RANDOM: str = uuid.uuid4().hex[:8] + + +@pytest.fixture(scope='session', autouse=True) +def testing_namespace_cleanup(): + da_names = list( + filter( + lambda x: x.startswith('test'), + JACDocStore.list('jac://', show_table=False), + ) + ) + for da_name in da_names: + JACDocStore.delete(f'jac://{da_name}') + yield + da_names = list( + filter( + lambda x: x.startswith(f'test{RANDOM}'), + JACDocStore.list('jac://', show_table=False), + ) + ) + for da_name in da_names: + JACDocStore.delete(f'jac://{da_name}') + + +@pytest.mark.slow +@pytest.mark.internet +def 
test_pushpull_correct(capsys): + DA_NAME: str = f'test{RANDOM}-pushpull-correct' + da1 = get_test_da(DA_LEN) + + # Verbose + da1.push(f'jac://{DA_NAME}', show_progress=True) + da2 = DocumentArray[TextDoc].pull(f'jac://{DA_NAME}', show_progress=True) + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet + da2.push(f'jac://{DA_NAME}') + da1 = DocumentArray[TextDoc].pull(f'jac://{DA_NAME}') + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert ( + len(captured.out) == 0 + ), 'No output should be printed when show_progress=False' + assert len(captured.err) == 0, 'No error should be printed when show_progress=False' + + +@pytest.mark.slow +@pytest.mark.internet +def test_pushpull_stream_correct(capsys): + DA_NAME_1: str = f'test{RANDOM}-pushpull-stream-correct-da1' + DA_NAME_2: str = f'test{RANDOM}-pushpull-stream-correct-da2' + + da1 = get_test_da(DA_LEN) + + # Verbosity and correctness + DocumentArray[TextDoc].push_stream( + iter(da1), f'jac://{DA_NAME_1}', show_progress=True + ) + doc_stream2 = DocumentArray[TextDoc].pull_stream( + f'jac://{DA_NAME_1}', show_progress=True + ) + + assert all(d1.id == d2.id for d1, d2 in zip(da1, doc_stream2)) + with pytest.raises(StopIteration): + next(doc_stream2) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet and chained + doc_stream = DocumentArray[TextDoc].pull_stream( + f'jac://{DA_NAME_1}', show_progress=False + ) + DocumentArray[TextDoc].push_stream( + doc_stream, f'jac://{DA_NAME_2}', show_progress=False + ) + + captured = capsys.readouterr() + assert ( + len(captured.out) == 0 + ), 'No output should be printed when show_progress=False' + assert len(captured.err) == 0, 'No error should be printed when show_progress=False' + + +@pytest.mark.slow +@pytest.mark.internet +def test_pull_stream_vs_pull_full(): + import docarray.store.helpers + + docarray.store.helpers.CACHING_REQUEST_READER_CHUNK_SIZE = 2**10 + DA_NAME_SHORT: str = f'test{RANDOM}-pull-stream-vs-pull-full-short' + DA_NAME_LONG: str = f'test{RANDOM}-pull-stream-vs-pull-full-long' + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 1), + f'jac://{DA_NAME_SHORT}', + show_progress=False, + ) + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 4), + f'jac://{DA_NAME_LONG}', + show_progress=False, + ) + + @profile_memory + def get_total_stream(url: str): + return sum( + len(d.text) + for d in DocumentArray[TextDoc].pull_stream(url, show_progress=False) + ) + + @profile_memory + def get_total_full(url: str): + return sum( + len(d.text) for d in DocumentArray[TextDoc].pull(url, show_progress=False) + ) + + # A warmup is needed to get accurate memory usage comparison + _ = get_total_stream(f'jac://{DA_NAME_SHORT}') + short_total_stream, (_, short_stream_peak) = get_total_stream( + f'jac://{DA_NAME_SHORT}' + ) + long_total_stream, (_, long_stream_peak) = get_total_stream(f'jac://{DA_NAME_LONG}') + + _ = get_total_full(f'jac://{DA_NAME_SHORT}') + short_total_full, (_, short_full_peak) = get_total_full(f'jac://{DA_NAME_SHORT}') + long_total_full, (_, long_full_peak) = get_total_full(f'jac://{DA_NAME_LONG}') + + assert ( + short_total_stream == short_total_full + ), 'Streamed and 
non-streamed pull should have similar statistics' + assert ( + long_total_stream == long_total_full + ), 'Streamed and non-streamed pull should have similar statistics' + + assert ( + abs(long_stream_peak - short_stream_peak) / short_stream_peak < TOLERANCE_RATIO + ), 'Streamed memory usage should not be dependent on the size of the data' + assert ( + abs(long_full_peak - short_full_peak) / short_full_peak > TOLERANCE_RATIO + ), 'Full pull memory usage should be dependent on the size of the data' + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.skip(reason='The CI account might be broken') +def test_list_and_delete(): + DA_NAME_0 = f'test{RANDOM}-list-and-delete-da0' + DA_NAME_1 = f'test{RANDOM}-list-and-delete-da1' + + da_names = list( + filter( + lambda x: x.startswith(f'test{RANDOM}-list-and-delete'), + JACDocStore.list(show_table=False), + ) + ) + assert len(da_names) == 0 + + DocumentArray[TextDoc].push( + get_test_da(DA_LEN), f'jac://{DA_NAME_0}', show_progress=False + ) + da_names = list( + filter( + lambda x: x.startswith(f'test{RANDOM}-list-and-delete'), + JACDocStore.list(show_table=False), + ) + ) + assert set(da_names) == {DA_NAME_0} + DocumentArray[TextDoc].push( + get_test_da(DA_LEN), f'jac://{DA_NAME_1}', show_progress=False + ) + da_names = list( + filter( + lambda x: x.startswith(f'test{RANDOM}-list-and-delete'), + JACDocStore.list(show_table=False), + ) + ) + assert set(da_names) == {DA_NAME_0, DA_NAME_1} + + assert JACDocStore.delete( + f'{DA_NAME_0}' + ), 'Deleting an existing DA should return True' + da_names = list( + filter( + lambda x: x.startswith(f'test{RANDOM}-list-and-delete'), + JACDocStore.list(show_table=False), + ) + ) + assert set(da_names) == {DA_NAME_1} + + with pytest.raises( + hubble.excepts.RequestedEntityNotFoundError + ): # Deleting a non-existent DA without safety should raise an error + JACDocStore.delete(f'{DA_NAME_0}', missing_ok=False) + + assert not JACDocStore.delete( + f'{DA_NAME_0}', missing_ok=True + ), 'Deleting a non-existent DA should return False' + + +@pytest.mark.slow +@pytest.mark.internet +def test_concurrent_push_pull(): + # Push to DA that is being pulled should not mess up the pull + DA_NAME_0 = f'test{RANDOM}-concurrent-push-pull-da0' + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'jac://{DA_NAME_0}', + show_progress=False, + ) + + global _task + + def _task(choice: str): + if choice == 'push': + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f'jac://{DA_NAME_0}', + show_progress=False, + ) + elif choice == 'pull': + pull_len = sum( + 1 for _ in DocumentArray[TextDoc].pull_stream(f'jac://{DA_NAME_0}') + ) + assert pull_len == DA_LEN + else: + raise ValueError(f'Unknown choice {choice}') + + with mp.get_context('fork').Pool(3) as p: + p.map(_task, ['pull', 'push', 'pull']) diff --git a/tests/integrations/store/test_s3.py b/tests/integrations/store/test_s3.py new file mode 100644 index 00000000000..37b5d6fe69e --- /dev/null +++ b/tests/integrations/store/test_s3.py @@ -0,0 +1,265 @@ +import multiprocessing as mp +import os +import time +import uuid + +import pytest + +from docarray import DocumentArray +from docarray.documents import TextDoc +from docarray.store import S3DocStore +from tests.integrations.store import gen_text_docs, get_test_da, profile_memory + +DA_LEN: int = 2**10 +TOLERANCE_RATIO = 0.5 # Percentage of difference allowed in stream vs non-stream test +BUCKET: str = 'da-pushpull' +RANDOM: str = uuid.uuid4().hex[:8] + + +@pytest.fixture(scope="session") +def 
minio_container(): + file_dir = os.path.dirname(__file__) + os.system( + f"docker-compose -f {os.path.join(file_dir, 'docker-compose.yml')} up -d --remove-orphans minio" + ) + time.sleep(1) + yield + os.system( + f"docker-compose -f {os.path.join(file_dir, 'docker-compose.yml')} down --remove-orphans" + ) + + +@pytest.fixture(scope='session', autouse=True) +def testing_bucket(minio_container): + import boto3 + from botocore.client import Config + + boto3.Session.resource.__defaults__ = ( + "us-east-1", + None, + False, + None, + "http://localhost:9005", + "minioadmin", + "minioadmin", + None, + Config(signature_version="s3v4"), + ) + boto3.Session.client.__defaults__ = ( + "us-east-1", + None, + False, + None, + "http://localhost:9005", + "minioadmin", + "minioadmin", + None, + Config(signature_version="s3v4"), + ) + # make a bucket + s3 = boto3.resource('s3') + s3.create_bucket(Bucket=BUCKET) + + yield + s3.Bucket(BUCKET).objects.all().delete() + s3.Bucket(BUCKET).delete() + + +@pytest.mark.slow +def test_pushpull_correct(capsys): + namespace_dir = f'{BUCKET}/test{RANDOM}/pushpull-correct' + da1 = get_test_da(DA_LEN) + + # Verbose + da1.push(f's3://{namespace_dir}/meow', show_progress=True) + da2 = DocumentArray[TextDoc].pull(f's3://{namespace_dir}/meow', show_progress=True) + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet + da2.push(f's3://{namespace_dir}/meow') + da1 = DocumentArray[TextDoc].pull(f's3://{namespace_dir}/meow') + assert len(da1) == len(da2) + assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) + assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) + + captured = capsys.readouterr() + assert len(captured.out) == 0 + assert len(captured.err) == 0 + + +@pytest.mark.slow +def test_pushpull_stream_correct(capsys): + namespace_dir = f'{BUCKET}/test{RANDOM}/pushpull-stream-correct' + da1 = get_test_da(DA_LEN) + + # Verbosity and correctness + DocumentArray[TextDoc].push_stream( + iter(da1), f's3://{namespace_dir}/meow', show_progress=True + ) + doc_stream2 = DocumentArray[TextDoc].pull_stream( + f's3://{namespace_dir}/meow', show_progress=True + ) + + assert all(d1.id == d2.id for d1, d2 in zip(da1, doc_stream2)) + with pytest.raises(StopIteration): + next(doc_stream2) + + captured = capsys.readouterr() + assert len(captured.out) > 0 + assert len(captured.err) == 0 + + # Quiet and chained + doc_stream = DocumentArray[TextDoc].pull_stream( + f's3://{namespace_dir}/meow', show_progress=False + ) + DocumentArray[TextDoc].push_stream( + doc_stream, f's3://{namespace_dir}/meow2', show_progress=False + ) + + captured = capsys.readouterr() + assert len(captured.out) == 0 + assert len(captured.err) == 0 + + +@pytest.mark.slow +def test_pull_stream_vs_pull_full(): + namespace_dir = f'{BUCKET}/test{RANDOM}/pull-stream-vs-pull-full' + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 1), + f's3://{namespace_dir}/meow-short', + show_progress=False, + ) + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN * 4), + f's3://{namespace_dir}/meow-long', + show_progress=False, + ) + + @profile_memory + def get_total_stream(url: str): + return sum( + len(d.text) + for d in DocumentArray[TextDoc].pull_stream(url, show_progress=False) + ) + + @profile_memory + def get_total_full(url: str): + return sum( + len(d.text) for d in DocumentArray[TextDoc].pull(url, 
show_progress=False) + ) + + # A warmup is needed to get accurate memory usage comparison + _ = get_total_stream(f's3://{namespace_dir}/meow-short') + short_total_stream, (_, short_stream_peak) = get_total_stream( + f's3://{namespace_dir}/meow-short' + ) + long_total_stream, (_, long_stream_peak) = get_total_stream( + f's3://{namespace_dir}/meow-long' + ) + + _ = get_total_full(f's3://{namespace_dir}/meow-short') + short_total_full, (_, short_full_peak) = get_total_full( + f's3://{namespace_dir}/meow-short' + ) + long_total_full, (_, long_full_peak) = get_total_full( + f's3://{namespace_dir}/meow-long' + ) + + assert ( + short_total_stream == short_total_full + ), 'Streamed and non-streamed pull should have similar statistics' + assert ( + long_total_stream == long_total_full + ), 'Streamed and non-streamed pull should have similar statistics' + + assert ( + abs(long_stream_peak - short_stream_peak) / short_stream_peak < TOLERANCE_RATIO + ), 'Streamed memory usage should not be dependent on the size of the data' + assert ( + abs(long_full_peak - short_full_peak) / short_full_peak > TOLERANCE_RATIO + ), 'Full pull memory usage should be dependent on the size of the data' + + +@pytest.mark.slow +def test_list_and_delete(): + namespace_dir = f'{BUCKET}/test{RANDOM}/list-and-delete' + + da_names = S3DocStore.list(namespace_dir, show_table=False) + assert len(da_names) == 0 + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), f's3://{namespace_dir}/meow', show_progress=False + ) + da_names = S3DocStore.list(f'{namespace_dir}', show_table=False) + assert set(da_names) == {'meow'} + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), f's3://{namespace_dir}/woof', show_progress=False + ) + da_names = S3DocStore.list(f'{namespace_dir}', show_table=False) + assert set(da_names) == {'meow', 'woof'} + + assert S3DocStore.delete( + f'{namespace_dir}/meow' + ), 'Deleting an existing DA should return True' + da_names = S3DocStore.list(namespace_dir, show_table=False) + assert set(da_names) == {'woof'} + + with pytest.raises( + ValueError + ): # Deleting a non-existent DA without safety should raise an error + S3DocStore.delete(f'{namespace_dir}/meow', missing_ok=False) + + assert not S3DocStore.delete( + f'{namespace_dir}/meow', missing_ok=True + ), 'Deleting a non-existent DA should return False' + + +@pytest.mark.slow +def test_concurrent_push_pull(): + # Push to DA that is being pulled should not mess up the pull + namespace_dir = f'{BUCKET}/test{RANDOM}/concurrent-push-pull' + + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f's3://{namespace_dir}/da0', + show_progress=False, + ) + + global _task + + def _task(choice: str): + if choice == 'push': + DocumentArray[TextDoc].push_stream( + gen_text_docs(DA_LEN), + f's3://{namespace_dir}/da0', + show_progress=False, + ) + elif choice == 'pull': + pull_len = sum( + 1 + for _ in DocumentArray[TextDoc].pull_stream(f's3://{namespace_dir}/da0') + ) + assert pull_len == DA_LEN + else: + raise ValueError(f'Unknown choice {choice}') + + with mp.get_context('fork').Pool(3) as p: + p.map(_task, ['pull', 'push', 'pull']) + + +@pytest.mark.skip(reason='Not Applicable') +def test_concurrent_push(): + """ + Amazon S3 does not support object locking for concurrent writers. + If two PUT requests are simultaneously made to the same key, the request with the latest timestamp wins. + However, there is no way for the processes to know if they are the latest or not. 
+ + https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html#ConsistencyModel + """ + pass
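Note (illustrative, not part of the patch): the docstring above explains why a concurrent-push test is skipped for S3 — concurrent PUTs to the same key are resolved last-writer-wins, and neither writer can tell whether it "won". The sketch below shows what that means for the push/pull API exercised in these tests. It only reuses the push_stream/pull_stream calls seen above; the bucket name matches the tests' BUCKET constant, the object key and writer tags are made up for illustration, and it assumes S3 credentials or the minio fixture from test_s3.py are already configured.

# Illustrative sketch only -- assumes an s3:// endpoint and the 'da-pushpull'
# bucket are already set up (e.g. via the minio fixture in test_s3.py).
import multiprocessing as mp

from docarray import DocumentArray
from docarray.documents import TextDoc


def _writer(tag: str) -> None:
    # Both processes push to the same key; S3 keeps whichever upload lands last.
    docs = (TextDoc(text=f'{tag} {i}') for i in range(1024))
    DocumentArray[TextDoc].push_stream(
        docs, 's3://da-pushpull/demo/last-writer-wins', show_progress=False
    )


if __name__ == '__main__':
    with mp.get_context('fork').Pool(2) as p:
        p.map(_writer, ['writer-a', 'writer-b'])

    # Each completed upload replaces the object as a whole, so the stored DA
    # comes entirely from one writer rather than a mix of both -- but there is
    # no way to know in advance which writer that will be.
    tags = {
        d.text.split()[0]
        for d in DocumentArray[TextDoc].pull_stream(
            's3://da-pushpull/demo/last-writer-wins'
        )
    }
    print(tags)  # expected: {'writer-a'} or {'writer-b'}, not both

This is why the file backend can raise ConcurrentPushException while the S3 backend cannot offer an equivalent guarantee without an external locking mechanism.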