Skip to content

more extensive api x store testing #2447

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 44 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e309415
add filenotfounderror to tupe of handled errors
d-v-b Oct 30, 2024
0045351
revive remote store test fixture, and parameterize api tests with all…
d-v-b Oct 30, 2024
d670b30
properly construct remotestore instance in test fixture
d-v-b Oct 30, 2024
f6d35fb
add filenotfounderror to tuple of handled errors
d-v-b Oct 30, 2024
311efaf
propagate mode kwarg into open_group
d-v-b Oct 30, 2024
73911b7
propagate mode kwarg into open_group
d-v-b Oct 30, 2024
9917343
xfail zipstore
d-v-b Oct 30, 2024
90df740
revert removal of type ignores
d-v-b Oct 30, 2024
3c3099a
speculative refactor of open func
d-v-b Oct 30, 2024
0219425
refactor store test fixture
d-v-b Oct 30, 2024
bd5eb13
send s3fs fixture to remotestore fixture
d-v-b Oct 31, 2024
1c6044b
remove unused method from group and corresponding test
d-v-b Oct 31, 2024
4c5f06e
explicitly depend on moto server
d-v-b Oct 31, 2024
420ca07
explicitly depend on moto server, also in test deps
d-v-b Oct 31, 2024
bac994d
ignore mypy errors properly
d-v-b Oct 31, 2024
426e1cb
fixup
d-v-b Oct 31, 2024
0013d22
annotate store variables as Store
d-v-b Oct 31, 2024
fb52a7c
add moto[server] to min_deps
d-v-b Oct 31, 2024
53b9faf
Merge branch 'main' into fix/open-v2-array-remotestore
d-v-b Nov 5, 2024
2ab4dc3
Merge branch 'main' of github.com:zarr-developers/zarr-python into fi…
d-v-b Nov 13, 2024
9ec0d01
revert store fixture name convention
d-v-b Nov 15, 2024
7bc982e
remove unused import
d-v-b Nov 15, 2024
d776ead
remove mode arg for remotestore
d-v-b Nov 15, 2024
191c012
fixup
d-v-b Nov 15, 2024
da03f87
Merge branch 'main' of github.com:zarr-developers/zarr-python into fi…
d-v-b Nov 15, 2024
9def17d
add mutability altering funcs
d-v-b Nov 15, 2024
d1c95b2
get consolidated tests to pass
d-v-b Nov 15, 2024
a04e636
fix for open_array in r+ mode
d-v-b Nov 15, 2024
e29e072
fix broken zipstore delete_dir
d-v-b Nov 15, 2024
cbf1a61
xfail zipstore in open_array test
d-v-b Nov 15, 2024
cadfef1
add missing import
d-v-b Nov 15, 2024
f085859
Merge branch 'fix/open-v2-array-remotestore' of github.com:d-v-b/zarr…
d-v-b Nov 15, 2024
c0f777d
use create instead of open_array in fill_value test
d-v-b Nov 15, 2024
94e2468
remove xfail for zipstore in test_open_with_mode_r_plus
d-v-b Nov 15, 2024
15b0211
xfail test that fails for any warning on python 3.12 or higher with r…
d-v-b Nov 15, 2024
8d3b66c
remove xfail for zip
d-v-b Nov 15, 2024
5e5376f
lint
d-v-b Nov 15, 2024
d087e9b
fix open_array for mode r+
d-v-b Nov 15, 2024
0c03d4a
Merge branch 'fix/open-r-plus' of github.com:d-v-b/zarr-python into f…
d-v-b Nov 15, 2024
9f45997
simplify group tests by removing zip and remote store cases
d-v-b Nov 15, 2024
c0d5407
simplify test_array
d-v-b Nov 15, 2024
f946499
use zarr_format fixture
d-v-b Nov 15, 2024
c5b3588
add old open_with_mode_r test
d-v-b Nov 15, 2024
c62de51
Merge branch 'main' of github.com:zarr-developers/zarr-python into fi…
d-v-b Nov 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ test = [
"msgpack",
"s3fs",
"pytest-asyncio",
"moto[s3]",
"moto[s3, server]",
"flask-cors",
"flask",
"requests",
Expand Down Expand Up @@ -201,7 +201,7 @@ dependencies = [
'pytest',
'pytest-cov',
'pytest-asyncio',
'moto[s3]',
'moto[s3, server]'
]

[tool.hatch.envs.upstream.env-vars]
Expand Down Expand Up @@ -235,7 +235,7 @@ dependencies = [
'pytest',
'pytest-cov',
'pytest-asyncio',
'moto[s3]',
'moto[s3, server]',
]

[tool.hatch.envs.min_deps.scripts]
Expand Down
4 changes: 1 addition & 3 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
"ones",
"ones_like",
"open",
"open_array",
"open_consolidated",
"open_group",
"open_like",
Expand Down Expand Up @@ -301,7 +300,7 @@ async def open(
store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options)

# TODO: the mode check below seems wrong!
if "shape" not in kwargs and mode in {"a", "r", "r+"}:
if "shape" not in kwargs and mode in _READ_MODES:
try:
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
# TODO: remove this cast when we fix typing for array metadata dicts
Expand Down Expand Up @@ -1093,7 +1092,6 @@ async def open_array(
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

try:
return await AsyncArray.open(store_path, zarr_format=zarr_format)
except FileNotFoundError:
Expand Down
12 changes: 0 additions & 12 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,6 @@ async def _save_metadata(self, ensure_parents: bool = False) -> None:
).items()
]
)

await asyncio.gather(*awaitables)

@property
Expand Down Expand Up @@ -1502,17 +1501,6 @@ def __setitem__(self, key: str, value: Any) -> None:
def __repr__(self) -> str:
return f"<Group {self.store_path}>"

async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group:
new_metadata = replace(self.metadata, attributes=new_attributes)

# Write new metadata
to_save = new_metadata.to_buffer_dict(default_buffer_prototype())
awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]
await asyncio.gather(*awaitables)

async_group = replace(self._async_group, metadata=new_metadata)
return replace(self, _async_group=async_group)

@property
def store_path(self) -> StorePath:
return self._async_group.store_path
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/storage/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None:
async def delete_dir(self, prefix: str) -> None:
# only raise NotImplementedError if any keys are found
self._check_writable()
if not prefix.endswith("/"):
if prefix != "" and not prefix.endswith("/"):
prefix += "/"
async for _ in self.list_prefix(prefix):
raise NotImplementedError
Expand Down
174 changes: 138 additions & 36 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from __future__ import annotations

import os
import pathlib
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

import numpy as np
import numpy.typing as npt
import pytest
from botocore.session import Session
from hypothesis import HealthCheck, Verbosity, settings

from zarr import AsyncGroup, config
Expand All @@ -16,30 +18,92 @@
from zarr.storage.remote import RemoteStore

if TYPE_CHECKING:
from collections.abc import Generator
from collections.abc import AsyncGenerator, Generator
from typing import Any, Literal

import botocore
from _pytest.compat import LEGACY_PATH

from zarr.core.common import ChunkCoords, MemoryOrder, ZarrFormat

s3fs = pytest.importorskip("s3fs")
requests = pytest.importorskip("requests")
moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server")
moto = pytest.importorskip("moto")

# ### amended from s3fs ### #
test_bucket_name = "test"
secure_bucket_name = "test-secure"


def as_mutable(store: Store) -> Store:
"""
Return a mutable version of the store
"""
if isinstance(store, LocalStore):
return LocalStore(store.root, read_only=False)
if isinstance(store, MemoryStore):
return MemoryStore(store._store_dict, read_only=False)
if isinstance(store, RemoteStore):
return RemoteStore(fs=store.fs, path=store.path, read_only=False)
if isinstance(store, ZipStore):
store.close()
return sync(ZipStore.open(path=store.path, read_only=False))
raise ValueError(f"Unknown store type: {type(store)}")


def as_immutable(store: Store) -> Store:
"""
Return an immutable version of the store
"""
if isinstance(store, LocalStore):
return LocalStore(store.root, read_only=True)
if isinstance(store, MemoryStore):
return MemoryStore(store._store_dict, read_only=True)
if isinstance(store, RemoteStore):
return RemoteStore(fs=store.fs, path=store.path, read_only=True)
if isinstance(store, ZipStore):
store.close()
return sync(ZipStore.open(path=store.path, read_only=True))
raise ValueError(f"Unknown store type: {type(store)}")


async def parse_store(
store: Literal["local", "memory", "remote", "zip"], path: str
store: str,
path: str,
s3: s3fs.S3FileSystem, # type: ignore[name-defined]
) -> LocalStore | MemoryStore | RemoteStore | ZipStore:
if store == "local":
return await LocalStore.open(path)
if store == "memory":
return await MemoryStore.open()
if store == "remote":
return await RemoteStore.open(url=path)
if store == "zip":
return await ZipStore.open(path + "/zarr.zip", mode="w")
"""
Take a string representation of a store and convert that string representation
into the appropriate store object, which is then returned.
"""

match store:
case "local":
return LocalStore(path, read_only=False)
case "memory":
return MemoryStore(read_only=False)
case "remote":
return RemoteStore(fs=s3, path=test_bucket_name, read_only=False)
case "zip":
return await ZipStore.open(path + "/zarr.zip", read_only=False, mode="w")

raise AssertionError


@pytest.fixture(params=[str, pathlib.Path])
def path_type(request: pytest.FixtureRequest) -> Any:
"""
A pytest fixture that provides a parameterized path type.

This fixture yields different types of path representations
for testing purposes. The possible types are `str` and
`pathlib.Path`. It can be used to test functions or methods
that need to handle different path type inputs.

Returns:
The path type specified by the current parameter.
"""
return request.param


Expand All @@ -51,34 +115,20 @@ async def store_path(tmpdir: LEGACY_PATH) -> StorePath:


@pytest.fixture
async def local_store(tmpdir: LEGACY_PATH) -> LocalStore:
return await LocalStore.open(str(tmpdir))


@pytest.fixture
async def remote_store(url: str) -> RemoteStore:
return await RemoteStore.open(url)


@pytest.fixture
async def memory_store() -> MemoryStore:
return await MemoryStore.open()


@pytest.fixture
async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore:
return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w")


@pytest.fixture
async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store:
async def store(
request: pytest.FixtureRequest,
tmpdir: LEGACY_PATH,
s3: s3fs.S3FileSystem, # type: ignore[name-defined]
) -> AsyncGenerator[Store, None]:
param = request.param
return await parse_store(param, str(tmpdir))
store_instance = await parse_store(param, str(tmpdir), s3)
yield store_instance
store_instance.close()


@pytest.fixture(params=["local", "memory", "zip"])
def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store:
result = sync(parse_store(request.param, str(tmp_path)))
def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH, s3_base: str) -> Store:
result = sync(parse_store(request.param, str(tmp_path), s3_base))
if not isinstance(result, Store):
raise TypeError("Wrong store class returned by test fixture! got " + result + " instead")
return result
Expand All @@ -92,10 +142,12 @@ class AsyncGroupRequest:


@pytest.fixture
async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup:
async def async_group(
request: pytest.FixtureRequest, tmpdir: LEGACY_PATH, s3_base: str
) -> AsyncGroup:
param: AsyncGroupRequest = request.param

store = await parse_store(param.store, str(tmpdir))
store = await parse_store(param.store, str(tmpdir), s3_base)
return await AsyncGroup.from_store(
store,
attributes=param.attributes,
Expand Down Expand Up @@ -148,6 +200,56 @@ def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat:
raise ValueError(msg)


@pytest.fixture(scope="module")
def s3_base() -> Generator[str, None, None]:
# writable local S3 system
from moto.server import ThreadedMotoServer

if "AWS_SECRET_ACCESS_KEY" not in os.environ:
os.environ["AWS_SECRET_ACCESS_KEY"] = "foo"
if "AWS_ACCESS_KEY_ID" not in os.environ:
os.environ["AWS_ACCESS_KEY_ID"] = "foo"
server = ThreadedMotoServer(ip_address="127.0.0.1", port=0)
server.start()
host, port = server._server.server_address
endpoint_url = f"http://{host}:{port}"

yield endpoint_url
server.stop()


def get_boto3_client(endpoint_url: str) -> botocore.client.BaseClient:
# NB: we use the sync botocore client for setup
session = Session()
return session.create_client("s3", endpoint_url=endpoint_url, region_name="us-east-1")


@pytest.fixture(autouse=True)
def s3(s3_base: str) -> Generator[s3fs.S3FileSystem, None, None]: # type: ignore[name-defined]
"""
Quoting Martin Durant:
pytest-asyncio creates a new event loop for each async test.
When an async-mode s3fs instance is made from async, it will be assigned to the loop from
which it is made. That means that if you use s3fs again from a subsequent test,
you will have the same identical instance, but be running on a different loop - which fails.

For the rest: it's very convenient to clean up the state of the store between tests,
make sure we start off blank each time.

https://github.com/zarr-developers/zarr-python/pull/1785#discussion_r1634856207
"""
client = get_boto3_client(s3_base)
client.create_bucket(Bucket=test_bucket_name, ACL="public-read")
s3fs.S3FileSystem.clear_instance_cache()
s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": s3_base}, asynchronous=True)
session = sync(s3.set_session())
s3.invalidate_cache()
yield s3
requests.post(f"{s3_base}/moto-api/reset")
client.close()
sync(session.close())


settings.register_profile(
"ci",
max_examples=1000,
Expand Down
Loading
Loading