WIP: Add functionality to virtualize GeoTIFFs using async_tiff #524

Draft · wants to merge 17 commits into base: develop

21 changes: 15 additions & 6 deletions pyproject.toml
@@ -66,11 +66,16 @@ fits = [
"kerchunk>=0.2.8",
"astropy",
]
tiff = [
"obstore>=0.5.1",
"async-tiff @ git+https://github.com/developmentseed/async-tiff#subdirectory=python",
]
all_readers = [
"virtualizarr[hdf]",
"virtualizarr[hdf5]",
"virtualizarr[netcdf3]",
"virtualizarr[fits]",
"virtualizarr[tif]",
]

# writers
@@ -95,7 +100,6 @@ upstream = [
# optional dependencies
'astropy @ git+https://github.com/astropy/astropy',
'fsspec @ git+https://github.com/fsspec/filesystem_spec',
's3fs @ git+https://github.com/fsspec/s3fs',
'kerchunk @ git+https://github.com/fsspec/kerchunk',
'icechunk @ git+https://github.com/earth-mover/icechunk#subdirectory=icechunk-python',
]
@@ -168,6 +172,9 @@ h5netcdf = ">=1.5.0,<2"
[tool.pixi.feature.icechunk-dev.dependencies]
rust = "*"

[tool.pixi.feature.rio.dependencies]
rioxarray = "*"

# Define commands to run within the test environments
[tool.pixi.feature.test.tasks]
run-mypy = { cmd = "mypy virtualizarr" }
@@ -181,12 +188,12 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
[tool.pixi.environments]
min-deps = ["dev", "test", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
# Inherit from min-deps to get all the test commands, along with optional dependencies
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio"]
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py312"] # test against python 3.12
minio = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312", "minio"]
upstream = ["dev", "test", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "all_readers", "all_writers"]
docs = ["docs"]

# Define commands to run within the docs environment
@@ -222,7 +229,9 @@ module = [
"minio",
"numcodecs.*",
"ujson",
"zarr",
"zarr.*",
"async_tiff.*",
"obstore.*",
]
ignore_missing_imports = true

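
The new `tiff` extra pairs obstore (object-store byte-range reads) with a git build of async-tiff (TIFF/IFD parsing). A minimal usage sketch, assuming this PR wires a `"tiff"` filetype into `open_virtual_dataset` (the reader wiring itself is not shown in this diff, and the URL is hypothetical):

```python
# Sketch only: assumes `pip install "virtualizarr[tiff]"` and that the PR
# dispatches filetype="tiff" to the async_tiff-backed reader.
from virtualizarr import open_virtual_dataset

# async-tiff parses the IFDs and tile offsets; obstore performs the reads.
vds = open_virtual_dataset(
    "s3://example-bucket/cog.tif",  # hypothetical cloud-optimized GeoTIFF
    filetype="tiff",
)
print(vds)  # chunk manifest entries point at byte ranges inside the TIFF
```
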
30 changes: 24 additions & 6 deletions virtualizarr/common.py
@@ -1,17 +1,35 @@
import dataclasses
from collections.abc import Iterable, Mapping
from typing import (
Any,
Hashable,
MutableMapping,
Optional,
)
from typing import Any, Hashable, MutableMapping, Optional, TypedDict

import numpy as np
import xarray as xr
import xarray.indexes
from numcodecs.abc import Codec

from virtualizarr.utils import _FsspecFSFromFilepath


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec


def construct_fully_virtual_dataset(
virtual_vars: Mapping[str, xr.Variable],
coord_names: Iterable[str] | None = None,
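
Hoisting `ZstdProperties`, `ShuffleProperties`, `ZlibProperties`, and `CFCodec` into `common.py` lets readers other than HDF share them. A minimal sketch of the pattern they support, mirroring how `readers/hdf/filters.py` turns filter properties into numcodecs codecs (the actual call sites are outside this diff):

```python
# Assumes the dataclasses are importable from virtualizarr.common as added above.
import dataclasses

import numcodecs.registry as registry

from virtualizarr.common import ZlibProperties

zlib = ZlibProperties(level=4)
config = dataclasses.asdict(zlib)   # {"level": 4}
config["id"] = "zlib"               # numcodecs resolves codecs by "id"
codec = registry.get_codec(config)  # -> numcodecs.Zlib(level=4)
```
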
2 changes: 1 addition & 1 deletion virtualizarr/manifests/utils.py
@@ -23,7 +23,7 @@ def create_v3_array_metadata(
fill_value: Any = None,
codecs: Optional[list[Dict[str, Any]]] = None,
attributes: Optional[Dict[str, Any]] = None,
dimension_names: Iterable[str] | None = None,
dimension_names: Optional[Iterable[str]] = None,
) -> ArrayV3Metadata:
"""
Create an ArrayV3Metadata instance with standard configuration.
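
The `dimension_names` annotation swap is behavior-neutral: `Optional[Iterable[str]]` and `Iterable[str] | None` denote the same type. A quick check (PEP 604 unions compare equal to `typing` unions on Python 3.10+):

```python
from collections.abc import Iterable
from typing import Optional, Union

# Optional[T] is shorthand for Union[T, None]; `T | None` compares equal on 3.10+.
assert Optional[Iterable[str]] == Union[Iterable[str], None]
assert Optional[int] == (int | None)
```
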
28 changes: 7 additions & 21 deletions virtualizarr/readers/hdf/filters.py
@@ -1,14 +1,20 @@
from __future__ import annotations

import dataclasses
from typing import TYPE_CHECKING, List, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, List, Tuple, Union

import numcodecs.registry as registry
import numpy as np
from numcodecs.abc import Codec
from numcodecs.fixedscaleoffset import FixedScaleOffset
from xarray.coding.variables import _choose_float_dtype

from virtualizarr.readers.common import (
CFCodec,
ShuffleProperties,
ZlibProperties,
ZstdProperties,
)
from virtualizarr.utils import soft_import

h5py = soft_import("h5py", "For reading hdf files", strict=False)
@@ -48,26 +54,6 @@ def __post_init__(self):
self.cname = blosc_compressor_codes[self.cname]


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec


def _filter_to_codec(
filter_id: str, filter_properties: Union[int, None, Tuple] = None
) -> Codec:
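
With the shared definitions in place, `filters.py` drops its local copies and imports them instead. For reference, a sketch of what a `CFCodec` carries; the helper below is illustrative and not part of this diff:

```python
# Illustrative only: shows the shape of a CFCodec value (a TypedDict pairing
# the decoded dtype with the numcodecs codec that performs the decoding).
import numpy as np
from numcodecs.fixedscaleoffset import FixedScaleOffset

from virtualizarr.common import CFCodec  # as added in virtualizarr/common.py above

def cf_scale_codec(scale_factor: float, source_dtype: np.dtype) -> CFCodec:
    # With scale = 1/scale_factor, decoding multiplies stored integers by
    # scale_factor, the usual CF convention.
    codec = FixedScaleOffset(
        offset=0,
        scale=1 / scale_factor,
        dtype="<f8",
        astype=source_dtype.str,
    )
    return CFCodec(target_dtype=np.dtype("float64"), codec=codec)
```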