Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement era5 land #21

Merged
merged 18 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,388 changes: 1,388 additions & 0 deletions demo/era5-land_dataset_demo.ipynb

Large diffs are not rendered by default.

96 changes: 60 additions & 36 deletions demo/era5_dataset_demo.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions demo/eth_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"outputs": [],
"source": [
"import numpy as np\n",
"from zampy.datasets import EthCanopyHeight\n",
"from zampy.datasets.catalog import EthCanopyHeight\n",
"from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n",
"from pathlib import Path\n",
"\n",
Expand Down Expand Up @@ -2660,7 +2660,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.0"
},
"orig_nbformat": 4
},
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dependencies = [
"requests",
Expand Down Expand Up @@ -135,10 +136,11 @@ testpaths = ["tests"]
[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
python_version = "3.9"

[tool.black]
line-length = 88
target-version = ['py38', 'py39', 'py310']
target-version = ['py39', 'py310', 'py311']
include = '\.pyi?$'

[tool.ruff]
Expand Down Expand Up @@ -169,7 +171,7 @@ line-length = 88
exclude = ["docs", "build"]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py38"
target-version = "py39"

[tool.ruff.per-file-ignores]
"tests/**" = ["D"]
Expand Down
8 changes: 8 additions & 0 deletions src/zampy/conventions/ALMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,13 @@
"total_precipitation": {
"variable": "Rainf",
"units": "millimeter/second"
},
"air_temperature": {
"variable": "Tair",
"units": "kelvin"
},
"dewpoint_temperature": {
"variable": "d2m",
"units": "kelvin"
}
}
13 changes: 2 additions & 11 deletions src/zampy/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
"""Datasets implementations."""
from zampy.datasets import dataset_protocol
from zampy.datasets import validation
from zampy.datasets.era5 import ERA5
from zampy.datasets.eth_canopy_height import EthCanopyHeight
from zampy.datasets.catalog import DATASETS


__all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"]


# This object tracks which datasets are available.
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
# All lowercase key.
"era5": ERA5,
"eth_canopy_height": EthCanopyHeight,
}
__all__ = ["dataset_protocol", "validation", "DATASETS"]
14 changes: 14 additions & 0 deletions src/zampy/datasets/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Catalog of datasets.

Maps user-facing, all-lowercase dataset identifiers to the classes that
implement them, so callers can look a dataset up by string name.
"""
from zampy.datasets import dataset_protocol
from zampy.datasets.era5 import ERA5
from zampy.datasets.era5 import ERA5Land
from zampy.datasets.eth_canopy_height import EthCanopyHeight


# This object tracks which datasets are available. Each value is a class
# satisfying the dataset_protocol.Dataset protocol (per the annotation below).
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
    # All lowercase key.
    "era5": ERA5,
    "era5_land": ERA5Land,
    "eth_canopy_height": EthCanopyHeight,
}
21 changes: 7 additions & 14 deletions src/zampy/datasets/dataset_protocol.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
"""Outline of the dataset protocol."""
import json
import shutil
from abc import abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import List
from typing import Optional
from typing import Protocol
from typing import Tuple
import numpy as np
import xarray as xr

Expand Down Expand Up @@ -79,21 +76,20 @@ class Dataset(Protocol):
crs: str
license: str
bib: str
raw_variables: Tuple[Variable, ...]
variable_names: Tuple[str, ...]
variables: Tuple[Variable, ...]
raw_variables: list[Variable]
variable_names: list[str]
variables: list[Variable]

def __init__(self) -> None:
"""Init."""
...

@abstractmethod
def download(
self,
download_dir: Path,
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
variable_names: List[str],
variable_names: list[str],
overwrite: bool = False,
) -> bool:
"""Download the data.
Expand All @@ -111,7 +107,6 @@ def download(
"""
...

@abstractmethod
def ingest(
self,
download_dir: Path,
Expand All @@ -130,15 +125,14 @@ def ingest(
"""
...

@abstractmethod
def load(
self,
ingest_dir: Path,
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: List[str],
variable_names: list[str],
) -> xr.Dataset:
"""Get the dataset as an xarray Dataset.

Expand All @@ -160,7 +154,6 @@ def load(
"""
...

@abstractmethod
def convert(
self,
ingest_dir: Path,
Expand All @@ -182,7 +175,7 @@ def write_properties_file(
dataset_folder: Path,
spatial_bounds: SpatialBounds,
time_bounds: TimeBounds,
variable_names: List[str],
variable_names: list[str],
) -> None:
"""Write the (serialized) spatial and time bounds to a json file.

Expand Down Expand Up @@ -211,7 +204,7 @@ def write_properties_file(

def read_properties_file(
dataset_folder: Path,
) -> Tuple[SpatialBounds, TimeBounds, List[str]]:
) -> tuple[SpatialBounds, TimeBounds, list[str]]:
"""Load the serialized spatial and time bounds from the json file.

Args:
Expand Down
150 changes: 150 additions & 0 deletions src/zampy/datasets/ecmwf_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""Base module for datasets available on CDS."""

from pathlib import Path
from typing import Union
import xarray as xr
from zampy.datasets import converter
from zampy.datasets import utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.utils import regrid


## Ignore missing class/method docstrings: they are implemented in the Dataset class.
# ruff: noqa: D102


class ECMWFDataset:
    """Shared implementation for ECMWF products served through the CDS.

    Concrete datasets (e.g. ERA5, ERA5-Land) subclass this and fill in the
    annotated class attributes (``name``, ``cds_dataset``, the variable
    lists, ...). The four protocol methods below are common to all of them.
    """

    name: str
    time_bounds: TimeBounds
    spatial_bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    raw_variables: list[Variable]
    cds_var_names: dict[str, str]
    variable_names: list[str]
    variables: list[Variable]
    license = "cc-by-4.0"
    bib = """
    @article{hersbach2020era5,
        title={The ERA5 global reanalysis},
        author={Hersbach, Hans et al.},
        journal={Quarterly Journal of the Royal Meteorological Society},
        volume={146},
        number={730},
        pages={1999--2049},
        year={2020},
        publisher={Wiley Online Library}
    }
    """
    cds_dataset: str

    def __init__(self) -> None:
        """Init."""

    def download(
        self,
        download_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        variable_names: list[str],
        overwrite: bool = False,
    ) -> bool:
        """Fetch the raw data from CDS into ``download_dir/<name>``.

        A properties file recording the requested bounds and variables is
        written next to the downloaded data. Returns True on completion.
        """
        validation.validate_download_request(
            self,
            download_dir,
            time_bounds,
            spatial_bounds,
            variable_names,
        )

        target_folder = download_dir / self.name
        target_folder.mkdir(parents=True, exist_ok=True)

        utils.cds_request(
            dataset=self.cds_dataset,
            variables=variable_names,
            time_bounds=time_bounds,
            spatial_bounds=spatial_bounds,
            path=target_folder,
            cds_var_names=self.cds_var_names,
            overwrite=overwrite,
        )

        write_properties_file(
            target_folder, spatial_bounds, time_bounds, variable_names
        )

        return True

    def ingest(
        self,
        download_dir: Path,
        ingest_dir: Path,
        overwrite: bool = False,
    ) -> bool:
        """Convert every downloaded netCDF file to the zampy format."""
        source_folder = download_dir / self.name
        target_folder = ingest_dir / self.name
        target_folder.mkdir(parents=True, exist_ok=True)

        for nc_file in source_folder.glob(f"{self.name}_*.nc"):
            utils.convert_to_zampy(
                target_folder,
                file=nc_file,
                overwrite=overwrite,
            )

        copy_properties_file(source_folder, target_folder)

        return True

    def load(
        self,
        ingest_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        resolution: float,
        regrid_method: str,
        variable_names: list[str],
    ) -> xr.Dataset:
        """Open the ingested files as a single dataset, clipped in time and regridded."""
        ingest_folder = ingest_dir / self.name
        # Collect files in the order of self.variable_names, keeping only the
        # variables the caller actually requested.
        files: list[Path] = [
            path
            for var in self.variable_names
            if var in variable_names
            for path in ingest_folder.glob(f"{self.name}_{var}*.nc")
        ]

        ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200})
        ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
        return regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

    def convert(
        self,
        ingest_dir: Path,
        convention: Union[str, Path],
    ) -> bool:
        """Run the variable-name/unit conversion for the given convention."""
        converter.check_convention(convention)
        ingest_folder = ingest_dir / self.name

        for nc_file in ingest_folder.glob(f"{self.name}_*.nc"):
            # start conversion process
            print(f"Start processing file `{nc_file.name}`.")
            ds = xr.open_dataset(nc_file, chunks={"x": 50, "y": 50})
            ds = converter.convert(ds, dataset=self, convention=convention)
            # TODO: support derived variables
            # TODO: other calculations
            # call ds.compute()

        return True
Loading