Skip to content

Commit c5408da

Browse files
b8raoultfloriankrbmchantryMeraXaaron-hopkinson
authored
feat: refactor dataset create (#379)
## Description This is a rewrite of the code that parses the recipe and invokes the sources and filters in the right order. These changes are merely a simplification of the old version and are 100% backward compatible. The input family of actions has been refactored such that all the standalone action files, e.g. concat, are consolidated into a single action.py. Further related additions: - Add a facility to have a source as a parameter of another source if needed. - Some changes are due to code moving to its own file (e.g. repeated_dates). - A new CLI tool, anemoi-datasets recipe, was added to check the change. It can be used to format the YAML and validate it (against a simple JSON schema), and to migrate older YAML files to the new syntax. The latter is experimental, and has been tested with very old recipe files. It can be a good starting point for future migrations, if ever needed. ## What problem does this change solve? This change is in preparation for adding support for observations in anemoi-dataset[create]. ## What issue or task does this change relate to? <!-- link to Issue Number --> ## Additional notes ## <!-- Include any additional information, caveats, or considerations that the reviewer should be aware of. --> ***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed. 
For guidelines about those please refer to https://anemoi.readthedocs.io/en/latest/*** By opening this pull request, I affirm that all authors agree to the [Contributor License Agreement.](https://github.com/ecmwf/codex/blob/main/Legal/contributor_license_agreement.md) --------- Co-authored-by: Florian Pinault <floriankrb@users.noreply.github.com> Co-authored-by: Matthew Chantry <matthew.chantry@ecmwf.int> Co-authored-by: Marek Jacob <MeraX@users.noreply.github.com> Co-authored-by: Aaron Hopkinson <197336788+aaron-hopkinson@users.noreply.github.com>
1 parent 2b7d134 commit c5408da

32 files changed

+1787
-1574
lines changed

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,13 @@ _version.py
141141
*.to_upload
142142
tempCodeRunnerFile.python
143143
Untitled-*.py
144+
*.zip
145+
*.json
146+
*.db
147+
*.tgz
148+
_api/
149+
trace.txt
150+
?/
151+
*.prof
152+
prof/
153+
*.gz

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,12 @@ dependencies = [
5151
"anemoi-transform>=0.1.10",
5252
"anemoi-utils[provenance]>=0.4.32",
5353
"cfunits",
54+
"glom",
55+
"jsonschema",
5456
"numcodecs<0.16", # Until we move to zarr3
5557
"numpy",
5658
"pyyaml",
59+
"ruamel-yaml",
5760
"semantic-version",
5861
"tqdm",
5962
"zarr<=2.18.4",
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# (C) Copyright 2024 Anemoi contributors.
2+
#
3+
# This software is licensed under the terms of the Apache Licence Version 2.0
4+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5+
#
6+
# In applying this licence, ECMWF does not waive the privileges and immunities
7+
# granted to it by virtue of its status as an intergovernmental organisation
8+
# nor does it submit to any jurisdiction.
9+
10+
11+
import argparse
12+
import logging
13+
import sys
14+
from typing import Any
15+
16+
import yaml
17+
18+
from anemoi.datasets.create import validate_config
19+
20+
from .. import Command
21+
from .format import format_recipe
22+
from .migrate import migrate_recipe
23+
24+
LOG = logging.getLogger(__name__)
25+
26+
27+
class Recipe(Command):
    """Command that validates, formats and/or migrates a recipe file."""

    def add_arguments(self, command_parser: Any) -> None:
        """Add arguments to the command parser.

        Parameters
        ----------
        command_parser : Any
            Command parser object.
        """

        command_parser.add_argument("--validate", action="store_true", help="Validate recipe.")
        command_parser.add_argument("--format", action="store_true", help="Format the recipe.")
        command_parser.add_argument("--migrate", action="store_true", help="Migrate the recipe to the latest version.")

        # --inplace and --output both select where the result is written,
        # so only one of them may be given.
        group = command_parser.add_mutually_exclusive_group()
        group.add_argument("--inplace", action="store_true", help="Overwrite the recipe file in place.")
        group.add_argument("--output", type=str, help="Output file path for the converted recipe.")

        command_parser.add_argument(
            "path",
            help="Path to recipe.",
        )

    def run(self, args: Any) -> None:
        """Run the command.

        The recipe at ``args.path`` is loaded with ``yaml.safe_load`` and then,
        depending on the flags, validated, migrated and/or formatted. When no
        action flag is given, ``--validate`` is assumed.

        Parameters
        ----------
        args : Any
            Parsed command-line arguments, as declared in ``add_arguments``.

        Raises
        ------
        argparse.ArgumentError
            If ``--inplace`` or ``--output`` is given while only validation
            is requested (validation produces no output to write).
        """

        # Validation is the default action.
        if not args.validate and not args.format and not args.migrate:
            args.validate = True

        with open(args.path) as file:
            config = yaml.safe_load(file)

        assert isinstance(config, dict), f"{args.path}: recipe must be a YAML mapping."

        if args.validate:
            validate_only = not args.format and not args.migrate

            # These errors were previously constructed but never raised, so
            # the unsupported combinations were silently accepted.
            if args.inplace and validate_only:
                raise argparse.ArgumentError(None, "--inplace is not supported with --validate.")

            if args.output and validate_only:
                raise argparse.ArgumentError(None, "--output is not supported with --validate.")

            validate_config(config)
            LOG.info(f"{args.path}: Recipe is valid.")
            # NOTE(review): returning here means --format/--migrate are not
            # executed when combined with --validate; kept as in the original,
            # confirm that this is intended.
            return

        if args.migrate:
            config = migrate_recipe(args, config)
            if config is None:
                LOG.info(f"{args.path}: No changes needed.")
                return

            # A migrated recipe is always written out formatted.
            args.format = True

        if args.format:
            formatted = format_recipe(args, config)
            assert "dates" in formatted

            if args.inplace:
                target = args.path
            elif args.output:
                target = args.output
            else:
                target = None

            if target is None:
                # Write to standard output, and leave it open: closing
                # sys.stdout would break any later output of the process.
                print(formatted, file=sys.stdout)
            else:
                with open(target, "w") as f:
                    print(formatted, file=f)
91+
92+
93+
# Module-level alias for the Recipe class.
# NOTE(review): presumably the name looked up by the CLI command loader - confirm.
command = Recipe
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# (C) Copyright 2025 Anemoi contributors.
2+
#
3+
# This software is licensed under the terms of the Apache Licence Version 2.0
4+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5+
#
6+
# In applying this licence, ECMWF does not waive the privileges and immunities
7+
# granted to it by virtue of its status as an intergovernmental organisation
8+
# nor does it submit to any jurisdiction.
9+
10+
11+
import datetime
12+
import logging
13+
14+
from ...dumper import yaml_dump
15+
16+
LOG = logging.getLogger(__name__)
17+
18+
19+
def make_dates(config):
    """Recursively replace ISO-formatted date strings with datetime objects.

    Parameters
    ----------
    config : Any
        A (possibly nested) structure of dicts, lists and scalar values.

    Returns
    -------
    Any
        A copy of the structure in which every string accepted by
        ``datetime.datetime.fromisoformat`` is replaced by the corresponding
        ``datetime.datetime``. Other strings and values of other types are
        returned unchanged. Dictionary keys are never converted.
    """
    if isinstance(config, str):
        try:
            parsed = datetime.datetime.fromisoformat(config)
        except ValueError:
            # Not an ISO date/time: keep the string as it is.
            return config
        return parsed

    if isinstance(config, list):
        return list(map(make_dates, config))

    if isinstance(config, dict):
        converted = {}
        for key, value in config.items():
            converted[key] = make_dates(value)
        return converted

    return config
30+
31+
32+
# Top-level recipe keys, passed as the `order` argument of `yaml_dump`
# by `format_recipe`.
# NOTE(review): presumably the order in which the keys are written out - confirm
# against `yaml_dump`.
ORDER = (
    "name",
    "description",
    "dataset_status",
    "licence",
    "attribution",
    "env",
    "dates",
    "common",
    "data_sources",
    "input",
    "output",
    "statistics",
    "build",
    "platform",
)
48+
49+
50+
def format_recipe(args, config: dict) -> str:
    """Format a recipe as YAML text.

    Parameters
    ----------
    args : Any
        Command-line arguments. Not used by this function.
    config : dict
        The recipe, as loaded from YAML.

    Returns
    -------
    str
        The result of ``yaml_dump`` applied to the recipe, after its ISO date
        strings were converted by ``make_dates``, with ``ORDER`` as key order.
    """
    dated = make_dates(config)
    # An empty recipe cannot be formatted.
    assert dated

    return yaml_dump(dated, order=ORDER)

0 commit comments

Comments
 (0)