Skip to content

Commit fb46425

Browse files
committed
1. Updating parser spec.
1 parent 659fd47 commit fb46425

File tree

5 files changed

+122
-63
lines changed

5 files changed

+122
-63
lines changed

jobs/set_parser_config/__main__.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
_GENERATORS = {
1919
constants.IDENTIFIER_TYPE_DATASET: set_dataset_id,
2020
constants.IDENTIFIER_TYPE_DIRECTORY: set_directory_id,
21-
constants.IDENTIFIER_TYPE_FILENAME: set_filename_id
21+
constants.IDENTIFIER_TYPE_FILENAME: set_filename_id,
2222
}
2323

2424

@@ -27,8 +27,8 @@ def _main():
2727
2828
"""
2929
for a in pyessv.get_cached():
30-
for s in [i for i in a if i.data]:
31-
for identifier_type, template in s.data.items():
30+
for scope in [i for i in a if i.data]:
31+
for identifier_type, template in scope.data.items():
3232
try:
3333
identifier_type = _IDENTIFIER_TYPES[identifier_type]
3434
except KeyError:
@@ -38,10 +38,10 @@ def _main():
3838
generator = _GENERATORS[identifier_type]
3939
except KeyError:
4040
continue
41-
else:
42-
cfg = generator.get_config(a, s, template)
43-
if cfg is not None:
44-
io_manager.write_scope_parser_config(s, identifier_type, cfg)
41+
42+
cfg = generator.get_config(scope, template)
43+
if cfg is not None:
44+
io_manager.write_scope_parser_config(scope, identifier_type, cfg)
4545

4646

4747
_main()
Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,56 @@
1-
import typing
1+
import re
22

3-
from pyessv import Authority
43
from pyessv import Scope
54

65

7-
def get_config(a: Authority, s: Scope, template_raw: str):
6+
def get_config(s: Scope, template_raw: str) -> dict:
87
"""Returns dataset identifier parser configuration information derived
98
from a previously declared parsing template.
109
11-
:param a: A vocabulary authority.
1210
:param s: A vocabulary scope.
1311
:param template_raw: A raw dataset id parsing template.
1412
:returns: Dataset id parser configuration information.
1513
1614
"""
17-
# Set specs, i.e. set of parser specification embedded in the template.
18-
specs: typing.List[str] = template_raw.split(".")[1:]
19-
specs = [i[2:-2] for i in specs]
20-
specs = [i.replace("_", "-") for i in specs]
15+
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)]
16+
if s.namespace == "wcrp:input4mips":
17+
parts = parts[1:]
18+
19+
return {
20+
"seperator": ".",
21+
"template": template_raw,
22+
"specs": [_get_prefix_spec(s)] + [_get_part_spec(s, i) for i in parts],
23+
"suffix": "#"
24+
}
2125

22-
# Set spec overrides.
23-
if s.namespace == "wcrp:cmip6":
24-
specs[0] = "activity-id"
2526

26-
# Set template prefix.
27+
def _get_prefix_spec(s: Scope) -> dict:
28+
"""Maps a scope to a constant specification acting as identifier prefix.
29+
30+
"""
2731
if s.namespace in ("ecmwf:cc4e", "wcrp:cmip6"):
2832
prefix = s.canonical_name.upper()
33+
elif s.namespace in ("wcrp:input4mips"):
34+
prefix = s["activity-id"]["input4mips"].raw_name
2935
else:
3036
prefix = s.canonical_name
3137

3238
return {
33-
"template": template_raw,
34-
"seperator": ".",
35-
"specs": [f"const:{prefix}"] + [f"{s}:{i}" for i in specs],
36-
"suffix": "#"
39+
"type": "const",
40+
"value": prefix,
41+
"is_required": True
42+
}
43+
44+
45+
def _get_part_spec(s: Scope, part: str) -> dict:
46+
"""Maps a template part to a collection specification.
47+
48+
"""
49+
if s.namespace == "wcrp:cmip6" and part == "activity-drs":
50+
part = "activity-id"
51+
52+
return {
53+
"type": "collection",
54+
"namespace": f"{s.namespace}:{part}",
55+
"is_required": True
3756
}
Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import typing
1+
import re
22

3-
from pyessv import Authority
43
from pyessv import Scope
54

65

@@ -15,11 +14,10 @@
1514
}
1615

1716

18-
def get_config(a: Authority, s: Scope, template_raw: str):
17+
def get_config(s: Scope, template_raw: str) -> dict:
1918
"""Returns directory identifier parser configuration information derived
2019
from a previously declared parsing template.
2120
22-
:param a: A vocabulary authority.
2321
:param s: A vocabulary scope.
2422
:param template_raw: A raw dataset id parsing template.
2523
:returns: Directory name parser configuration information.
@@ -29,23 +27,41 @@ def get_config(a: Authority, s: Scope, template_raw: str):
2927
if s.namespace == "wcrp:e3sm":
3028
return
3129

32-
# Set specs.
33-
specs: typing.List[str] = template_raw.split("%(")[2:]
34-
specs = [i.split(")")[0] for i in specs]
35-
specs = [i.replace("_", "-") for i in specs]
30+
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)[2:]]
3631

37-
# Set spec overrides.
38-
if s.namespace == "wcrp:cmip6":
39-
specs[1] = "activity-id"
32+
return {
33+
"seperator": "/",
34+
"template": template_raw,
35+
"specs": [_get_prefix_spec(s)] + [_get_part_spec(s, i) for i in parts],
36+
"suffix": "#"
37+
}
4038

41-
# Set directory prefix.
39+
40+
def _get_prefix_spec(s: Scope) -> dict:
41+
"""Maps a scope to a constant specification acting as identifier prefix.
42+
43+
"""
4244
try:
4345
prefix: str = _PROJECT_PREFIX[s.namespace]
4446
except KeyError:
4547
prefix = s.canonical_name.upper()
4648

4749
return {
48-
"template": template_raw,
49-
"seperator": "/",
50-
"specs": [f"const:{prefix}"] + [f"{s}:{i}" for i in specs[1:]]
50+
"type": "const",
51+
"value": prefix,
52+
"is_required": True
53+
}
54+
55+
56+
def _get_part_spec(s: Scope, part: str) -> dict:
57+
"""Maps a template part to a collection specification.
58+
59+
"""
60+
if s.namespace == "wcrp:cmip6" and part == "activity-drs":
61+
part = "activity-id"
62+
63+
return {
64+
"type": "collection",
65+
"namespace": f"{s.namespace}:{part}",
66+
"is_required": True
5167
}
Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,66 @@
1-
from pyessv import Authority
1+
import re
2+
23
from pyessv import Scope
34

45

5-
def get_config(a: Authority, s: Scope, template_raw: str):
6+
def get_config(s: Scope, template_raw: str) -> dict:
67
"""Returns file name identifier parser configuration information derived
78
from a previously declared parsing template.
89
9-
:param a: A vocabulary authority.
1010
:param s: A vocabulary scope.
1111
:param template_raw: A raw dataset id parsing template.
1212
:returns: File name parser configuration information.
1313
1414
"""
15-
# Skip ill-defined.
1615
if s.namespace == "wcrp:e3sm":
1716
return
1817

19-
# Discard period start/end + file type.
20-
template = template_raw.split("[")[0]
18+
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)]
19+
if parts[-2] == "period-start" and parts[-1] == "period-end":
20+
parts = parts[:-2] + ["time_range"]
21+
22+
return {
23+
"seperator": "_",
24+
"template": template_raw,
25+
"specs": [_get_part_spec(s, i) for i in parts] + [_get_suffix_spec()]
26+
}
27+
28+
29+
def _get_part_spec(s: Scope, part: str) -> dict:
30+
"""Maps a template part to a collection specification.
31+
32+
"""
33+
if s.namespace == "wcrp:cmip6" and part == "activity-drs":
34+
part = "activity-id"
2135

22-
# Set specs.
23-
specs = [i.split(")")[0] for i in template.split("%(")[1:]]
24-
specs = [i.replace("_", "-") for i in specs]
25-
specs = [f"{s}:{i}" for i in specs]
36+
if s.namespace == "ecmwf:cc4e" and part == "project":
37+
return {
38+
"type": "const",
39+
"value": "cc4e",
40+
"is_required": True
41+
}
2642

27-
# Set spec overrides.
28-
if s.namespace == "ecmwf:cc4e":
29-
specs[0] = "const:cc4e"
43+
elif part == "time_range":
44+
return {
45+
"type": "regex",
46+
"expression": "^[0-9]{4}-[0-9]{4}$",
47+
"is_required": False
48+
}
3049

31-
# Append period start - end regex.
32-
if s.namespace != "wcrp:input4mips":
33-
specs.append("regex:^[0-9]{4}-[0-9]{4}$")
50+
else:
51+
return {
52+
"type": "collection",
53+
"namespace": f"{s.namespace}:{part}",
54+
"is_required": True
55+
}
3456

35-
# Append file type.
36-
specs.append("regex:^nc$")
3757

58+
def _get_suffix_spec() -> dict:
59+
"""Returns a regex specification acting as identifier suffix (file type).
60+
61+
"""
3862
return {
39-
"template": template_raw,
40-
"seperator": "_",
41-
"specs": specs
63+
"type": "regex",
64+
"expression": "^nc$",
65+
"is_required": True
4266
}

pyessv/parsing/identifiers/parser_spec.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,19 @@ class RegExParsingSpecification(ParsingSpecification):
6161
"""Encapsulates specification information related to a regex element parser.
6262
6363
"""
64-
def __init__(self, regex, is_required):
64+
def __init__(self, expression, is_required):
6565
"""Instance initializer.
6666
67-
:param regex: A regular expression against which an identifier element will be validated.
67+
:param expression: A regular expression against which an identifier element will be validated.
6868
:param is_required: Flag indicating whether the identifier element must exist.
6969
7070
"""
7171
super(RegExParsingSpecification, self).__init__("regex", is_required)
72-
self.regex = regex
72+
self.expression = expression
7373

7474

7575
def __repr__(self):
7676
"""Instance representation.
7777
7878
"""
79-
return f"parser-spec|regex::{self.regex}::{self.is_required}"
79+
return f"parser-spec|regex::{self.expression}::{self.is_required}"

0 commit comments

Comments
 (0)