Skip to content

Commit 5367cb7

Browse files
committed
* builder
* add tests assets * minor changes : tests, set_identifier
1 parent 7e51d3e commit 5367cb7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2319
-2593
lines changed

jobs/set_parser_config/set_dataset_id.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def get_config(s: Scope, template_raw: str) -> dict:
1212
:returns: Dataset id parser configuration information.
1313
1414
"""
15-
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)]
15+
parts = [i for i in re.findall("%\((\w*)\)s", template_raw)]
1616
if s.namespace == "wcrp:input4mips":
1717
parts = parts[1:]
1818

@@ -44,11 +44,22 @@ def _get_prefix_spec(s: Scope) -> dict:
4444

4545
def _get_part_spec(s: Scope, part: str) -> dict:
4646
"""Maps a template part to a collection specification.
47-
47+
"part" is the facet name found in the DRS template, but pyessv may know that facet under an alternative collection name.
48+
Therefore every collection of this scope is checked, alternative names included, to find the one matching "part",
49+
and the config file is written using the pyessv collection name.
4850
"""
51+
''' # useless if check alternatives names with a complete archive ..
4952
if s.namespace == "wcrp:cmip6" and part == "activity-drs":
5053
part = "activity-id"
51-
54+
'''
55+
for c in s :
56+
if part in c.all_names:
57+
return {
58+
"type": "collection",
59+
"namespace": f"{c}",
60+
"is_required": True
61+
}
62+
print(f"Pyessv doesn't know this collection : {part} for this scope : {s}")
5263
return {
5364
"type": "collection",
5465
"namespace": f"{s.namespace}:{part}",

jobs/set_parser_config/set_directory_id.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55

66
# Map: project namespace -> project drs prefix.
77
_PROJECT_PREFIX = {
8-
"ecmwf:c3s-cmip5": "c3s-cmip5",
9-
"ecmwf:c3s-cordex": "c3s-cordex",
10-
"wcrp:cordex-adjust": "CORDEX-adjust",
11-
"wcrp:geomip": "GeoMIP",
12-
"wcrp:input4mips": "input4MIPs",
13-
"wcrp:obs4mips": "obs4MIPs",
8+
"c3s-cmip5": "c3s-cmip5",
9+
"c3s-cordex": "c3s-cordex",
10+
"cordex-adjust": "CORDEX-Adjust",
11+
"geomip": "GeoMIP",
12+
"input4mips": "input4MIPs",
13+
"obs4mips": "obs4MIPs",
1414
}
1515

1616

@@ -27,7 +27,7 @@ def get_config(s: Scope, template_raw: str) -> dict:
2727
if s.namespace == "wcrp:e3sm":
2828
return
2929

30-
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)[2:]]
30+
parts = [i for i in re.findall("%\((\w*)\)s", template_raw)[2:]]
3131

3232
return {
3333
"seperator": "/",
@@ -42,7 +42,7 @@ def _get_prefix_spec(s: Scope) -> dict:
4242
4343
"""
4444
try:
45-
prefix: str = _PROJECT_PREFIX[s.namespace]
45+
prefix: str = _PROJECT_PREFIX[s.namespace.split(":")[-1]]
4646
except KeyError:
4747
prefix = s.canonical_name.upper()
4848

@@ -57,9 +57,18 @@ def _get_part_spec(s: Scope, part: str) -> dict:
5757
"""Maps a template part to a collection specification.
5858
5959
"""
60+
'''
6061
if s.namespace == "wcrp:cmip6" and part == "activity-drs":
6162
part = "activity-id"
62-
63+
'''
64+
for c in s :
65+
if part in c.all_names:
66+
return {
67+
"type": "collection",
68+
"namespace": f"{c}",
69+
"is_required": True
70+
}
71+
print(f"Pyessv doesn't know this collection : {part} for this scope : {s}")
6372
return {
6473
"type": "collection",
6574
"namespace": f"{s.namespace}:{part}",

jobs/set_parser_config/set_filename_id.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ def get_config(s: Scope, template_raw: str) -> dict:
1515
if s.namespace == "wcrp:e3sm":
1616
return
1717

18-
parts = [i.replace("_", "-") for i in re.findall("%\((\w*)\)s", template_raw)]
19-
if parts[-2] == "period-start" and parts[-1] == "period-end":
18+
parts = [i for i in re.findall("%\((\w*)\)s", template_raw)]
19+
if parts[-2] == "period_start" and parts[-1] == "period_end":
2020
parts = parts[:-2] + ["time_range"]
2121

2222
return {
@@ -48,11 +48,19 @@ def _get_part_spec(s: Scope, part: str) -> dict:
4848
}
4949

5050
else:
51-
return {
52-
"type": "collection",
53-
"namespace": f"{s.namespace}:{part}",
54-
"is_required": True
55-
}
51+
for c in s:
52+
if part in c.all_names:
53+
return {
54+
"type": "collection",
55+
"namespace": f"{c}",
56+
"is_required": True
57+
}
58+
print(f"Pyessv doesn't know this collection : {part} for this scope : {s}")
59+
return {
60+
"type": "collection",
61+
"namespace": f"{s.namespace}:{part}",
62+
"is_required": True
63+
}
5664

5765

5866
def _get_suffix_spec() -> dict:

pyessv/loader.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ def _is_matched(node, identifier):
105105
elif identifier in [format_string(i).lower() for i in node.alternative_names]:
106106
return True
107107

108+
# Try to fix spec matching on collection alternative names for config_parser: also compare with "_" replaced by "-".
109+
elif identifier in [format_string(i).lower().replace("_","-") for i in node.alternative_names]:
110+
return True
111+
108112
return False
109113

110114

pyessv/parsing/identifiers/builder.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def build_identifier(scope, identifier_type, terms, regex_terms={}):
1515
:param terms: Set of known term.
1616
:param regex_terms: Dictionary of terms matching the regex term in spec : {term:value,...}
1717
18-
:return: str of identifer according to template and input terms
18+
:return: str of identifier according to template and input terms
1919
:rtype: str | ValueError
2020
2121
Note : currently, if a term is optional in the template (i.e in bracket []) and if there is no input term
@@ -40,16 +40,23 @@ def build_identifier(scope, identifier_type, terms, regex_terms={}):
4040
# Template split from configuration
4141
template_part = re.findall("%\((\w*)\)s", cfg.template)
4242
("root" in template_part) and template_part.remove("root") # remove root from template_part if exist
43+
44+
# template_part does not contain the constant part, so prepend it when the template starts with one.
45+
if cfg.template[0] != "%":
46+
template_part= [cfg.template[:cfg.template.index(cfg.seperator)] ]+ template_part
47+
4348
if len(template_part) != len(cfg.specs):
44-
msg = f'Invalid config file for identifier : {identifier_type} : different count between template and spec'
49+
msg = f'Invalid config file for identifier : {identifier_type} : ' \
50+
f'different count between template({len(template_part)}) and spec({len(cfg.specs)})' \
51+
f' \n template_part = {template_part} \n cfg.template = {cfg.specs}'
52+
4553
raise ValueError(msg)
4654

4755
# Check if all cfg.spec are in terms or in regex_terms
4856
known_terms = [term.collection.all_names for term in terms]
4957
known_terms.extend(set([(name,) for name in regex_terms.keys()])) # hack to fake multiple name in regex_term
5058
known_terms = set.union(*known_terms)
5159
# print(set.union(*known_terms))
52-
5360
for idx, spec in enumerate(cfg.specs):
5461
if not isinstance(spec, ConstantParsingSpecification) and \
5562
template_part[idx] not in optional_template_part:
@@ -58,6 +65,7 @@ def build_identifier(scope, identifier_type, terms, regex_terms={}):
5865
msg = f'Invalid known terms : missing {template_part[idx]} to build {identifier_type}'
5966
raise ValueError(msg)
6067
elif spec.namespace.split(":")[-1] not in known_terms:
68+
print(spec.namespace.split(":")[-1], known_terms)
6169
msg = f'Invalid known terms : missing {spec.namespace.split(":")[-1]} to build {identifier_type}'
6270
raise ValueError(msg)
6371

pyessv/parsing/identifiers/parser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ def parse_identifer(scope, identifier_type, identifier, strictness=PARSING_STRIC
2323
2424
"""
2525
assert identifier_type in IDENTIFIER_TYPE_SET, f"Unsupported parser type: {identifier_type}"
26-
26+
if identifier_type == IDENTIFIER_TYPE_FILENAME and identifier[-3:]!=".nc":
27+
raise ValueError("filename extension have to be .nc")
2728
# Set parsing configuration.
2829
cfg = get_config(scope, identifier_type)
2930

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"scope": "compil:cmip5",
3+
"identifiers": [
4+
"cmip5.output1.LASG-IAP.FGOALS-s2.1pctCO2.fx.ocean.fx.r0i0p0",
5+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.1pctCO2.fx.atmos.fx.r0i0p0",
6+
"cmip5.output1.MPI-M.MPI-ESM-LR.abrupt4xCO2.day.atmos.day.r1i1p1",
7+
"cmip5.output1.MOHC.HadGEM2-A.amip.fx.atmos.fx.r0i0p0",
8+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.amip.fx.atmos.fx.r0i0p0",
9+
"cmip5.output1.CSIRO-BOM.ACCESS1-3.historical.3hr.atmos.3hr.r1i1p1",
10+
"cmip5.output1.CSIRO-BOM.ACCESS1-3.piControl.mon.seaIce.OImon.r1i1p1",
11+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.rcp85.mon.ocean.Omon.r10i1p1",
12+
"cmip5.output1.BCC.bcc-csm1-1-m.amip.day.atmos.day.r1i1p1",
13+
"cmip5.output1.IPSL.IPSL-CM5A-LR.esmrcp85.6hr.atmos.6hrPlev.r1i1p1",
14+
"cmip5.output1.LASG-CESS.FGOALS-g2.rcp45.mon.atmos.Amon.r1i1p1",
15+
"cmip5.output1.NCC.NorESM1-M.historicalGHG.mon.atmos.Amon.r1i1p1",
16+
"cmip5.output1.MIROC.MIROC5.rcp26.day.atmos.day.r2i1p1",
17+
"cmip5.output1.LASG-IAP.FGOALS-gl.past1000.fx.ocean.fx.r0i0p0",
18+
"cmip5.output1.MRI.MRI-AGCM3-2S.amip.fx.atmos.fx.r0i0p0",
19+
"cmip5.output1.LASG-CESS.FGOALS-g2.historical.mon.seaIce.OImon.r5i1p1",
20+
"cmip5.output1.NASA-GISS.GISS-E2-R-CC.historical.yr.ocnBgchem.Oyr.r1i1p1",
21+
"cmip5.output1.MIROC.MIROC-ESM-CHEM.rcp85.3hr.atmos.3hr.r1i1p1",
22+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.historicalMisc.mon.atmos.Amon.r7i1p4",
23+
"cmip5.output1.CNRM-CERFACS.CNRM-CM5.historicalExt.day.atmos.day.r5i1p1",
24+
"cmip5.output1.LASG-CESS.FGOALS-g2.lgm.fx.atmos.fx.r0i0p0",
25+
"cmip5.output1.ICHEC.EC-EARTH.historical.day.atmos.day.r6i1p1",
26+
"cmip5.output1.CMCC.CMCC-CMS.rcp45.day.atmos.day.r1i1p1",
27+
"cmip5.output1.BNU.BNU-ESM.amip.mon.atmos.Amon.r1i1p1",
28+
"cmip5.output1.INM.inmcm4.esmHistorical.mon.atmos.Amon.r1i1p1",
29+
"cmip5.output1.LASG-CESS.FGOALS-g2.lgm.fx.ocean.fx.r0i0p0",
30+
"cmip5.output1.INM.inmcm4.amip.mon.atmos.Amon.r1i1p1",
31+
"cmip5.output1.CCCma.CanCM4.historical.mon.atmos.Amon.r7i1p1",
32+
"cmip5.output1.UNSW.CSIRO-Mk3L-1-2.rcp45.day.atmos.day.r2i2p1",
33+
"cmip5.output1.NCAR.CCSM4.rcp60.mon.atmos.Amon.r4i1p1",
34+
"cmip5.output1.NCC.NorESM1-ME.1pctCO2.mon.land.Lmon.r1i1p1",
35+
"cmip5.output1.FIO.FIO-ESM.rcp60.mon.atmos.Amon.r1i1p1",
36+
"cmip5.output1.MIROC.MIROC-ESM.1pctCO2.mon.ocnBgchem.Omon.r1i1p1",
37+
"cmip5.output1.BCC.bcc-csm1-1-m.abrupt4xCO2.mon.ocnBgchem.Omon.r1i1p1",
38+
"cmip5.output1.NOAA-GFDL.GFDL-ESM2G.rcp60.day.atmos.day.r1i1p1",
39+
"cmip5.output1.CCCma.CanAM4.amip.day.atmos.cfDay.r4i1p1",
40+
"cmip5.output1.MIROC.MIROC5.piControl.fx.atmos.fx.r0i0p0",
41+
"cmip5.output1.UNSW.CSIRO-Mk3L-1-2.rcp45.fx.atmos.fx.r0i0p0",
42+
"cmip5.output1.CNRM-CERFACS.CNRM-CM5-2.historical.mon.land.Lmon.r1i1p1",
43+
"cmip5.output1.INM.inmcm4.1pctCO2.mon.ocnBgchem.Omon.r1i1p1",
44+
"cmip5.output1.MOHC.HadGEM2-CC.piControl.yr.ocnBgchem.Oyr.r1i1p1",
45+
"cmip5.output1.LASG-CESS.FGOALS-g2.midHolocene.mon.ocean.Omon.r1i1p1",
46+
"cmip5.output1.INPE.HadGEM2-ES.historical.mon.land.Lmon.r5i1p1",
47+
"cmip5.output1.MOHC.HadGEM2-CC.midHolocene.fx.ocean.fx.r0i0p0",
48+
"cmip5.output1.MIROC.MIROC5.rcp85.3hr.atmos.3hr.r2i1p1",
49+
"cmip5.output1.INM.inmcm4.amip.mon.atmos.Amon.r1i1p1",
50+
"cmip5.output1.FIO.FIO-ESM.rcp26.mon.land.Lmon.r2i1p1",
51+
"cmip5.output1.MIROC.MIROC4h.rcp45.mon.land.Lmon.r3i1p1",
52+
"cmip5.output1.MOHC.HadGEM2-ES.rcp85.3hr.atmos.3hr.r1i1p1",
53+
"cmip5.output1.CNRM-CERFACS.CNRM-CM5.rcp26.fx.atmos.fx.r0i0p0",
54+
"cmip5.output1.NCAR.CCSM4.rcp60.mon.seaIce.OImon.r5i1p1",
55+
"cmip5.output1.NCC.NorESM1-M.rcp26.day.atmos.day.r1i1p1",
56+
"cmip5.output1.NCC.NorESM1-M.piControl.mon.atmos.Amon.r1i1p1",
57+
"cmip5.output1.UNSW.CSIRO-Mk3L-1-2.1pctCO2.fx.ocean.fx.r0i0p0",
58+
"cmip5.output1.MRI.MRI-AGCM3-2H.amip.mon.land.Lmon.r1i1p1",
59+
"cmip5.output1.INM.inmcm4.rcp45.day.atmos.day.r1i1p1",
60+
"cmip5.output1.MPI-M.MPI-ESM-P.past1000.mon.land.Lmon.r1i1p1",
61+
"cmip5.output1.NASA-GISS.GISS-E2-R.abrupt4xCO2.mon.ocean.Omon.r1i1p2",
62+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.midHolocene.day.atmos.day.r1i1p1",
63+
"cmip5.output1.LASG-CESS.FGOALS-g2.1pctCO2.mon.land.Lmon.r1i1p1",
64+
"cmip5.output1.MRI.MRI-AGCM3-2H.amip.mon.land.Lmon.r1i1p1",
65+
"cmip5.output1.NOAA-GFDL.GFDL-HIRAM-C360.amip.fx.atmos.fx.r0i0p0",
66+
"cmip5.output1.LASG-IAP.FGOALS-s2.midHolocene.day.atmos.day.r1i1p1",
67+
"cmip5.output1.INPE.HadGEM2-ES.historical.day.atmos.day.r5i1p1",
68+
"cmip5.output1.LASG-IAP.FGOALS-s2.historical.day.ocean.day.r1i1p1",
69+
"cmip5.output1.NCC.NorESM1-ME.historical.day.ocean.day.r1i1p1",
70+
"cmip5.output1.NASA-GISS.GISS-E2-R.rcp26.mon.ocean.Omon.r1i1p1",
71+
"cmip5.output1.CCCma.CanESM2.1pctCO2.mon.ocean.Omon.r1i1p1",
72+
"cmip5.output1.NIMR-KMA.HadGEM2-AO.rcp26.day.atmos.day.r1i1p1",
73+
"cmip5.output1.INM.inmcm4.rcp85.fx.atmos.fx.r0i0p0",
74+
"cmip5.output1.MOHC.HadGEM2-ES.piControl.mon.ocean.Omon.r1i1p1",
75+
"cmip5.output1.INPE.HadGEM2-ES.historical.mon.atmos.Amon.r5i1p1",
76+
"cmip5.output1.LASG-IAP.FGOALS-s2.midHolocene.day.atmos.day.r1i1p1",
77+
"cmip5.output1.INPE.HadGEM2-ES.historical.day.atmos.day.r5i1p1",
78+
"cmip5.output1.MPI-M.MPI-ESM-LR.rcp85.mon.ocnBgchem.Omon.r2i1p1",
79+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.historicalGHG.mon.atmos.Amon.r10i1p1",
80+
"cmip5.output1.BNU.BNU-ESM.rcp26.mon.ocean.Omon.r1i1p1",
81+
"cmip5.output1.MPI-M.MPI-ESM-P.1pctCO2.day.atmos.day.r1i1p1",
82+
"cmip5.output1.LASG-IAP.FGOALS-s2.midHolocene.day.atmos.day.r1i1p1",
83+
"cmip5.output1.MPI-M.MPI-ESM-MR.abrupt4xCO2.fx.atmos.fx.r0i0p0",
84+
"cmip5.output1.LASG-IAP.FGOALS-s2.1pctCO2.mon.seaIce.OImon.r1i1p1",
85+
"cmip5.output1.MRI.MRI-AGCM3-2S.amip.mon.atmos.Amon.r1i1p1",
86+
"cmip5.output1.MIROC.MIROC5.piControl.fx.atmos.fx.r0i0p0",
87+
"cmip5.output1.IPSL.IPSL-CM5B-LR.historical.fx.land.fx.r0i0p0",
88+
"cmip5.output1.MOHC.HadGEM2-A.amip.fx.atmos.fx.r0i0p0",
89+
"cmip5.output1.NOAA-GFDL.GFDL-HIRAM-C180.amip.mon.atmos.Amon.r3i1p1",
90+
"cmip5.output1.INM.inmcm4.1pctCO2.mon.seaIce.OImon.r1i1p1",
91+
"cmip5.output1.NCC.NorESM1-M.rcp45.day.atmos.day.r1i1p1",
92+
"cmip5.output1.BNU.BNU-ESM.1pctCO2.day.atmos.day.r1i1p1",
93+
"cmip5.output1.NASA-GISS.GISS-E2-R-CC.piControl.yr.ocnBgchem.Oyr.r1i1p1",
94+
"cmip5.output1.LASG-CESS.FGOALS-g2.historical.mon.seaIce.OImon.r1i1p1",
95+
"cmip5.output1.ICHEC.EC-EARTH.rcp45.mon.ocean.Omon.r11i1p1",
96+
"cmip5.output1.FIO.FIO-ESM.rcp60.mon.atmos.Amon.r2i1p1",
97+
"cmip5.output1.INM.inmcm4.esmHistorical.day.atmos.day.r1i1p1",
98+
"cmip5.output1.CSIRO-QCCCE.CSIRO-Mk3-6-0.piControl.fx.atmos.fx.r0i0p0",
99+
"cmip5.output1.UNSW.CSIRO-Mk3L-1-2.abrupt4xCO2.mon.land.Lmon.r3i1p1",
100+
"cmip5.output1.CNRM-CERFACS.CNRM-CM5-2.abrupt4xCO2.mon.ocean.Omon.r1i1p1",
101+
"cmip5.output1.MPI-M.MPI-ESM-MR.piControl.day.atmos.day.r1i1p1",
102+
"cmip5.output1.FIO.FIO-ESM.piControl.mon.seaIce.OImon.r1i1p1",
103+
"cmip5.output1.INM.inmcm4.piControl.day.atmos.day.r1i1p1"
104+
]
105+
}

0 commit comments

Comments
 (0)