Skip to content

Commit 1222aef

Browse files
committed
Merge remote-tracking branch 'origin/parser_virtual' into dev
# Conflicts: # pyessv/__init__.py # pyessv/factory.py # pyessv/model/collection.py # pyessv/parsing/__init__.py # pyessv/parsing/identifiers/parser.py
2 parents 297c569 + 88dd1cd commit 1222aef

File tree

6 files changed

+113
-23
lines changed

6 files changed

+113
-23
lines changed

pyessv/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@
5555
from pyessv.parsing import parse_identifer_set
5656
from pyessv.parsing import parse_namespace
5757
from pyessv.parsing import parse_namespace as parse
58+
59+
from pyessv.parsing import build_identifier
60+
5861
from pyessv.utils.logger import log
5962
from pyessv.utils.logger import log_error
6063
from pyessv.utils.logger import log_warning

pyessv/factory.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,26 @@
1+
"""
2+
.. module:: pyessv.factory.py
3+
:copyright: Copyright "December 01, 2016", IPSL
4+
:license: GPL/CeCIL
5+
:platform: Unix, Windows
6+
:synopsis: Encapsulates creation of domain model class instances.
7+
8+
.. moduleauthor:: Mark Conway-Greenslade <momipsl@ipsl.jussieu.fr>
9+
10+
"""
111
import datetime as dt
212

313
from pyessv.constants import REGEX_CANONICAL_NAME
14+
from pyessv.constants import PARSING_STRICTNESS_2
15+
from pyessv.constants import PARSING_STRICTNESS_SET
416
from pyessv.cache import encache
517
from pyessv.exceptions import ValidationError
618
from pyessv.model import Authority
719
from pyessv.model import Collection
20+
from pyessv.model import Node
821
from pyessv.model import Scope
922
from pyessv.model import Term
23+
from pyessv.utils import compat
1024
from pyessv.utils.formatter import format_canonical_name
1125
from pyessv.utils.formatter import format_string
1226
from pyessv.validation import validate
@@ -20,7 +34,7 @@ def create_authority(
2034
create_date=None,
2135
data=None,
2236
alternative_names=[]
23-
):
37+
):
2438
"""Instantiates, initialises & returns a term authority.
2539
2640
:param str name: Canonical name.
@@ -56,7 +70,7 @@ def create_scope(
5670
create_date=None,
5771
data=None,
5872
alternative_names=[]
59-
):
73+
):
6074
"""Instantiates, initialises & returns a term scope.
6175
6276
:param pyessv.Authority authority: CV authority to which scope is bound.
@@ -99,7 +113,7 @@ def create_collection(
99113
data=None,
100114
alternative_names=[],
101115
term_regex=None
102-
):
116+
):
103117
"""Instantiates, initialises & returns a regular expression term collection.
104118
105119
:param pyessv.Scope scope: CV scope to which collection is bound.
@@ -136,7 +150,6 @@ def _callback(instance):
136150
callback=_callback
137151
)
138152

139-
140153
def create_term(
141154
collection,
142155
name,
@@ -147,24 +160,25 @@ def create_term(
147160
data=None,
148161
alternative_names=[],
149162
append=True
150-
):
163+
):
151164
"""Instantiates, initialises & returns a term.
152165
153-
:param collection: The collection to which the term belongs.
154-
:param name: Canonical name.
155-
:param description: Informative description.
156-
:param label: Label for UI purposes.
157-
:param url: Further information URL.
158-
:param create_date: Creation date.
159-
:param data: Arbirtrary data.
160-
:param alternative_names: Collection of associated alternative names.
161-
:param append: Flag indicating whether collection termset is to be extended.
166+
:param pyessv.Collection collection: The collection to which the term belongs.
167+
:param str name: Canonical name.
168+
:param str description: Informative description.
169+
:param str label: Label for UI purposes.
170+
:param str url: Further information URL.
171+
:param datetime create_date: Creation date.
172+
:param dict data: Arbirtrary data.
173+
:param list alternative_names: Collection of associated alternative names.
174+
162175
:returns: A vocabulary term, e.g. ipsl.
176+
:rtype: pyessv.Term
163177
164178
"""
165179
def _callback(instance):
166180
instance.collection = collection
167-
if append is True:
181+
if append:
168182
collection.terms.append(instance)
169183

170184
return _create_node(

pyessv/model/scope.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class Scope(IterableNode):
1111
"""A scope managed by an authority.
1212
1313
"""
14+
1415
def __init__(self):
1516
"""Instance constructor.
1617

pyessv/parsing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from pyessv.parsing.identifiers.parser import parse_identifer
22
from pyessv.parsing.identifiers.parser import parse_identifer_set
33
from pyessv.parsing.namespaces.parser import parse_namespace
4+
from pyessv.parsing.identifiers.builder import build_identifier
45

56
# Public API of the parsing package.
# NOTE: ``__all__`` must contain *names* (strings), not the objects themselves;
# a non-string entry makes ``from pyessv.parsing import *`` raise TypeError.
__all__ = [
    "parse_identifer",
    "parse_identifer_set",
    "parse_namespace",
    "build_identifier",
]
11+

pyessv/parsing/identifiers/builder.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import re
2+
3+
from pyessv import IDENTIFIER_TYPE_SET, IDENTIFIER_TYPE_FILENAME
4+
from pyessv.parsing.identifiers.config import get_config
5+
6+
7+
def build_identifier(scope, identifier_type, terms, regex_terms=None):
    """Build an identifier from a set of known terms.

    :param scope: Scope associated with the identifier to be built.
    :param identifier_type: Type of identifier to build; must be a member
        of ``IDENTIFIER_TYPE_SET``.
    :param terms: Iterable of known terms. Each term is expected to expose
        ``collection.all_names``, ``scope.namespace`` and ``raw_name``.
    :param regex_terms: Optional dictionary of values for the regex driven
        parts of the specification: {template part name: value, ...}.

    :returns: The identifier, i.e. the parts joined with the configured
        separator (plus the template's file extension for filenames).
    :rtype: str

    :raises ValueError: If the configured template and specs differ in
        length, or if a non constant spec has no matching known term.

    """
    # Kept as an assertion so that callers observe the same exception type
    # as before (NOTE: assertions are stripped when running under -O).
    assert identifier_type in IDENTIFIER_TYPE_SET, f"Unsupported parser type: {identifier_type}"

    # Normalise optional / one-shot inputs: avoid a shared mutable default
    # and allow ``terms`` to be any iterable (it is traversed several times).
    regex_terms = {} if regex_terms is None else regex_terms
    terms = list(terms)

    # Set parsing configuration.
    cfg = get_config(scope, identifier_type)

    # Template split from configuration (raw string: same pattern, but no
    # invalid-escape warnings on recent Python versions).
    template_parts = re.findall(r"%\((\w*)\)s", cfg.template)
    # "root" is not backed by a spec, so drop it (first occurrence) if present.
    if "root" in template_parts:
        template_parts.remove("root")
    if len(template_parts) != len(cfg.specs):
        msg = f'Invalid config file for identifier : {identifier_type} : different count between template and spec'
        raise ValueError(msg)

    # Gather every name that can satisfy a spec: the names of each term's
    # collection plus the keys supplied for regex driven parts.  Built
    # incrementally so that an empty ``terms`` no longer raises TypeError.
    known_names = set(regex_terms)
    for term in terms:
        known_names.update(term.collection.all_names)

    # Check that every non constant spec is covered by a known name.
    for part, spec in zip(template_parts, cfg.specs):
        if spec.startswith("const"):
            continue
        # Regex specs are keyed by template part name, collection specs by
        # the last segment of the spec.
        required = part if spec.startswith("regex") else spec.split(":")[-1]
        if required not in known_names:
            msg = f'Invalid known terms : missing {required} to build {identifier_type}'
            raise ValueError(msg)

    # Building the identifier
    identifier_parts = []
    for part, spec in zip(template_parts, cfg.specs):
        # ... constants.
        if spec.startswith("const"):
            identifier_parts.append(spec.split(":")[1])

        # ... regular expressions
        elif spec.startswith("regex"):
            identifier_parts.append(regex_terms[part])

        # ... collection members: use the first term whose collection
        # matches the spec once the term's scope namespace is stripped.
        else:
            for term in terms:
                if spec.replace(term.scope.namespace + ":", "") in term.collection.all_names:
                    identifier_parts.append(term.raw_name)
                    break

    result = cfg.seperator.join(identifier_parts)

    # only for filename identifier_type: append the template's extension.
    if identifier_type == IDENTIFIER_TYPE_FILENAME:
        result += "." + cfg.template.split(".")[-1]

    return result

pyessv/parsing/identifiers/parser.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from multiprocessing.sharedctypes import Value
21
import re
32

43
from pyessv.constants import PARSING_STRICTNESS_2
@@ -25,18 +24,21 @@ def parse_identifer(scope, identifier_type, identifier, strictness=PARSING_STRIC
2524
# Set parsing configuration.
2625
cfg = get_config(scope, identifier_type)
2726

27+
# retrieve optional collection in spec
28+
all_optional_template_str = re.findall("\[(.+?)\]", cfg.template)
29+
optional_template_part = [it for sub in [re.findall("%\((\w+)\)s", opt_col) for opt_col in all_optional_template_str] for it in sub]
30+
2831
# Split identifier into a set of elements.
2932
elements = _get_elements(identifier_type, identifier, cfg.seperator)
30-
if len(elements) != len(cfg.specs):
31-
msg = 'Invalid identifier. Element count mismatch. Expected={}. Actual={}. Identifier={}'
32-
raise ValueError(msg.format(len(cfg.specs), len(elements), identifier))
33+
if len(cfg.specs)-len(optional_template_part) > len(elements) > len(cfg.specs)+len(optional_template_part):
34+
raise ValueError('Invalid identifier. Element count is invalid. Expected={}. Actual={}. Identifier = {}'.format(len(cfg.specs), len(elements), identifier))
3335

34-
# Strip suffixes.
35-
if cfg.suffix is not None and cfg.suffix in elements[-1]:
36-
elements[-1] = elements[-1].split(cfg.suffix)[0]
36+
# Strip suffix ...
37+
if '#' in elements[-1]:
38+
elements[-1] = elements[-1].split("#")[0]
3739

38-
# For each identifier element, execute relevant parse.
3940
result = set()
41+
# For each identifier element, execute relevant parse.
4042
for idx, (element, spec) in enumerate(zip(elements, cfg.specs)):
4143
# ... constants.
4244
if spec.startswith("const"):

0 commit comments

Comments
 (0)