Skip to content
This repository was archived by the owner on Oct 1, 2024. It is now read-only.

Commit 36faa8b

Browse files
sami-m-g and jsvgoncalves
authored and committed
Moved the rest of xml parsing to dds_glossary/xml.py
1 parent ab2f72c commit 36faa8b

File tree

10 files changed

+507
-385
lines changed

10 files changed

+507
-385
lines changed

dds_glossary/database.py

Lines changed: 19 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,15 @@
77
from sqlalchemy.orm import Session, joinedload, with_polymorphic
88
from sqlalchemy_utils import create_database, database_exists, drop_database
99

10-
from .model import Base, Collection, Concept, ConceptScheme, Member, SemanticRelation
10+
from .model import (
11+
Base,
12+
Collection,
13+
Concept,
14+
ConceptScheme,
15+
Member,
16+
ParsedDataset,
17+
SemanticRelation,
18+
)
1119

1220

1321
def init_engine(
@@ -47,34 +55,23 @@ def init_engine(
4755
return engine
4856

4957

50-
def save_dataset(
51-
engine: Engine,
52-
concept_schemes: list[ConceptScheme],
53-
concepts: list[Concept],
54-
collections: list[Collection],
55-
semantic_relations: list[SemanticRelation],
56-
) -> None:
58+
def save_dataset(engine: Engine, parsed_dataset: ParsedDataset) -> None:
5759
"""
5860
Save a dataset in the database.
5961
6062
Args:
6163
engine (Engine): The database engine.
62-
concept_schemes (list[ConceptScheme]): The concept schemes.
63-
concepts (list[Concept]): The concepts.
64-
collections (list[Collection]): The collections.
65-
semantic_relations (list[SemanticRelation]): The semantic relations.
64+
parsed_dataset (ParsedDataset): The parsed dataset.
6665
"""
6766
with Session(engine) as session:
68-
session.add_all(concept_schemes)
69-
session.add_all(concepts)
70-
session.add_all(collections)
71-
session.add_all(semantic_relations)
72-
73-
members: list[Member] = []
74-
members.extend(concepts)
75-
members.extend(collections)
76-
for collection in collections:
77-
collection.resolve_members_from_xml(members)
67+
session.add_all(parsed_dataset.concept_schemes)
68+
session.add_all(parsed_dataset.concepts)
69+
session.add_all(parsed_dataset.collections)
70+
session.add_all(parsed_dataset.semantic_relations)
71+
session.commit()
72+
73+
session.add_all(parsed_dataset.in_schemes)
74+
session.add_all(parsed_dataset.in_collections)
7875
session.commit()
7976

8077

dds_glossary/model.py

Lines changed: 89 additions & 151 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,14 @@
11
"""Model classes for the dds_glossary package."""
22

33
from abc import abstractmethod
4+
from dataclasses import dataclass, field
45
from typing import ClassVar
56

67
from sqlalchemy.dialects.postgresql import JSONB
78
from sqlalchemy.orm import DeclarativeBase, Mapped, Relationship, mapped_column
8-
from sqlmodel import Column, ForeignKey, SQLModel, String, Table
9+
from sqlmodel import ForeignKey, SQLModel
910

1011
from .enums import MemberType, SemanticRelationType
11-
from .xml import (
12-
get_element_attribute,
13-
get_sub_element_as_str,
14-
get_sub_element_attributes,
15-
get_sub_elements_as_dict,
16-
get_sub_elements_as_dict_of_lists,
17-
)
1812

1913

2014
class Dataset(SQLModel):
@@ -41,6 +35,29 @@ class FailedDataset(Dataset):
4135
error: str
4236

4337

38+
@dataclass
39+
class ParsedDataset:
40+
"""
41+
Class for the parsed datasets.
42+
43+
Attributes:
44+
concept_schemes (list[ConceptScheme]): The concept schemes of the dataset.
45+
concepts (list[Concept]): The concepts of the dataset.
46+
collections (list[Collection]): The collections of the dataset.
47+
semantic_relations (list[SemanticRelation]): The semantic relations of the
48+
dataset.
49+
in_schemes (list[InScheme]): The in schemes of the dataset.
50+
in_collections (list[InCollection]): The in collections of the dataset.
51+
"""
52+
53+
concept_schemes: list["ConceptScheme"] = field(default_factory=list)
54+
concepts: list["Concept"] = field(default_factory=list)
55+
collections: list["Collection"] = field(default_factory=list)
56+
semantic_relations: list["SemanticRelation"] = field(default_factory=list)
57+
in_schemes: list["InScheme"] = field(default_factory=list)
58+
in_collections: list["InCollection"] = field(default_factory=list)
59+
60+
4461
class Base(DeclarativeBase):
4562
"""Base class for all models."""
4663

@@ -132,24 +149,6 @@ class ConceptScheme(Base):
132149
back_populates="concept_schemes",
133150
)
134151

135-
@classmethod
136-
def from_xml_element(cls, element) -> "ConceptScheme":
137-
"""
138-
Return a ConceptScheme instance from an XML element.
139-
140-
Args:
141-
element (ElementBase): The XML element to parse.
142-
143-
Returns:
144-
ConceptScheme: The parsed ConceptScheme instance.
145-
"""
146-
return ConceptScheme(
147-
iri=get_element_attribute(element, "about"),
148-
notation=get_sub_element_as_str(element, "core:notation"),
149-
scopeNote=get_sub_element_as_str(element, "core:scopeNote"),
150-
prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"),
151-
)
152-
153152
def to_dict(self, lang: str = "en") -> dict:
154153
"""
155154
Return the ConceptScheme instance as a dictionary.
@@ -206,30 +205,6 @@ class Member(Base):
206205
back_populates="members",
207206
)
208207

209-
@classmethod
210-
def get_concept_schemes(
211-
cls,
212-
element,
213-
concept_schemes: list[ConceptScheme],
214-
) -> list[ConceptScheme]:
215-
"""
216-
Get the concept schemes to which the member belongs.
217-
218-
Args:
219-
element (ElementBase): The XML element to parse.
220-
concept_schemes (list[ConceptScheme]): The concept schemes to which the
221-
member belongs.
222-
223-
Returns:
224-
list[ConceptScheme]: The concept schemes to which the member belongs.
225-
"""
226-
scheme_iris = get_sub_element_attributes(element, "core:inScheme", "resource")
227-
return [
228-
concept_scheme
229-
for concept_scheme in concept_schemes
230-
if concept_scheme.iri in scheme_iris
231-
]
232-
233208
def to_dict(self, lang: str = "en") -> dict:
234209
"""
235210
Return the Member instance as a dictionary.
@@ -264,7 +239,6 @@ class Collection(Member):
264239
__tablename__ = "collections"
265240

266241
iri: Mapped[str] = mapped_column(ForeignKey(Member.iri), primary_key=True)
267-
member_iris: list[str] = []
268242

269243
members: Mapped[list[Member]] = Relationship(
270244
secondary="in_collection",
@@ -275,41 +249,6 @@ class Collection(Member):
275249
"polymorphic_identity": MemberType.COLLECTION,
276250
}
277251

278-
@classmethod
279-
def from_xml_element(
280-
cls,
281-
element,
282-
concept_schemes: list[ConceptScheme],
283-
) -> "Collection":
284-
"""
285-
Return a Collection instance from an XML element.
286-
287-
Args:
288-
element (ElementBase): The XML element to parse.
289-
290-
Returns:
291-
Collection: The parsed Collection instance.
292-
"""
293-
return Collection(
294-
iri=get_element_attribute(element, "about"),
295-
notation=get_sub_element_as_str(element, "core:notation"),
296-
prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"),
297-
concept_schemes=cls.get_concept_schemes(element, concept_schemes),
298-
member_iris=get_sub_element_attributes(element, "core:member", "resource"),
299-
)
300-
301-
def resolve_members_from_xml(self, members: list[Member]) -> None:
302-
"""
303-
Resolve the collections members from an xml element.
304-
305-
Args:
306-
members (list[Member]): The list of all available members.
307-
308-
Returns:
309-
None
310-
"""
311-
self.members = [member for member in members if member.iri in self.member_iris]
312-
313252

314253
class Concept(Member):
315254
"""
@@ -351,33 +290,6 @@ class Concept(Member):
351290
"polymorphic_identity": MemberType.CONCEPT,
352291
}
353292

354-
@classmethod
355-
def from_xml_element(
356-
cls,
357-
element,
358-
concept_schemes: list[ConceptScheme],
359-
) -> "Concept":
360-
"""
361-
Return a Concept instance from an XML element.
362-
363-
Args:
364-
element (ElementBase): The XML element to parse.
365-
concept_schemes (ConceptScheme): The concept schemes to which the concept
366-
belongs.
367-
368-
Returns:
369-
Concept: The parsed Concept instance.
370-
"""
371-
return Concept(
372-
iri=get_element_attribute(element, "about"),
373-
identifier=get_sub_element_as_str(element, "x_1.1:identifier"),
374-
notation=get_sub_element_as_str(element, "core:notation"),
375-
prefLabels=get_sub_elements_as_dict(element, "core:prefLabel"),
376-
altLabels=get_sub_elements_as_dict_of_lists(element, "core:altLabel"),
377-
scopeNotes=get_sub_elements_as_dict(element, "core:scopeNote"),
378-
concept_schemes=cls.get_concept_schemes(element, concept_schemes),
379-
)
380-
381293
def to_dict(self, lang: str = "en") -> dict:
382294
"""
383295
Return the Concept instance as a dictionary.
@@ -467,32 +379,6 @@ class SemanticRelation(Base):
467379
source_concept: Mapped[Concept] = Relationship(foreign_keys=[source_concept_iri])
468380
target_concept: Mapped[Concept] = Relationship(foreign_keys=[target_concept_iri])
469381

470-
@classmethod
471-
def from_xml_element(cls, element) -> list["SemanticRelation"]:
472-
"""
473-
Return a list of SemanticRelation instances from an XML element.
474-
475-
Args:
476-
element (ElementBase): The XML element to parse.
477-
478-
Returns:
479-
list[SemanticRelation]: The parsed list of SemanticRelation instances.
480-
"""
481-
relations: dict[SemanticRelationType, list[str]] = {}
482-
for relation_type in SemanticRelationType:
483-
relations[relation_type] = get_sub_element_attributes(
484-
element, f"core:{relation_type.value}", "resource"
485-
)
486-
return [
487-
SemanticRelation(
488-
type=relation_type,
489-
source_concept_iri=get_element_attribute(element, "about"),
490-
target_concept_iri=target_concept_iri,
491-
)
492-
for relation_type, target_concept_iris in relations.items()
493-
for target_concept_iri in target_concept_iris
494-
]
495-
496382
def to_dict(self) -> dict:
497383
"""
498384
Return the SemanticRelation instance as a dictionary.
@@ -507,17 +393,69 @@ def to_dict(self) -> dict:
507393
}
508394

509395

510-
in_scheme = Table(
511-
"in_scheme",
512-
Base.metadata,
513-
Column("scheme_iri", String, ForeignKey(ConceptScheme.iri), primary_key=True),
514-
Column("member_iri", String, ForeignKey(Member.iri), primary_key=True),
515-
)
396+
class InScheme(Base):
397+
"""
398+
Association table for the concept schemes and the members.
399+
400+
Attributes:
401+
scheme_iri (str): The Internationalized Resource Identifier of the concept
402+
scheme.
403+
member_iri (str): The Internationalized Resource Identifier of the member.
404+
"""
405+
406+
__tablename__ = "in_scheme"
407+
408+
scheme_iri: Mapped[str] = mapped_column(
409+
ForeignKey(ConceptScheme.iri),
410+
primary_key=True,
411+
)
412+
member_iri: Mapped[str] = mapped_column(
413+
ForeignKey(Member.iri),
414+
primary_key=True,
415+
)
416+
417+
def to_dict(self) -> dict:
418+
"""
419+
Return the InScheme instance as a dictionary.
420+
421+
Returns:
422+
dict: The InScheme instance as a dictionary.
423+
"""
424+
return {
425+
"scheme_iri": self.scheme_iri,
426+
"member_iri": self.member_iri,
427+
}
428+
429+
430+
class InCollection(Base):
431+
"""
432+
Association table for the collections and the members.
516433
434+
Attributes:
435+
collection_iri (str): The Internationalized Resource Identifier of the
436+
collection.
437+
member_iri (str): The Internationalized Resource Identifier of the member.
438+
"""
439+
440+
__tablename__ = "in_collection"
517441

518-
in_collection = Table(
519-
"in_collection",
520-
Base.metadata,
521-
Column("collection_iri", String, ForeignKey(Collection.iri), primary_key=True),
522-
Column("member_iri", String, ForeignKey(Member.iri), primary_key=True),
523-
)
442+
collection_iri: Mapped[str] = mapped_column(
443+
ForeignKey(Collection.iri),
444+
primary_key=True,
445+
)
446+
member_iri: Mapped[str] = mapped_column(
447+
ForeignKey(Member.iri),
448+
primary_key=True,
449+
)
450+
451+
def to_dict(self) -> dict:
452+
"""
453+
Return the InCollection instance as a dictionary.
454+
455+
Returns:
456+
dict: The InCollection instance as a dictionary.
457+
"""
458+
return {
459+
"collection_iri": self.collection_iri,
460+
"member_iri": self.member_iri,
461+
}

0 commit comments

Comments
 (0)