diff --git a/.travis.yml b/.travis.yml index 5244dfb1..78c0d47b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,11 @@ dist: trusty language: python matrix: + allow_failures: + - os: linux + python: "3.9-dev" + dist: bionic + include: - os: linux python: "2.7" @@ -18,7 +23,14 @@ matrix: - os: linux python: "3.8" dist: xenial + - os: linux + python: "3.9-dev" + dist: bionic + - os: osx + language: generic + env: + - OSXENV=2.7.14 - os: osx language: generic env: @@ -30,7 +42,7 @@ matrix: - os: osx language: generic env: - - OSXENV=2.7.14 + - OSXENV=3.8.0 install: - export PYVER=${TRAVIS_PYTHON_VERSION:0:1} diff --git a/appveyor.yml b/appveyor.yml index c3fcdaf7..8cc7906b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,6 +13,9 @@ environment: - PYTHON: "C:\\Python37" PYVER: 3 BITS: 32 + - PYTHON: "C:\\Python38" + PYVER: 3 + BITS: 32 - PYTHON: "C:\\Python27-x64" PYVER: 2 BITS: 64 @@ -22,6 +25,9 @@ environment: - PYTHON: "C:\\Python37-x64" PYVER: 3 BITS: 64 + - PYTHON: "C:\\Python38-x64" + PYVER: 3 + BITS: 64 init: - "ECHO %PYTHON% %vcvars% (%bits%)" diff --git a/odml/base.py b/odml/base.py index 2b00a0be..a074e254 100644 --- a/odml/base.py +++ b/odml/base.py @@ -1,18 +1,22 @@ # -*- coding: utf-8 """ -Collects common base functionality +This module provides base classes for functionality common to odML objects. """ import posixpath -from . import terminology -from .tools.doc_inherit import allow_inherit_docstring - try: from collections.abc import Iterable except ImportError: from collections import Iterable +from . import terminology +from .tools.doc_inherit import allow_inherit_docstring + + class BaseObject(object): + """ + Base class for all odML objects. + """ _format = None def __hash__(self): @@ -41,16 +45,21 @@ def __eq__(self, obj): def __ne__(self, obj): """ - Use the __eq__ function to determine if both objects are equal + Use the __eq__ function to determine if both objects are equal. """ return not self == obj def format(self): + """ + Returns the format class of the current object. + """ return self._format @property def document(self): - """ Returns the Document object in which this object is contained """ + """ + Returns the Document object in which this object is contained. + """ if self.parent is None: return None return self.parent.document @@ -64,7 +73,7 @@ def get_terminology_equivalent(self): def clean(self): """ - Stub that doesn't do anything for this class + Stub that doesn't do anything for this class. """ pass @@ -72,7 +81,10 @@ def clone(self, children=True): """ Clone this object recursively (if children is True) allowing to copy it independently to another document. If children is False, this acts as - a template cloner, creating a copy of the object without any children + a template cloner, creating a copy of the object without any children. + + :param children: True by default. Is used in the classes that inherit + from this class. """ # TODO don't we need some recursion / deepcopy here? import copy @@ -81,6 +93,9 @@ def clone(self, children=True): class SmartList(list): + """ + List class that can hold odml.Sections and odml.Properties. + """ def __init__(self, content_type): """ @@ -108,7 +123,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): """ Replaces item at list[*key*] with *value*. - :param key: index position + :param key: index position. :param value: object that replaces item at *key* position. value has to be of the same content type as the list. In this context usually a Section or a Property. 
@@ -152,17 +167,17 @@ def __eq__(self, obj): def __ne__(self, obj): """ - Use the __eq__ function to determine if both objects are equal + Use the __eq__ function to determine if both objects are equal. """ return not self == obj def index(self, obj): """ - Find obj in list + Find obj in list. """ - for i, e in enumerate(self): - if e is obj: - return i + for idx, val in enumerate(self): + if val is obj: + return idx raise ValueError("remove: %s not in list" % repr(obj)) def remove(self, obj): @@ -193,6 +208,9 @@ def sort(self, key=lambda x: x.name, reverse=False): @allow_inherit_docstring class Sectionable(BaseObject): + """ + Base class for all odML objects that can store odml.Sections. + """ def __init__(self): from odml.section import BaseSection self._sections = SmartList(BaseSection) @@ -210,18 +228,20 @@ def __iter__(self): @property def document(self): """ - Returns the parent-most node (if its a document instance) or None + Returns the parent-most node (if its a document instance) or None. """ from odml.doc import BaseDocument - p = self - while p.parent: - p = p.parent - if isinstance(p, BaseDocument): - return p + par = self + while par.parent: + par = par.parent + if isinstance(par, BaseDocument): + return par @property def sections(self): - """ The list of sections contained in this section/document """ + """ + The list of sections contained in this section/document. + """ return self._sections def insert(self, position, section): @@ -301,6 +321,7 @@ def itersections(self, recursive=True, yield_self=False, :param filter_func: accepts a function that will be applied to each iterable. Yields iterable if function returns True :type filter_func: function + :param max_depth: number of layers in the document tree to include in the search. """ stack = [] # Below: never yield self if self is a Document @@ -422,6 +443,10 @@ def _match_iterable(self, iterable, key): """ Searches for a key match within a given iterable. Raises ValueError if not found. + + :param iterable: list of odML objects. + :param key: string to search an objects name against. + :returns: odML object that matched the key. """ for obj in iterable: if self._matches(obj, key): @@ -458,17 +483,25 @@ def _get_section_by_path(self, path): return self._match_iterable(self.sections, pathlist[0]) def find(self, key=None, type=None, findAll=False, include_subtype=False): - """ Return the first subsection named *key* of type *type* """ + """ + Returns the first subsection named *key* of type *type*. + + :param key: string to search against an odML objects name. + :param type: type of an odML object. + :param findAll: include further matches after the first one in the result. + :param include_subtype: splits an objects type at '/' and matches the parts + against the provided type. + """ ret = [] if type: type = type.lower() - for s in self._sections: - if self._matches(s, key, type, include_subtype=include_subtype): + for sec in self._sections: + if self._matches(sec, key, type, include_subtype=include_subtype): if findAll: - ret.append(s) + ret.append(sec) else: - return s + return sec if ret: return ret @@ -533,7 +566,7 @@ def find_related(self, key=None, type=None, children=True, siblings=True, def get_path(self): """ - Returns the absolute path of this section + Returns the absolute path of this section. 
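+
+        A usage sketch with illustrative names (assumes ``odml`` is imported and
+        ``doc`` is an existing odml.Document):
+
+        >>> run = odml.Section(name="run", type="dataset", parent=doc)
+        >>> trial = odml.Section(name="trial", type="dataset", parent=run)
+        >>> trial.get_path()
+        '/run/trial'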
""" node = self path = [] @@ -543,24 +576,24 @@ def get_path(self): return "/" + "/".join(path) @staticmethod - def _get_relative_path(a, b): + def _get_relative_path(path_a, path_b): """ - Returns a relative path for navigation from dir *a* to dir *b* + Returns a relative path for navigation from *path_a* to *path_b*. - If the common parent of both is "/", return an absolute path + If the common parent of both is "/", return an absolute path. """ - a += "/" - b += "/" - parent = posixpath.dirname(posixpath.commonprefix([a, b])) + path_a += "/" + path_b += "/" + parent = posixpath.dirname(posixpath.commonprefix([path_a, path_b])) if parent == "/": - return b[:-1] + return path_b[:-1] - a = posixpath.relpath(a, parent) - b = posixpath.relpath(b, parent) - if a == ".": - return b + path_a = posixpath.relpath(path_a, parent) + path_b = posixpath.relpath(path_b, parent) + if path_a == ".": + return path_b - return posixpath.normpath("../" * (a.count("/") + 1) + b) + return posixpath.normpath("../" * (path_a.count("/") + 1) + path_b) def get_relative_path(self, section): """ @@ -568,11 +601,11 @@ def get_relative_path(self, section): like (e.g. ../other_section) If the common parent of both sections is the document (i.e. /), - return an absolute path + return an absolute path. """ - a = self.get_path() - b = section.get_path() - return self._get_relative_path(a, b) + path_a = self.get_path() + path_b = section.get_path() + return self._get_relative_path(path_a, path_b) def clean(self): """ @@ -586,22 +619,24 @@ def clean(self): def clone(self, children=True, keep_id=False): """ - Clone this object recursively allowing to copy it independently - to another document + Clones this object recursively allowing to copy it independently + to another document. """ from odml.section import BaseSection obj = super(Sectionable, self).clone(children) obj._parent = None obj._sections = SmartList(BaseSection) if children: - for s in self._sections: - obj.append(s.clone(keep_id=keep_id)) + for sec in self._sections: + obj.append(sec.clone(keep_id=keep_id)) return obj @property def repository(self): - """ A URL to a terminology. """ + """ + A URL to a terminology. + """ return self._repository @repository.setter diff --git a/odml/doc.py b/odml/doc.py index 9fb01ce9..46a6cfd2 100644 --- a/odml/doc.py +++ b/odml/doc.py @@ -1,9 +1,12 @@ # -*- coding: utf-8 +""" +This module provides the Base Document class. +""" import uuid from . import base from . import dtypes -from . import format +from . import format as fmt from . import terminology from .tools.doc_inherit import inherit_docstring, allow_inherit_docstring @@ -17,7 +20,7 @@ class BaseDocument(base.Sectionable): properties. """ - _format = format.Document + _format = fmt.Document def __init__(self, author=None, date=None, version=None, repository=None, oid=None): super(BaseDocument, self).__init__() @@ -26,8 +29,8 @@ def __init__(self, author=None, date=None, version=None, repository=None, oid=No self._id = str(uuid.UUID(oid)) else: self._id = str(uuid.uuid4()) - except ValueError as e: - print(e) + except ValueError as exc: + print(exc) self._id = str(uuid.uuid4()) self._author = author self._version = version @@ -153,10 +156,12 @@ def pprint(self, indent=2, max_depth=1, max_length=80, current_depth=0): :param current_depth: number of hierarchical levels printed from the starting Section. 
""" - doc_str = "[{} [{}] {}, sections: {}, repository: {}]".format(self.author, self.version, - self.date, len(self._sections), self.repository) + annotation = "{} [{}] {}".format(self.author, self.version, self.date) + sec_num = "sections: {}".format(len(self._sections)) + repo = "repository: {}".format(self.repository) + doc_str = "[{}, {}, {}]".format(annotation, sec_num, repo) print(doc_str) - for s in self._sections: - s.pprint(current_depth=current_depth+1, max_depth=max_depth, - indent=indent, max_length=max_length) + for sec in self._sections: + sec.pprint(current_depth=current_depth+1, max_depth=max_depth, + indent=indent, max_length=max_length) diff --git a/odml/dtypes.py b/odml/dtypes.py index 69cae553..931a934e 100644 --- a/odml/dtypes.py +++ b/odml/dtypes.py @@ -1,3 +1,7 @@ +""" +Provides functionality for validation of the data-types specified for odML +""" + import datetime as dt import re import sys @@ -6,10 +10,6 @@ self = sys.modules[__name__].__dict__ -""" -Provides functionality for validation of the data-types specified for odML -""" - try: unicode = unicode except NameError: @@ -21,6 +21,9 @@ class DType(str, Enum): + """ + The DType class enumerates all data types supported by odML. + """ string = 'string' text = 'text' int = 'int' @@ -37,6 +40,12 @@ def __str__(self): def default_values(dtype): + """ + Returns the default value for a provided odml data type. + + :param dtype: odml.DType or string corresponding to an odml data type. + :returns: default value for an identified odml data type or empty string. + """ dtype = dtype.lower() default_dtype_value = { 'string': '', @@ -66,6 +75,12 @@ def default_values(dtype): def infer_dtype(value): + """ + Tries to identify the odml data type for a provided value. + + :param value: single value to infer the odml datatype from. + :returns: The identified dtype name. If it cannot be identified, "string" is returned. + """ dtype = (type(value)).__name__ if dtype in _dtype_map: dtype = _dtype_map[dtype] @@ -75,13 +90,15 @@ def infer_dtype(value): dtype = 'text' return dtype - # If unable to infer a dtype of given value, return default as *string* return 'string' def valid_type(dtype): """ Checks if *dtype* is a valid odML value data type. + + :param dtype: odml.DType or string corresponding to an odml data type. + :returns: Boolean. """ if dtype is None: return True @@ -106,7 +123,14 @@ def valid_type(dtype): def get(string, dtype=None): """ - Convert *string* to the corresponding *dtype* + Converts *string* to the corresponding *dtype*. + The appropriate function is derived from the provided dtype. + If no dtype is provided, the string conversion function is used by default. + + :param string: string to be converted into an odml specific value. + :param dtype: odml.DType or string corresponding to an odml data type. + If provided it is used to identify the appropriate conversion function. + :returns: value converted to the appropriate data type. """ if not dtype: return str_get(string) @@ -118,7 +142,13 @@ def get(string, dtype=None): def set(value, dtype=None): """ - Serialize a *value* of type *dtype* to a unicode string + Serializes a *value* of type *dtype* to a unicode string. + The appropriate function is derived from the provided dtype. + + :param value: odml specific value to be converted into a string. + :param dtype: odml.DType or string corresponding to an odml data type. + If provided it is used to identify the appropriate conversion function. + :returns: value converted to an appropriately formatted string. 
""" if not dtype: return str_set(value) @@ -134,6 +164,13 @@ def set(value, dtype=None): def int_get(string): + """ + Converts an input string to an integer value. If *string* is empty + the default value for int is returned. + + :param string: string value to convert to int. + :return: Integer value. + """ if string is None or string == "": return default_values("int") @@ -145,6 +182,13 @@ def int_get(string): def float_get(string): + """ + Converts an input string to a float value. If *string* is empty + the default value for float is returned. + + :param string: string value to convert to int. + :return: Float value. + """ if string is None or string == "": return default_values("float") @@ -152,6 +196,12 @@ def float_get(string): def str_get(string): + """ + Handles an input string value and escapes None and empty collections. + + :param string: value to check for None value or empty collections. + :return: string value. + """ # Do not stringify empty list or dict but make sure boolean False gets through. if string in [None, "", [], {}]: return default_values("string") @@ -170,6 +220,14 @@ def str_get(string): def time_get(string): + """ + Checks an input string against the required time format and converts it to + a time object with the default format. If *string* is empty the default + value for time is returned. + + :param string: string value to convert to time. + :return: time object. + """ if string is None or string == "": return default_values("time") @@ -183,6 +241,14 @@ def time_get(string): def date_get(string): + """ + Checks an input string against the required date format and converts it to + a date object with the default format. If *string* is empty the default + value for date is returned. + + :param string: string value to convert to date. + :return: date object. + """ if string is None or string == "": return default_values("date") @@ -196,6 +262,14 @@ def date_get(string): def datetime_get(string): + """ + Checks an input string against the required datetime format and converts + it to a datetime object with the default format. If *string* is empty the + default value for datetime is returned. + + :param string: string value to convert to datetime. + :return: datetime object. + """ if string is None or string == "": return default_values("datetime") @@ -209,6 +283,16 @@ def datetime_get(string): def boolean_get(string): + """ + Handles an input string value and escapes None and empty collections and + provides the default boolean value in these cases. String values + "true", "1", True, "t" are interpreted as boolean True, string values + "false", "0", False, "f" are interpreted as boolean False. + A ValueError is raised if the input value cannot be interpreted as boolean. + + :param string: value to convert to boolean. + :return: boolean value. + """ if string in [None, "", [], {}]: return default_values("boolean") @@ -236,7 +320,11 @@ def boolean_get(string): def tuple_get(string, count=None): """ - Parse a tuple string like "(1024;768)" and return strings of the elements + Parses a tuple string like "(1024;768)" and return a list of strings with the + individual tuple elements. + + :param string: string to be parsed into odML style tuples. + :param count: list of strings. """ if not string: return None @@ -250,6 +338,12 @@ def tuple_get(string, count=None): def tuple_set(value): + """ + Serializes odml style tuples to a string representation. + + :param value: odml style tuple values. + :return: string. 
+ """ if not value: return None return "(%s)" % ";".join(value) diff --git a/odml/fileio.py b/odml/fileio.py index d9e52695..ba10b97a 100644 --- a/odml/fileio.py +++ b/odml/fileio.py @@ -1,3 +1,7 @@ +""" +This module provides convenience functions for saving and loading of odML files. +""" + import os from .tools.odmlparser import ODMLReader, ODMLWriter diff --git a/odml/format.py b/odml/format.py index 3865fb5d..47825a2f 100644 --- a/odml/format.py +++ b/odml/format.py @@ -1,16 +1,20 @@ +""" +The module provides general format information and mappings of +XML and RDF attributes to their Python class equivalents. +""" + import sys from rdflib import Namespace import odml -""" -A module providing general format information -and mappings of xml-attributes to their python class equivalents -""" - class Format(object): + """ + Base format class for all odML object formats. The formats are required + when the corresponding odML objects are serialized to or loaded from files. + """ _name = "" _args = {} _map = {} @@ -73,11 +77,11 @@ def revmap(self, name): # create the reverse map only if requested self._rev_map = {} if sys.version_info < (3, 0): - for k, v in self._map.iteritems(): - self._rev_map[v] = k + for k, val in self._map.iteritems(): + self._rev_map[val] = k else: - for k, v in self._map.items(): - self._rev_map[v] = k + for k, val in self._map.items(): + self._rev_map[val] = k return self._rev_map.get(name, name) def __iter__(self): @@ -86,10 +90,19 @@ def __iter__(self): yield self.map(k) def create(self, *args, **kargs): + """ + This method will call the init method of the odML class implementation + corresponding to the specific format odML class and return the initialised + class instance. e.g. format.Document.create() will return an initialised + odml.Document instance. + """ return getattr(odml, self.__class__.__name__)(*args, **kargs) class Property(Format): + """ + The format class for the odml Property class. + """ _name = "property" _ns = Format._ns _rdf_type = _ns.Property @@ -126,6 +139,9 @@ class Property(Format): class Section(Format): + """ + The format class for the odml Section class. + """ _name = "section" _ns = Format._ns _rdf_type = _ns.Section @@ -159,6 +175,9 @@ class Section(Format): class Document(Format): + """ + The format class for the odml Document class. + """ _name = "odML" _ns = Format._ns _rdf_type = _ns.Document @@ -178,7 +197,7 @@ class Document(Format): 'id': _ns.hasId, 'author': _ns.hasAuthor, 'date': _ns.hasDate, - 'version': _ns.hasDocVersion, # discuss about the changes to the data model + 'version': _ns.hasDocVersion, 'repository': _ns.hasTerminology, 'sections': _ns.hasSection } diff --git a/odml/property.py b/odml/property.py index 8801325d..26a391d5 100644 --- a/odml/property.py +++ b/odml/property.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 - +""" +This module provides the Base Property class. +""" import uuid from . import base @@ -10,54 +12,61 @@ @allow_inherit_docstring class BaseProperty(base.BaseObject): - """An odML Property""" + """ + An odML Property. + + If a value without an explicitly stated dtype has been provided, dtype will + be inferred from the value. + + Example: + >>> p = Property("property1", "a string") + >>> p.dtype + >>> str + >>> p = Property("property1", 2) + >>> p.dtype + >>> int + >>> p = Property("prop", [2, 3, 4]) + >>> p.dtype + >>> int + + :param name: The name of the Property. + :param values: Some data value, it can be a single value or + a list of homogeneous values. 
+ :param parent: the parent object of the new Property. If the object is not an + odml.Section a ValueError is raised. + :param unit: The unit of the stored data. + :param uncertainty: The uncertainty (e.g. the standard deviation) + associated with a measure value. + :param reference: A reference (e.g. an URL) to an external definition + of the value. + :param definition: The definition of the Property. + :param dependency: Another Property this Property depends on. + :param dependency_value: Dependency on a certain value. + :param dtype: The data type of the values stored in the property, + if dtype is not given, the type is deduced from the values. + Check odml.DType for supported data types. + :param value_origin: Reference where the value originated from e.g. a file name. + :param oid: object id, UUID string as specified in RFC 4122. If no id is provided, + an id will be generated and assigned. An id has to be unique + within an odML Document. + :param value: Legacy code to the 'values' attribute. If 'values' is provided, + any data provided via 'value' will be ignored. + """ + _format = frmt.Property def __init__(self, name=None, values=None, parent=None, unit=None, uncertainty=None, reference=None, definition=None, dependency=None, dependency_value=None, dtype=None, value_origin=None, oid=None, value=None): - """ - Create a new Property. If a value without an explicitly stated dtype - has been provided, the method will try to infer the value's dtype. - Example: - >>> p = Property("property1", "a string") - >>> p.dtype - >>> str - >>> p = Property("property1", 2) - >>> p.dtype - >>> int - >>> p = Property("prop", [2, 3, 4]) - >>> p.dtype - >>> int - :param name: The name of the property. - :param values: Some data value, it can be a single value or - a list of homogeneous values. - :param unit: The unit of the stored data. - :param uncertainty: The uncertainty (e.g. the standard deviation) - associated with a measure value. - :param reference: A reference (e.g. an URL) to an external definition - of the value. - :param definition: The definition of the property. - :param dependency: Another property this property depends on. - :param dependency_value: Dependency on a certain value. - :param dtype: The data type of the values stored in the property, - if dtype is not given, the type is deduced from the values. - Check odml.DType for supported data types. - :param value_origin: Reference where the value originated from e.g. a file name. - :param oid: object id, UUID string as specified in RFC 4122. If no id is provided, - an id will be generated and assigned. An id has to be unique - within an odML Document. - :param value: Legacy code to the 'values' attribute. If 'values' is provided, - any data provided via 'value' will be ignored. - """ + try: if oid is not None: self._id = str(uuid.UUID(oid)) else: self._id = str(uuid.uuid4()) - except ValueError as e: - print(e) + except ValueError as exc: + print(exc) self._id = str(uuid.uuid4()) # Use id if no name was provided. @@ -82,7 +91,7 @@ def __init__(self, name=None, values=None, parent=None, unit=None, self._values = [] self.values = values - if not values and (value or isinstance(value, bool) or isinstance(value, int)): + if not values and (value or isinstance(value, (bool, int))): self.values = value self.parent = parent @@ -110,7 +119,7 @@ def __repr__(self): @property def oid(self): """ - The uuid for the property. Required for entity creation and comparison, + The uuid of the Property. 
Required for entity creation and comparison, saving and loading. """ return self.id @@ -118,7 +127,7 @@ def oid(self): @property def id(self): """ - The uuid for the property. + The uuid of the Property. """ return self._id @@ -127,6 +136,7 @@ def new_id(self, oid=None): new_id sets the object id of the current object to an RFC 4122 compliant UUID. If an id was provided, it is assigned if it is RFC 4122 UUID format compliant. If no id was provided, a new UUID is generated and assigned. + :param oid: UUID string as specified in RFC 4122. """ if oid is not None: @@ -136,6 +146,9 @@ def new_id(self, oid=None): @property def name(self): + """ + The name of the Property. + """ return self._name @name.setter @@ -182,7 +195,7 @@ def dtype(self, new_type): @property def parent(self): """ - The section containing this property. + The Section containing this Property. """ return self._parent @@ -205,6 +218,12 @@ def parent(self, new_parent): @staticmethod def _validate_parent(new_parent): + """ + Checks whether a provided object is a valid odml.Section. + + :param new_parent: object to check whether it is an odml.Section. + :returns: Boolean whether the object is an odml.Section or not. + """ from odml.section import BaseSection if isinstance(new_parent, BaseSection): return True @@ -243,9 +262,9 @@ def _validate_values(self, values): :param values: an iterable that contains the values. """ - for v in values: + for val in values: try: - dtypes.get(v, self.dtype) + dtypes.get(val, self.dtype) except Exception: return False return True @@ -329,16 +348,21 @@ def values(self, new_value): if not self._validate_values(new_value): if self._dtype in ("date", "time", "datetime"): - raise ValueError("odml.Property.values: passed values are not of " - "consistent type \'%s\'! Format should be \'%s\'." % - (self._dtype, dtypes.default_values(self._dtype))) + req_format = dtypes.default_values(self._dtype) + msg = "odml.Property.values: passed values are not of consistent type " + msg += "\'%s\'! Format should be \'%s\'." % (self._dtype, req_format) + raise ValueError(msg) else: - raise ValueError("odml.Property.values: passed values are not of " - "consistent type!") + msg = "odml.Property.values: passed values are not of consistent type!" + raise ValueError(msg) self._values = [dtypes.get(v, self.dtype) for v in new_value] @property def value_origin(self): + """ + Reference where the value originated from e.g. a file name. + :returns: the value_origin of the Property. + """ return self._value_origin @value_origin.setter @@ -349,6 +373,11 @@ def value_origin(self, new_value): @property def uncertainty(self): + """ + The uncertainty (e.g. the standard deviation) associated with + the values of the Property. + :returns: the uncertainty of the Property. + """ return self._uncertainty @uncertainty.setter @@ -367,6 +396,10 @@ def uncertainty(self, new_value): @property def unit(self): + """ + The unit associated with the values of the Property. + :returns: the unit of the Property. + """ return self._unit @unit.setter @@ -377,6 +410,10 @@ def unit(self, new_value): @property def reference(self): + """ + A reference (e.g. an URL) to an external definition of the value. + :returns: the reference of the Property. 
+ """ return self._reference @reference.setter @@ -387,6 +424,9 @@ def reference(self, new_value): @property def definition(self): + """ + :returns the definition of the Property: + """ return self._definition @definition.setter @@ -397,6 +437,10 @@ def definition(self, new_value): @property def dependency(self): + """ + Another Property this Property depends on. + :returns: the dependency of the Property. + """ return self._dependency @dependency.setter @@ -407,6 +451,10 @@ def dependency(self, new_value): @property def dependency_value(self): + """ + Dependency on a certain value in a dependency Property. + :returns: the required value to be found in a dependency Property. + """ return self._dependency_value @dependency_value.setter @@ -592,11 +640,15 @@ def extend(self, obj, strict=True): return new_value = self._convert_value_input(obj) - if len(new_value) > 0 and strict and dtypes.infer_dtype(new_value[0]) != self.dtype: + if len(new_value) > 0 and strict and \ + dtypes.infer_dtype(new_value[0]) != self.dtype: - if not (dtypes.infer_dtype(new_value[0]) == "string" and self.dtype in dtypes.special_dtypes): - raise ValueError("odml.Property.extend: passed value data type found (\"%s\") " - "does not match expected dtype \"%s\"!" % (dtypes.infer_dtype(new_value[0]), self._dtype)) + type_check = dtypes.infer_dtype(new_value[0]) + if not (type_check == "string" and self.dtype in dtypes.special_dtypes): + msg = "odml.Property.extend: passed value data type found " + msg += "(\"%s\") does not match expected dtype \"%s\"!" % (type_check, + self._dtype) + raise ValueError(msg) if not self._validate_values(new_value): raise ValueError("odml.Property.extend: passed value(s) cannot be converted " @@ -624,11 +676,15 @@ def append(self, obj, strict=True): if len(new_value) > 1: raise ValueError("odml.property.append: Use extend to add a list of values!") - if len(new_value) > 0 and strict and dtypes.infer_dtype(new_value[0]) != self.dtype: + if len(new_value) > 0 and strict and \ + dtypes.infer_dtype(new_value[0]) != self.dtype: - if not (dtypes.infer_dtype(new_value[0]) == "string" and self.dtype in dtypes.special_dtypes): - raise ValueError("odml.Property.append: passed value data type found (\"%s\") " - "does not match expected dtype \"%s\"!" % (dtypes.infer_dtype(new_value[0]), self._dtype)) + type_check = dtypes.infer_dtype(new_value[0]) + if not (type_check == "string" and self.dtype in dtypes.special_dtypes): + msg = "odml.Property.append: passed value data type found " + msg += "(\"%s\") does not match expected dtype \"%s\"!" % (type_check, + self._dtype) + raise ValueError(msg) if not self._validate_values(new_value): raise ValueError("odml.Property.append: passed value(s) cannot be converted " @@ -672,6 +728,8 @@ def export_leaf(self): """ Export only the path from this property to the root. Include all properties of parent sections. + + :returns: cloned odml tree to the root of the current document. """ curr = self.parent par = self.parent diff --git a/odml/rdf/fuzzy_finder.py b/odml/rdf/fuzzy_finder.py index 12ec197a..656faeae 100644 --- a/odml/rdf/fuzzy_finder.py +++ b/odml/rdf/fuzzy_finder.py @@ -1,11 +1,15 @@ +""" +This module provides the FuzzyFinder class that enables querying +an odML RDF graph using abstract parameters. +""" from .query_creator import QueryCreator, QueryParser, QueryParserFuzzy class FuzzyFinder(object): """ - FuzzyFinder tool for querying graph through 'fuzzy' queries. 
- If the user do not know exact attributes and structure of the odML data model, - the finder executes multiple queries to better match the parameters and returns sets of triples. + FuzzyFinder tool for querying graph through 'fuzzy' queries. If the user does not + know the exact attributes and structure of the odML data model, the finder executes + multiple queries to better match the parameters and returns sets of triples. """ def __init__(self, graph=None, q_params=None): self.graph = graph @@ -13,30 +17,34 @@ def __init__(self, graph=None, q_params=None): self.prepared_queries_list = [] self._subsets = [] - def find(self, mode='fuzzy', graph=None, q_str=None, q_params=None): - # TODO warn users if they added non-odml attributes ('naming' instead of 'name' e.g.) + def find(self, mode="fuzzy", graph=None, q_str=None, q_params=None): """ - Apply set of queries to the graph and returns info that was retrieved from queries. - - :param mode: define the type of parser which will be used for parsing parameters or queries. - Please find our more info about concrete parsers in odml/tool/query_creator.py or tutorials. + Applies a set of queries to the graph and returns the info + that was retrieved from the queries. + + :param mode: defines the type of parser which will be used for parsing + parameters or queries. Please find more info about concrete + parsers in odml/tool/query_creator.py or tutorials. :param graph: graph object. :param q_str: query string which used in QueryCreator class. - Example for QueryParser: doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%) - Example for QueryParserFuzzy: "FIND sec(name) prop(type) HAVING Stimulus, Contrast" + Example for QueryParser: doc(author:D. N. Adams) + section(name:Stimulus) prop(name:Contrast, value:20, unit:%) + Example for QueryParserFuzzy: "FIND sec(name) prop(type) + HAVING Stimulus, Contrast" :param q_params: dictionary object with set of parameters for a query - Example for QueryParser: {'Sec': [('name', 'Stimulus')], - 'Doc': [('author', 'D. N. Adams')], - 'Prop': [('name', 'Contrast'), ('value':[20, 25]), ('unit':'%')]} - Example for QueryParserFuzzy: {'Sec': ['name', 'type'], - 'Doc': ['author'], - 'Search': ['Stimulus', 'Contrast']} + Example for QueryParser: + {'Sec': [('name', 'Stimulus')], + 'Doc': [('author', 'D. N. Adams')], + 'Prop': [('name', 'Contrast'), ('value':[20, 25]), ('unit':'%')]} + Example for QueryParserFuzzy: + {'Sec': ['name', 'type'], 'Doc': ['author'], + 'Search': ['Stimulus', 'Contrast']} :return: string which contains set of triples.
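+
+        Usage sketch (``graph`` is an already loaded rdflib graph; purely illustrative):
+
+        >>> finder = FuzzyFinder(graph)
+        >>> triples = finder.find(mode="fuzzy",
+        ...                       q_str="FIND sec(name) prop(type) HAVING Stimulus, Contrast")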
""" - if mode == 'fuzzy': + if mode == "fuzzy": q_parser = QueryParserFuzzy() pairs_generator = self._generate_parameters_pairs_fuzzy - elif mode == 'match': + elif mode == "match": q_parser = QueryParser() pairs_generator = self._generate_parameters_pairs else: @@ -56,18 +64,21 @@ def _validate_find_input_attributes(self, graph, q_str, q_params, q_parser): self.graph = graph if q_str and q_params: - raise ValueError("Please pass query parameters only as a string or a dict object") + msg = "Please pass query parameters only as a string or a dict object" + raise ValueError(msg) if q_str: self.q_params = q_parser.parse_query_string(q_str) elif q_params: self.q_params = q_params else: - raise ValueError("Please pass query parameters either as a string or a dict object") + msg = "Please pass query parameters either as a string or a dict object" + raise ValueError(msg) def _generate_parameters_pairs(self): """ Example: {'Sec': [('name', 'some_name'), ('type', 'Stimulus')]} + :return: [('Sec', ('name', 'some_name')), ('Sec', ('type', 'Stimulus'))] """ parameters_pairs = [] @@ -76,27 +87,28 @@ def _generate_parameters_pairs(self): if key in self.q_params.keys(): object_attrs = self.q_params[key] for object_attr in object_attrs: - s = tuple([key, object_attr]) - parameters_pairs.append(s) + obj_pair = tuple([key, object_attr]) + parameters_pairs.append(obj_pair) + return parameters_pairs def _generate_parameters_pairs_fuzzy(self): """ - Generates set of tuples matching search select and where parts of fuzzy finder query - from dictionary of parameters. - + Generates set of tuples matching search select and where parts of + fuzzy finder query from dictionary of parameters. + Example: {'Sec': ['name', 'type'], 'Doc': ['author'], - 'Search': ['Stimulus', 'Contrast']} - :return: [('Sec', ('name', 'Stimulus')), ('Sec', ('name', 'Contrast')), + 'Search': ['Stimulus', 'Contrast']} + :return: [('Sec', ('name', 'Stimulus')), ('Sec', ('name', 'Contrast')), ('Sec', ('type', 'Stimulus')), ('Sec', ('name', 'Contrast')), ('Doc', ('author', 'Stimulus')), ('Doc', ('author', 'Contrast'))] """ parameters_pairs = [] search_values = [] possible_keys = QueryCreator.possible_q_dict_keys - if 'Search' in self.q_params.keys(): - search_values = self.q_params['Search'] + if "Search" in self.q_params.keys(): + search_values = self.q_params["Search"] for key in possible_keys: if key in self.q_params.keys(): @@ -108,7 +120,8 @@ def _generate_parameters_pairs_fuzzy(self): def _generate_parameters_subsets(self, attrs): """ - Generates the set of parameters to create queries from specific to more broad ones. + Generates the set of parameters to create queries + from specific to more broad ones. """ self._subsets = [] if len(attrs) > 0: @@ -119,12 +132,14 @@ def _generate_parameters_subsets(self, attrs): def _subsets_util_dfs(self, index, path, res, attrs): """ Generates all subsets of attrs set using Depth-first search. - Example (with numbers for explicity: [1,2,3] -> [[1], [2], [3], [1,2], [1,3], [2,3], [1,2,3]] - + Example (with numbers for explicity: + [1,2,3] -> [[1], [2], [3], [1,2], [1,3], [2,3], [1,2,3]] + :param index: help index for going through list. :param path: array for saving subsets. :param res: result subset. - :param attrs: input list of attrs e.g. [('Sec', ('name', 'some_name')), ('Sec', ('type', 'Stimulus'))] + :param attrs: input list of attrs e.g. 
[('Sec', ('name', 'some_name')), + ('Sec', ('type', 'Stimulus'))] """ if path: res.append(path) @@ -141,25 +156,22 @@ def _check_duplicate_attrs(attrs_list, attr): def _output_query_results(self): output_triples_string = "" - # TODO define when we want to stop loop, influence factors(time, previous result etc.) for query in self._subsets: - # FIXME we do need really need to have QueryCreator object - # put it here to write initial query to output string for explicity creator = self._prepare_query(query) - q = creator.get_query() + curr_query = creator.get_query() - triples = self._execute_query(q) + triples = self._execute_query(curr_query) if triples: output_triples_string += creator.query output_triples_string += triples - output_triples_string += '\n' + output_triples_string += "\n" return output_triples_string def _execute_query(self, query): """ Execute prepared query on the graph. - + :param query: prepared query object :return: string with output triples """ @@ -173,17 +185,17 @@ def _execute_query(self, query): def _build_output_str(row): """ Build output string depending on the query variables. - + :param row: rdflib query row. :return: string with values. """ out_str = "" possible_vars = QueryCreator.possible_query_variables - for v in possible_vars.keys(): + for curr_key in possible_vars.keys(): try: - val = getattr(row, v) - out_str += '{0}: {1}\n'.format(possible_vars[v], val) + val = getattr(row, curr_key) + out_str += "{0}: {1}\n".format(possible_vars[curr_key], val) except AttributeError: pass return out_str @@ -192,7 +204,7 @@ def _build_output_str(row): def _prepare_query(args): """ Return a query for given parameters. - + :param args: dict with list of odML object attributes for creation query Example: {'Sec': [('name', 'some_name'), ('type', 'Stimulus')]} :return: QueryCreator object. @@ -204,4 +216,5 @@ def _prepare_query(args): else: q_params[arg[0]] = [arg[1]] creator = QueryCreator(q_params) + return creator diff --git a/odml/rdf/query_creator.py b/odml/rdf/query_creator.py index f5047811..3a09999c 100644 --- a/odml/rdf/query_creator.py +++ b/odml/rdf/query_creator.py @@ -1,3 +1,10 @@ +""" +The module provides access to QueryParser and QueryCreator classes. +QueryParsers parse search strings to odml query dictionaries that can be +consumed by QueryCreators. QueryCreators create RDF queries from +provided odml query dictionaries. +""" + import re from abc import ABCMeta, abstractmethod @@ -10,32 +17,45 @@ class BaseQueryCreator: + """ + An abstract base class for odml specific QueryCreators. + """ __metaclass__ = ABCMeta - possible_query_variables = {'d': 'Document', 's': 'Section', - 'p': 'Property', 'v': 'Bag URI', 'value': 'Value'} + possible_query_variables = {"d": "Document", "s": "Section", + "p": "Property", "v": "Bag URI", "value": "Value"} - possible_q_dict_keys = ['Doc', 'Sec', 'Prop'] + possible_q_dict_keys = ["Doc", "Sec", "Prop"] def __init__(self, q_dict=None): """ :param q_dict: dictionary with query parameters """ self.q_dict = q_dict if q_dict else {} - self.query = '' + self.query = "" super(BaseQueryCreator, self).__init__() @abstractmethod def get_query(self, q_str, q_parser): + """ + Constructs a SPARQL query from an input string. + + :param q_str: input string. + :param q_parser: parser to use on the input string. + :return SPARQL query. + """ pass - + @abstractmethod def _prepare_query(self): pass class BaseQueryParser: + """ + An abstract base class for QueryParsers. 
+ """ __metaclass__ = ABCMeta @@ -44,10 +64,19 @@ def __init__(self): @abstractmethod def parse_query_string(self, q_str): + """ + Parses an input string and return a dictionary consumable by a QueryCreator. + """ pass class QueryParserFuzzy(BaseQueryParser): + """ + This class parses an odml specific input string and uses + heuristics to approximate which Section or Property attributes + should be matched against multiple search parameters and constructs + an odml specific SPARQL query. + """ def __init__(self): super(QueryParserFuzzy, self).__init__() @@ -55,6 +84,7 @@ def __init__(self): def parse_query_string(self, q_str): """ Parse query string and returns dict object with parameters. + :param q_str: query string. Example: FIND sec(name, type) prop(type) HAVING Stimulus, Contrast :return: dict object. @@ -71,72 +101,81 @@ def parse_query_string(self, q_str): having_pattern = re.compile("HAVING(.*)") having_group = re.search(having_pattern, q_str).group(1).strip() if having_group: - if 'Search' in self.q_dict.keys(): - raise ValueError('Search values are already parsed') + if "Search" in self.q_dict.keys(): + raise ValueError("Search values are already parsed") self._parse_having(having_group) else: - raise ValueError('Search values in having part were not specified') + raise ValueError("Search values in having part were not specified") return self.q_dict def _parse_find(self, find_part): """ - Parses find string part into list of specific keys to whih search values would be apllied - e.g. 'sec(name, type) prop(name)' into {'Sec': ['name', 'type'], 'Prop': ['name']} . - - :param find_part: string which represent list of searchable odML data model objects - like document(doc), sections(sec) or properties(prop). - e.g. 'sec(name, type) prop(name)' + Parses find string part into list of specific keys to which search values + would be applied. e.g. 'sec(name, type) prop(name)' + into {'Sec': ['name', 'type'], 'Prop': ['name']}. + + :param find_part: string which represent list of searchable odML data model + objects like document(doc), sections(sec) or properties(prop). + e.g. 
'sec(name, type) prop(name)' """ - doc_pattern = re.compile("(doc|document)\(.*?\)") + doc_pattern = re.compile("(doc|document)[(].*?[)]") doc = re.search(doc_pattern, find_part) if doc: self._parse_doc(doc) - sec_pattern = re.compile("(sec|section)\(.*?\)") + sec_pattern = re.compile("(sec|section)[(].*?[)]") sec = re.search(sec_pattern, find_part) if sec: self._parse_sec(sec) - prop_pattern = re.compile("(prop|property)\(.*?\)") + prop_pattern = re.compile("(prop|property)[(].*?[)]") prop = re.search(prop_pattern, find_part) if prop: self._parse_prop(prop) def _parse_doc(self, doc): - p = re.compile("[\(|, ](id|author|date|version|repository|sections)[\)|,]") + re_obj = re.compile("[(, ](id|author|date|version|repository|sections)[),]") if doc: - self.q_dict['Doc'] = re.findall(p, doc.group(0)) + self.q_dict["Doc"] = re.findall(re_obj, doc.group(0)) def _parse_sec(self, sec): - p = re.compile("[\(|, ](id|name|definition|type|repository|reference|sections|properties)[\)|,]") + attr_list = "id|name|definition|type|repository|reference|sections|properties" + pattern = "[(, ](%s)[),]" % attr_list + re_obj = re.compile(pattern) if sec: - self.q_dict['Sec'] = re.findall(p, sec.group(0)) + self.q_dict["Sec"] = re.findall(re_obj, sec.group(0)) def _parse_prop(self, prop): - p = re.compile("[\(|, ](id|name|definition|dtype|unit|uncertainty|reference|value_origin)[\)|,]") + attr_list = "id|name|definition|dtype|unit|uncertainty|reference|value_origin" + pattern = "[(, ](%s)[),]" % attr_list + re_obj = re.compile(pattern) if prop: - self.q_dict['Prop'] = re.findall(p, prop.group(0)) + self.q_dict["Prop"] = re.findall(re_obj, prop.group(0)) def _parse_having(self, having_part): """ - Parses search value string into list of specific values + Parses search value string into list of specific values. e.g. 'Stimulus, Contrast, Date' into list [Stimulus, Contrast, Date]. - + :param having_part: string with search values, e.g. 'Stimulus, Contrast' - Also spaces errors in the string like 'Stimulus, , Contrast' will be ignored. + Also spaces errors in the string like 'Stimulus, , Contrast' + will be ignored. """ search_values_list = [] search_params = re.compile("(.*?)(?:,|$)") if having_part: search_values = re.findall(search_params, having_part) - for v in search_values: - if v.strip(): - search_values_list.append(v.strip()) - self.q_dict['Search'] = search_values_list + for val in search_values: + if val.strip(): + search_values_list.append(val.strip()) + self.q_dict["Search"] = search_values_list class QueryParser(BaseQueryParser): + """ + This class parses an odml specific input string into an odml specific SPARQL query. + """ def __init__(self): super(QueryParser, self).__init__() @@ -144,62 +183,71 @@ def __init__(self): def parse_query_string(self, q_str): """ :param q_str: query string - Example: doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%) + Example: doc(author:D. N. Adams) section(name:Stimulus) + prop(name:Contrast, value:20, unit:%) :return: dict object Example: {'Sec': [('name', 'Stimulus')], 'Doc': [('author', 'D. N. 
Adams')], 'Prop': [('name', 'Contrast'), ('value':[20]), ('unit':'%')]} """ - doc_pattern = re.compile("(doc|document)\(.*?\)") + doc_pattern = re.compile("(doc|document)[(].*?[)]") doc = re.search(doc_pattern, q_str) if doc: self._parse_doc(doc) - sec_pattern = re.compile("(sec|section)\(.*?\)") + sec_pattern = re.compile("(sec|section)[(].*?[)]") sec = re.search(sec_pattern, q_str) if sec: self._parse_sec(sec) - prop_pattern = re.compile("(prop|property)\(.*?\)") + prop_pattern = re.compile("(prop|property)[(].*?[)]") prop = re.search(prop_pattern, q_str) if prop: self._parse_prop(prop) - + return self.q_dict def _parse_doc(self, doc): - p = re.compile("[, |\(](id|author|date|version|repository|sections):(.*?)[,|\)]") + attr_list = "id|author|date|version|repository|sections" + pattern = "[, (](%s):(.*?)[,)]" % attr_list + re_obj = re.compile(pattern) if doc: - self.q_dict['Doc'] = re.findall(p, doc.group(0)) + self.q_dict["Doc"] = re.findall(re_obj, doc.group(0)) def _parse_sec(self, sec): - p = re.compile("[, |\(](id|name|definition|type|repository|reference|sections|properties):(.*?)[,|\)]") + attr_list = "id|name|definition|type|repository|reference|sections|properties" + pattern = "[, (](%s):(.*?)[,)]" % attr_list + re_obj = re.compile(pattern) if sec: - self.q_dict['Sec'] = re.findall(p, sec.group(0)) + self.q_dict["Sec"] = re.findall(re_obj, sec.group(0)) def _parse_prop(self, prop): - p = re.compile("[, |\(](id|name|definition|dtype|unit|uncertainty|reference|value_origin):(.*?)[,|\)]") + attr_list = "id|name|definition|dtype|unit|uncertainty|reference|value_origin" + pattern = "[, (](%s):(.*?)[,)]" % attr_list + re_obj = re.compile(pattern) if prop: - self.q_dict['Prop'] = re.findall(p, prop.group(0)) + self.q_dict["Prop"] = re.findall(re_obj, prop.group(0)) - p_value = re.compile("value:\[(.*)]") + p_value = re.compile(r"value:\[(.*)]") value_group = re.findall(p_value, prop.group(0)) if value_group: values = re.split(", ?", value_group[0]) - self.q_dict['Prop'].append(('value', values)) + self.q_dict["Prop"].append(("value", values)) class QueryCreator(BaseQueryCreator): - """ - Class for simplifying the creation of prepared SPARQL queries - + """ + Class for simplifying the creation of prepared SPARQL queries. + Usage: - q = "doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%)" - prepared_query = QueryCreator().get_query(q, QueryParser()) - - q = "FIND sec(name, type) prop(name) HAVING Recording, Recording-2012-04-04-ab, Date" - prepared_query = QueryCreator().get_query(q, QueryParser2()) + query = "doc(author:D. N. Adams) section(name:Stimulus) + prop(name:Contrast, value:20, unit:%)" + prepared_query = QueryCreator().get_query(query, QueryParser()) + + query = "FIND sec(name, type) prop(name) HAVING Recording, + Recording-2012-04-04-ab, Date" + prepared_query = QueryCreator().get_query(query, QueryParserFuzzy()) """ def __init__(self, q_dict=None): @@ -211,11 +259,10 @@ def __init__(self, q_dict=None): def get_query(self, q_str=None, q_parser=None): """ :param q_parser: one of possible query parsers. - :param q_str: doc(author:D. N. Adams) section(name:Stimulus) prop(name:Contrast, value:20, unit:%) - :return rdflib prepare query. + :param q_str: doc(author:D. N. Adams) section(name:Stimulus) + prop(name:Contrast, value:20, unit:%) + :return: rdflib prepared query.
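+
+        The returned prepared query can then be run against an rdflib graph,
+        e.g. (illustrative): ``results = graph.query(prepared_query)``.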
""" - # TODO find out if the validation for the q_str is important - # We can possibly warn about not used parts and print the parsed dictionary if not self.q_dict: if not q_str: raise AttributeError("Please fulfill q_str param (query string)") @@ -223,62 +270,69 @@ def get_query(self, q_str=None, q_parser=None): raise AttributeError("Please fulfill q_parser param (query parser)") self.q_dict = q_parser.parse_query_string(q_str) self._prepare_query() - return prepareQuery(self.query, initNs={"odml": Namespace("https://g-node.org/odml-rdf#"), - "rdf": RDF}) + + use_ns = {"odml": Namespace("https://g-node.org/odml-rdf#"), "rdf": RDF} + return prepareQuery(self.query, initNs=use_ns) def _prepare_query(self): """ Creates rdflib query using parameters from self.q_dict. + :return: string representing rdflib query. """ odml_uri = "https://g-node.org/odml-rdf#" - self.query = 'SELECT * WHERE {\n' + self.query = "SELECT * WHERE {\n" - if 'Doc' in self.q_dict.keys(): - doc_attrs = self.q_dict['Doc'] + if "Doc" in self.q_dict.keys(): + doc_attrs = self.q_dict["Doc"] if len(doc_attrs) > 0: - self.query += '?d rdf:type odml:Document .\n' + self.query += "?d rdf:type odml:Document .\n" for i in doc_attrs: if len(i) > 2: - raise ValueError("Attributes in the query \"{}\" are not valid.".format(i)) + msg = "Attributes in the query \"{}\" are not valid.".format(i) + raise ValueError(msg) else: attr = Document.rdf_map(i[0]) if attr: - self.query += '?d {0} \"{1}\" .\n'.format(re.sub(odml_uri, - "odml:", attr), i[1]) - if 'Sec' in self.q_dict.keys(): - sec_attrs = self.q_dict['Sec'] + re_sub = re.sub(odml_uri, "odml:", attr) + self.query += "?d {0} \"{1}\" .\n".format(re_sub, i[1]) + + if "Sec" in self.q_dict.keys(): + sec_attrs = self.q_dict["Sec"] if len(sec_attrs) > 0: - self.query += '?d odml:hasSection ?s .\n' \ - '?s rdf:type odml:Section .\n' + self.query += "?d odml:hasSection ?s .\n" + self.query += "?s rdf:type odml:Section .\n" for i in sec_attrs: if len(i) > 2: - raise ValueError("Attributes in the query \"{}\" are not valid.".format(i)) + msg = "Attributes in the query \"{}\" are not valid.".format(i) + raise ValueError(msg) else: attr = Section.rdf_map(i[0]) if attr: - self.query += '?s {0} \"{1}\" .\n'.format(re.sub(odml_uri, - "odml:", attr), i[1]) - if 'Prop' in self.q_dict.keys(): - prop_attrs = self.q_dict['Prop'] + re_sub = re.sub(odml_uri, "odml:", attr) + self.query += "?s {0} \"{1}\" .\n".format(re_sub, i[1]) + + if "Prop" in self.q_dict.keys(): + prop_attrs = self.q_dict["Prop"] if len(prop_attrs) > 0: - self.query += '?s odml:hasProperty ?p .\n' \ - '?p rdf:type odml:Property .\n' + self.query += "?s odml:hasProperty ?p .\n" + self.query += "?p rdf:type odml:Property .\n" for i in prop_attrs: if len(i) > 2: - raise ValueError("Attributes in the query \"{}\" are not valid.".format(i)) - elif i[0] == 'value': + msg = "Attributes in the query \"{}\" are not valid.".format(i) + raise ValueError(msg) + elif i[0] == "value": values = i[1] if values: self.query += "?p odml:hasValue ?v .\n?v rdf:type rdf:Bag .\n" - for v in values: - self.query += '?v rdf:li \"{}\" .\n'.format(v) + for val in values: + self.query += "?v rdf:li \"{}\" .\n".format(val) else: attr = Property.rdf_map(i[0]) if attr: - self.query += '?p {0} \"{1}\" .\n'.format(re.sub(odml_uri, - "odml:", attr), i[1]) + re_sub = re.sub(odml_uri, "odml:", attr) + self.query += "?p {0} \"{1}\" .\n".format(re_sub, i[1]) - self.query += '}\n' + self.query += "}\n" return self.query diff --git a/odml/section.py b/odml/section.py index 
77c2ae28..23fd2805 100644 --- a/odml/section.py +++ b/odml/section.py @@ -1,8 +1,16 @@ # -*- coding: utf-8 +""" +This module provides the Base Section class. +""" import uuid +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable + from . import base -from . import format +from . import format as fmt from . import terminology from .doc import BaseDocument # this is supposedly ok, as we only use it for an isinstance check @@ -10,21 +18,35 @@ # it MUST however not be used to create any Property objects from .tools.doc_inherit import inherit_docstring, allow_inherit_docstring -try: - from collections.abc import Iterable -except ImportError: - from collections import Iterable @allow_inherit_docstring class BaseSection(base.Sectionable): - """ An odML Section """ + """ + An odML Section. + + :param name: string providing the name of the Section. If the name is not + provided, the uuid of the Section is assigned as its name. + :param type: String providing a grouping description for similar Sections. + :param parent: the parent object of the new Section. If the object is not + an odml.Section or an odml.Document, a ValueError is raised. + :param definition: String describing the definition of the Section. + :param reference: A reference (e.g. a URL) to an external definition + of the Section. + :param repository: URL to a repository where this Section can be found. + :param link: Specifies a soft link, i.e. a path within the document. + :param include: Specifies an arbitrary URL. Can only be used if *link* is not set. + :param oid: object id, UUID string as specified in RFC 4122. If no id is provided, + an id will be generated and assigned. An id has to be unique + within an odML Document. + """ + type = None reference = None # the *import* property _link = None _include = None _merged = None - _format = format.Section + _format = fmt.Section def __init__(self, name=None, type=None, parent=None, definition=None, reference=None, @@ -39,8 +61,8 @@ def __init__(self, name=None, type=None, parent=None, self._id = str(uuid.UUID(oid)) else: self._id = str(uuid.uuid4()) - except ValueError as e: - print(e) + except ValueError as exc: + print(exc) self._id = str(uuid.uuid4()) # Use id if no name was provided. @@ -68,7 +90,7 @@ def __repr__(self): def __iter__(self): """ - Iterate over each section and property contained in this section + Iterate over each Section and Property contained in this Section. """ for section in self._sections: yield section @@ -77,14 +99,14 @@ def __iter__(self): def __len__(self): """ - Number of children (sections AND properties) + Number of children (Sections AND Properties). """ return len(self._sections) + len(self._props) @property def oid(self): """ - The uuid for the section. Required for entity creation and comparison, + The uuid for the Section. Required for entity creation and comparison, saving and loading. """ return self.id @@ -101,6 +123,7 @@ def new_id(self, oid=None): new_id sets the id of the current object to a RFC 4122 compliant UUID. If an id was provided, it is assigned if it is RFC 4122 UUID format compliant. If no id was provided, a new UUID is generated and assigned. + :param oid: UUID string as specified in RFC 4122. """ if oid is not None: @@ -110,6 +133,9 @@ def new_id(self, oid=None): @property def name(self): + """ + The name of the Section.
+ """ return self._name @name.setter @@ -128,7 +154,7 @@ def include(self): """ The same as :py:attr:`odml.section.BaseSection.link`, except that include specifies an arbitrary url instead of a local path within - the same document + the same document. """ return self._include @@ -169,7 +195,7 @@ def include(self, new_value): @property def link(self): """ - Specifies a softlink, i.e. a path within the document + A softlink, i.e. a path within the document. When the merge()-method is called, the link will be resolved creating according copies of the section referenced by the link attribute. When the unmerge() method is called (happens when running clean()) @@ -209,11 +235,10 @@ def link(self, new_value): @property def definition(self): - """ Name Definition of the section """ - if hasattr(self, "_definition"): - return self._definition - else: - return None + """ + The definition of the Section. + """ + return self._definition @definition.setter def definition(self, new_value): @@ -227,6 +252,10 @@ def definition(self): @property def reference(self): + """ + A reference (e.g. an URL) to an external definition of the Section. + :returns: The reference of the Section. + """ return self._reference @reference.setter @@ -240,23 +269,31 @@ def reference(self, new_value): # properties @property def properties(self): - """ The list of all properties contained in this section """ + """ + The list of all properties contained in this Section, + """ return self._props @property def props(self): - """ The list of all properties contained in this section; - NIXpy format style alias for 'properties'.""" + """ + The list of all properties contained in this Section; + NIXpy format style alias for 'properties'. + """ return self._props @property def sections(self): - """ The list of all child-sections of this section """ + """ + The list of all child-sections of this Section. + """ return self._sections @property def parent(self): - """ The parent section, the parent document or None """ + """ + The parent Section, Document or None. + """ return self._parent @parent.setter @@ -277,16 +314,21 @@ def parent(self, new_parent): "\nodml.Document or odml.Section expected") def _validate_parent(self, new_parent): - if isinstance(new_parent, BaseDocument) or \ - isinstance(new_parent, BaseSection): + """ + Checks whether a provided object is a valid odml.Section or odml.Document.. + + :param new_parent: object to check whether it is an odml.Section or odml.Document. + :returns: Boolean whether the object is an odml.Section, odml.Document or not. + """ + if isinstance(new_parent, (BaseDocument, BaseSection)): return True return False def get_repository(self): """ - Returns the repository responsible for this section, + Returns the repository responsible for this Section, which might not be the *repository* attribute, but may - be inherited from a parent section / the document + be inherited from a parent Section / the Document. """ if self._repository is None and self.parent is not None: return self.parent.get_repository() @@ -308,7 +350,7 @@ def get_terminology_equivalent(self): def get_merged_equivalent(self): """ - Return the merged object or None + Returns the merged object or None. 
""" return self._merged @@ -421,8 +463,8 @@ def clone(self, children=True, keep_id=False): obj._props = base.SmartList(BaseProperty) if children: - for p in self._props: - obj.append(p.clone(keep_id)) + for prop in self._props: + obj.append(prop.clone(keep_id)) return obj @@ -543,12 +585,12 @@ def unmerge(self, section): for obj in removals: self.remove(obj) - # The path may not be valid anymore, so make sure to update it - # however this does not reflect changes happening while the section - # is unmerged + # The path may not be valid anymore, so make sure to update it. + # However this does not reflect changes happening while the section + # is unmerged. if self._link is not None: - # TODO get_absolute_path, # TODO don't change if the section can - # still be reached using the old link + # TODO get_absolute_path + # TODO don't change if the section can still be reached using the old link self._link = self.get_relative_path(section) self._merged = None @@ -571,17 +613,17 @@ def can_be_merged(self): return self._link is not None or self._include is not None def _reorder(self, childlist, new_index): - l = childlist - old_index = l.index(self) + lst = childlist + old_index = lst.index(self) # 2 cases: insert after old_index / insert before if new_index > old_index: new_index += 1 - l.insert(new_index, self) + lst.insert(new_index, self) if new_index < old_index: - del l[old_index + 1] + del lst[old_index + 1] else: - del l[old_index] + del lst[old_index] return old_index def reorder(self, new_index): @@ -617,9 +659,10 @@ def create_property(self, name, value=None, dtype=None, oid=None): def pprint(self, indent=2, max_depth=1, max_length=80, current_depth=0): """ - Pretty print method to visualize Section-Property trees. + Pretty prints Section-Property trees for nicer visualization. :param indent: number of leading spaces for every child Section or Property. + :param max_depth: number of maximum child section layers to traverse and print. :param max_length: maximum number of characters printed in one line. :param current_depth: number of hierarchical levels printed from the starting Section. @@ -627,25 +670,27 @@ def pprint(self, indent=2, max_depth=1, max_length=80, current_depth=0): spaces = " " * (current_depth * indent) sec_str = "{} {} [{}]".format(spaces, self.name, self.type) print(sec_str) - for p in self.props: - p.pprint(current_depth=current_depth, indent=indent, - max_length=max_length) + for prop in self.props: + prop.pprint(current_depth=current_depth, indent=indent, + max_length=max_length) + if max_depth == -1 or current_depth < max_depth: - for s in self.sections: - s.pprint(current_depth=current_depth+1, max_depth=max_depth, - indent=indent, max_length=max_length) + for sec in self.sections: + sec.pprint(current_depth=current_depth+1, max_depth=max_depth, + indent=indent, max_length=max_length) elif max_depth == current_depth: child_sec_indent = spaces + " " * indent more_indent = spaces + " " * (current_depth + 2 * indent) - for s in self.sections: - print("{} {} [{}]\n{}[...]".format(child_sec_indent, - s.name, s.type, - more_indent)) + for sec in self.sections: + print("{} {} [{}]\n{}[...]".format(child_sec_indent, sec.name, + sec.type, more_indent)) def export_leaf(self): """ - Export only the path from this section to the root. + Exports only the path from this section to the root. Include all properties for all sections, but no other subsections. + + :returns: cloned odml tree to the root of the current document. 
""" curr = self par = self diff --git a/odml/templates.py b/odml/templates.py index d0084918..8f46de14 100644 --- a/odml/templates.py +++ b/odml/templates.py @@ -61,7 +61,8 @@ def cache_load(url): cache_file = os.path.join(cache_dir, filename) - if not os.path.exists(cache_file) or dati.fromtimestamp(os.path.getmtime(cache_file)) < (dati.now() - CACHE_AGE): + if not os.path.exists(cache_file) or \ + dati.fromtimestamp(os.path.getmtime(cache_file)) < (dati.now() - CACHE_AGE): try: data = urllib2.urlopen(url).read() if sys.version_info.major > 2: diff --git a/odml/terminology.py b/odml/terminology.py index 1b4170fd..2076fa2c 100644 --- a/odml/terminology.py +++ b/odml/terminology.py @@ -1,6 +1,5 @@ """ -Handles (deferred) loading of terminology data and access to it -for odML documents +Handles (deferred) loading of terminology data and access to it for odML documents. """ import datetime @@ -27,8 +26,10 @@ def cache_load(url): """ - Load the url and store it in a temporary cache directory - subsequent requests for this url will use the cached version + Loads the url and store it in a temporary cache directory + subsequent requests for this url will use the cached version. + + :param url: URL from where to load an odML terminology file from. """ filename = '.'.join([md5(url.encode()).hexdigest(), os.path.basename(url)]) cache_dir = os.path.join(tempfile.gettempdir(), "odml.cache") @@ -46,23 +47,30 @@ def cache_load(url): data = urllib2.urlopen(url).read() if sys.version_info.major > 2: data = data.decode("utf-8") - except Exception as e: - print("failed loading '%s': %s" % (url, e)) + except Exception as exc: + print("failed loading '%s': %s" % (url, exc)) return - fp = open(cache_file, "w") - fp.write(str(data)) - fp.close() + + file_obj = open(cache_file, "w") + file_obj.write(str(data)) + file_obj.close() + return open(cache_file) class Terminologies(dict): + """ + Terminologies facilitates synchronous and deferred loading, caching, + browsing and importing of full or partial odML terminologies. + """ loading = {} def load(self, url): """ - Load and cache a terminology-url + Loads and caches an odML XML file from a URL. - Returns the odml-document for the url + :param url: location of an odML XML file. + :return: The odML document loaded from url. """ if url in self: return self[url] @@ -75,25 +83,34 @@ def load(self, url): return self._load(url) def _load(self, url): - # TODO also cache the data locally on disk - # if url.startswith("http"): return None - fp = cache_load(url) - if fp is None: + """ + Cache loads an odML XML file from a URL and returns + the result as a parsed odML document. + + :param url: location of an odML XML file. + :return: The odML document loaded from url. + It will silently return None, if any exceptions + occur to enable loading of nested odML files. + """ + file_obj = cache_load(url) + if file_obj is None: print("did not successfully load '%s'" % url) return try: - term = XMLReader(filename=url, ignore_errors=True).from_file(fp) + term = XMLReader(filename=url, ignore_errors=True).from_file(file_obj) term.finalize() - except ParserException as e: + except ParserException as exc: print("Failed to load %s due to parser errors" % url) - print(' "%s"' % e) + print(' "%s"' % exc) term = None self[url] = term return term def deferred_load(self, url): """ - Start a thread to load the terminology in background + Starts a background thread to load an odML XML file from a URL. + + :param url: location of an odML XML file. 
""" if url in self or url in self.loading: return @@ -107,4 +124,4 @@ def deferred_load(self, url): if __name__ == "__main__": - f = cache_load(REPOSITORY) + FILE_OBJECT = cache_load(REPOSITORY) diff --git a/odml/tools/converters/format_converter.py b/odml/tools/converters/format_converter.py index e4670d8e..b12258c1 100644 --- a/odml/tools/converters/format_converter.py +++ b/odml/tools/converters/format_converter.py @@ -1,3 +1,26 @@ +""" +The FormatConverter can be used from the command line and within scripts +to convert between the different odML formats and update previous file format +versions to the most recent one. + +A full list of the available odML output formats is available via the +CONVERSION_FORMATS constant. + +Command line usage: +python -m [-out ] [-r] + +Examples: +1) >> python -m odml.tools.converters.format_converter v1_1 -out -r + + Convert files from the path to .xml odml version 1.1, writes them + to including subdirectories and its files from the input path. + +2) >> python -m odml.tools.converters.format_converter odml + + Converts files from path to .odml and writes them + to not including subdirectories. +""" + import argparse import copy import os @@ -24,24 +47,31 @@ class FormatConverter(object): + """ + Class for converting between the different odML file formats. + """ @classmethod def convert(cls, args=None): """ - Enable usage of the argparse for calling convert_dir(...) - Example: - 1) >> python -m odml.tools.converters.format_converter ./..path../input_dir v1_1 -out ./..path../output_dir -r - - Convert files from the path <./..path../input_dir> to .xml odml version 1.1, - writes them into <./..path../output_dir> including subdirectories - and its files from the input path. - - 2) >> python -m odml.tools.converters.format_converter ./..path../input_dir odml - - Converts files from path <./..path../input_dir> to .odml, - writes them into <./..path../input_dir_odml> not including subdirectories. + Enables usage of the argparse for calling convert_dir(...) + + Usage: + python -m [-out ] [-r] + + Examples: + 1) >> python -m odml.tools.converters.format_converter v1_1 -out -r + Convert files from the path to .xml odml version 1.1, writes them + to including subdirectories and its files from the input path. + + 2) >> python -m odml.tools.converters.format_converter odml + Converts files from path to .odml and writes them + to not including subdirectories. + + :param args: Command line arguments. See usage for details. """ - parser = argparse.ArgumentParser(description="Convert directory with odml files to another format") + desc = "Convert directory with odml files to another format" + parser = argparse.ArgumentParser(description=desc) parser.add_argument("input_dir", help="Path to input directory") parser.add_argument("result_format", choices=list(CONVERSION_FORMATS), help="Format of output files") @@ -49,21 +79,23 @@ def convert(cls, args=None): parser.add_argument("-r", "--recursive", action="store_true", help="Enable converting files from subdirectories") args = parser.parse_args(args) - r = True if args.recursive else False - cls.convert_dir(args.input_dir, args.output_dir, r, args.result_format) + recursive = True if args.recursive else False + cls.convert_dir(args.input_dir, args.output_dir, recursive, args.result_format) @classmethod def convert_dir(cls, input_dir, output_dir, parse_subdirs, res_format): """ Convert files from given input directory to the specified res_format. 
+ :param input_dir: Path to input directory - :param output_dir: Path for output directory. If None, new directory will be created on the same level as input + :param output_dir: Path for output directory. + If None, new directory will be created on the same level as input :param parse_subdirs: If True enable converting files from subdirectories - :param res_format: Format of output files. - Possible choices: "v1_1" (converts to version 1.1 from version 1 xml) - "odml" (converts to .odml from version 1.1 .xml files) - "turtle", "nt" etc. (converts to rdf formats from version 1.1 .odml files) - (see full list of rdf serializers in CONVERSION_FORMATS) + :param res_format: Format of output files. + Possible choices: "v1_1" (version 1 to version 1.1 xml files) + "odml" (version 1.1 .xml to .odml files) + "turtle", "nt" etc. (version 1.1 to RDF files) + (full list of RDF serializers in CONVERSION_FORMATS) """ if res_format not in CONVERSION_FORMATS: raise ValueError("Format for output files is incorrect. " @@ -73,8 +105,9 @@ def convert_dir(cls, input_dir, output_dir, parse_subdirs, res_format): input_dir = os.path.join(input_dir, '') if output_dir is None: + # find the directory that contains input_dir input_dir_name = os.path.basename(os.path.dirname(input_dir)) - root_dir = os.path.dirname(os.path.dirname(input_dir)) # find the directory that contains input_dir + root_dir = os.path.dirname(os.path.dirname(input_dir)) output_dir_name = input_dir_name + "_" + res_format output_dir = os.path.join(root_dir, output_dir_name) cls._create_sub_directory(output_dir) @@ -84,10 +117,10 @@ def convert_dir(cls, input_dir, output_dir, parse_subdirs, res_format): if not parse_subdirs: for file_name in os.listdir(input_dir): if os.path.isfile(os.path.join(input_dir, file_name)): - cls._convert_file(os.path.join(input_dir, file_name), os.path.join(output_dir, file_name), - res_format) + cls._convert_file(os.path.join(input_dir, file_name), + os.path.join(output_dir, file_name), res_format) else: - for dir_path, dir_names, file_names in os.walk(input_dir): + for dir_path, _, file_names in os.walk(input_dir): for file_name in file_names: in_file_path = os.path.join(dir_path, file_name) out_dir = re.sub(r"" + input_dir, r"" + output_dir, dir_path) @@ -98,19 +131,27 @@ def convert_dir(cls, input_dir, output_dir, parse_subdirs, res_format): @classmethod def _convert_file(cls, input_path, output_path, res_format): """ - Convert a file from given input_path to res_format. + Converts a file from given input_path to res_format. Will raise a ValueError + if the provided output format (res_format) is not supported. + + :param input_path: full path including file name of the file to be converted. + :param output_path: full path including file name of the output file. + If required the file extension will be adjusted to the + output format. + :param res_format: Format the input file will be converted to. Only formats + listed in constant CONVERSION_FORMATS are supported. 
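Beyond the command line usage documented above, convert_dir and convert can also be called from a script; the directory names below are placeholders:

from odml.tools.converters.format_converter import FormatConverter

# Convert every odML file found in ./v1_data (including subdirectories)
# to version 1.1 XML files written to ./v1_1_data.
FormatConverter.convert_dir("./v1_data", "./v1_1_data", True, "v1_1")

# The same kind of conversion via the argparse entry point used on the command line.
FormatConverter.convert(["./v1_data", "odml", "-out", "./odml_data"])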
""" if res_format == "v1_1": VersionConverter(input_path).write_to_file(output_path) elif res_format == "odml": if not output_path.endswith(".odml"): - p, _ = os.path.splitext(output_path) - output_path = p + ".odml" + file_path, _ = os.path.splitext(output_path) + output_path = file_path + ".odml" odml.save(odml.load(input_path), output_path) elif res_format in CONVERSION_FORMATS: if not output_path.endswith(CONVERSION_FORMATS[res_format]): - p, _ = os.path.splitext(output_path) - output_path = p + CONVERSION_FORMATS[res_format] + file_path, _ = os.path.splitext(output_path) + output_path = file_path + CONVERSION_FORMATS[res_format] RDFWriter(odml.load(input_path)).write_file(output_path, res_format) else: raise ValueError("Format for output files is incorrect. " @@ -119,7 +160,9 @@ def _convert_file(cls, input_path, output_path, res_format): @staticmethod def _create_sub_directory(dir_path): """ - Creates the new directory to store the converted file. + Creates a new directory if it does not yet exist. + + :param dir_path: path of the required directory. """ if not os.path.isdir(dir_path): os.makedirs(dir_path) @@ -127,9 +170,11 @@ def _create_sub_directory(dir_path): @staticmethod def _check_input_output_directory(input_dir, output_dir): """ - Checks if provided directory is valid - not None, is directory and not a root folder in the File System - if output dir was not provided. - Raise relevant exceptions. + Checks if the provided directories are valid. Will raise a ValueError + if the directories do not exist. + + :param input_dir: input file directory. + :param output_dir: output file directory. Can be None. """ if not input_dir or not os.path.isdir(input_dir): raise ValueError("The path to input directory is not a valid path") @@ -139,8 +184,9 @@ def _check_input_output_directory(input_dir, output_dir): if not output_dir: if os.path.dirname(input_dir) == input_dir: - raise ValueError("The input directory cannot be a root folder of the File System if " - "output directory was not specified") + msg = "Cannot write to %s. Please specify an output directory" % input_dir + raise ValueError(msg) + if __name__ == "__main__": FormatConverter.convert(sys.argv[1:]) diff --git a/odml/tools/converters/version_converter.py b/odml/tools/converters/version_converter.py index cc997098..ef8b18b3 100644 --- a/odml/tools/converters/version_converter.py +++ b/odml/tools/converters/version_converter.py @@ -1,17 +1,21 @@ +""" +This module provides the class VersionConverter to convert +odML XML files from version 1.0 to 1.1. +""" + import io import json import os import sys +import uuid import yaml from lxml import etree as ET -from ... import format +from ...format import Document, Section, Property from ...info import FORMAT_VERSION from ...terminology import Terminologies, REPOSITORY_BASE -import uuid - try: unicode = unicode except NameError: @@ -20,7 +24,7 @@ class VersionConverter(object): """ - Class for converting odml xml files from version 1.0 to 1.1 + Class for converting odML XML files from version 1.0 to 1.1. """ _version_map = { 'filename': 'value_origin', @@ -35,6 +39,7 @@ def _parse_xml(self): """ _parse_xml checks whether the provided file object can be parsed and returns the parsed lxml tree. 
+ :return: ElementTree """ # Make pretty print available by resetting format @@ -69,6 +74,7 @@ def _parse_dict_document(cls, parsed_doc): _parse_dict_document parses a python dictionary containing a valid v1.0 odML document into an lxml.ElementTree XML equivalent and returns the resulting lxml ElementTree. + :param parsed_doc: python dictionary containing a valid v1.0 odML document. :return: lxml ElementTree """ @@ -92,6 +98,7 @@ def _parse_dict_sections(cls, parent_element, section_list): _parse_dict_sections parses a list containing python dictionaries of v1.0 odML style sections into lxml.Element XML equivalents and appends the parsed Sections to the provided lxml.Element parent. + :param parent_element: lxml.Element to which parsed sections will be appended. :param section_list: list of python dictionaries containing valid v1.0 odML Sections. @@ -116,6 +123,7 @@ def _parse_dict_properties(cls, parent_element, props_list): _parse_dict_properties parses a list containing python dictionaries of v1.0 odML style properties into lxml.Element XML equivalents and appends the parsed Properties to the provided lxml.Element parent. + :param parent_element: lxml.Element to which parsed properties will be appended. :param props_list: list of python dictionaries containing valid v1.0 odML Properties. @@ -138,6 +146,7 @@ def _parse_dict_values(parent_element, value_list): _parse_dict_values parses a list containing python dictionaries of v1.0 odML style values into lxml.Element XML equivalents and appends the parsed Values to the provided lxml.Element parent. + :param parent_element: lxml.Element to which parsed values will be appended. :param value_list: list of python dictionaries containing valid v1.0 odML Values. """ @@ -160,6 +169,8 @@ def _convert(self, tree): Unites multiple value objects and moves all supported Value elements to its parent Property. Exports only Document, Section and Property elements, that are supported by odML v1.1. + + :param tree: lxml.ElementTree containing a v1.0 odML document. """ # Reset status messages self.conversion_log = [] @@ -174,28 +185,28 @@ def _convert(self, tree): # Exclude unsupported Section attributes, ignore comments, handle repositories. for sec in root.iter("section"): sec_name = sec.find("name").text - for e in sec: - if e.tag not in format.Section.arguments_keys and isinstance(e.tag, str): + for elem in sec: + if elem.tag not in Section.arguments_keys and isinstance(elem.tag, str): self._log("[Info] Omitted non-Section attribute " - "'%s: %s/%s'" % (sec_name, e.tag, e.text)) - sec.remove(e) + "'%s: %s/%s'" % (sec_name, elem.tag, elem.text)) + sec.remove(elem) continue - if e.tag == "repository": - self._handle_repository(e) - elif e.tag == "include": - self._handle_include(e) + if elem.tag == "repository": + self._handle_repository(elem) + elif elem.tag == "include": + self._handle_include(elem) # Exclude unsupported Document attributes, ignore comments, handle repositories. 
- for e in root: - if e.tag not in format.Document.arguments_keys and isinstance(e.tag, str): + for elem in root: + if elem.tag not in Document.arguments_keys and isinstance(elem.tag, str): self._log("[Info] Omitted non-Document " - "attribute '%s/%s'" % (e.tag, e.text)) - root.remove(e) + "attribute '%s/%s'" % (elem.tag, elem.text)) + root.remove(elem) continue - if e.tag == "repository": - self._handle_repository(e) + if elem.tag == "repository": + self._handle_repository(elem) tree = self._check_add_ids(tree) @@ -205,6 +216,7 @@ def _handle_include(self, element): """ _handle_include checks whether a provided include element is v1.1 compatible and logs a warning message otherwise. + :param element: lxml element containing the provided include link. """ content = element.text @@ -235,6 +247,7 @@ def _handle_include(self, element): def _handle_repository(self, element): """ The method handles provided odML repositories. + :param element: lxml element containing the provided odML repository link. """ content = element.text @@ -263,9 +276,10 @@ def _handle_repository(self, element): def _handle_properties(self, root): """ - Method removes all property elements w/o name attribute, converts Value + Removes all property elements without name attribute, converts Value elements from v1.0 to v1.1 style and removes unsupported Property elements. - :param root: + + :param root: lxml.ElementTree containing a v1.0 odML property list. """ for prop in root.iter("property"): main_val = ET.Element("value") @@ -315,7 +329,7 @@ def _handle_properties(self, root): if elem.tag == "dependency_value": elem.tag = "dependencyvalue" - if (elem.tag not in format.Property.arguments_keys and + if (elem.tag not in Property.arguments_keys and isinstance(elem.tag, str)): self._log("[Info] Omitted non-Property attribute " "'%s: %s/%s'" % (prop_id, elem.tag, elem.text)) @@ -326,6 +340,7 @@ def _handle_value(self, value, log_id): Values changed from odML v1.0 to v1.1. This function moves all supported odML Property elements from a v1.0 Value element to the parent Property element. Adds a log entry for every non-exported element. + :param value: etree element containing the v1.0 Value. :param log_id: String containing Section and Property name and type to log omitted elements and value contents. @@ -348,7 +363,7 @@ def _handle_value(self, value, log_id): check_export.text, val_elem.text)) # Include only supported Property attributes - elif val_elem.tag in format.Property.arguments_keys: + elif val_elem.tag in Property.arguments_keys: new_elem = ET.Element(val_elem.tag) new_elem.text = val_elem.text @@ -377,6 +392,7 @@ def _replace_same_name_entities(cls, tree): """ Changes same section names in the doc by adding <-{index}> to the next section occurrences. + :param tree: ElementTree of the doc :return: ElementTree """ @@ -404,6 +420,7 @@ def _change_entity_name(tree, elem_map, name): """ Adds numbering to identical element names where their odml.Section or odml.Property parents reside on the same level in the tree. + :param tree: The element tree containing the 'name' element. :param elem_map: lxml path to occurrence maps of named Sections or Properties. :param name: lxml element containing the name text of a Section or Property. @@ -419,6 +436,7 @@ def _check_add_ids(self, tree): """ Checks, whether elements (properties) possess an UUID and adds one in case of absence. + :param tree: ElementTree of the doc :return: ElementTree """ @@ -436,6 +454,7 @@ def _add_id(element): """ Checks, whether an element possesses an ID. 
If yes, make sure it has the right format. Otherwise a new UUID is created. + :param element: lxml element. """ oid = element.find("id") @@ -445,8 +464,8 @@ def _add_id(element): try: if oid.text is not None: new_id.text = str(uuid.UUID(oid.text)) - except ValueError as e: - print(e) + except ValueError as exc: + print(exc) element.remove(oid) element.append(new_id) @@ -454,6 +473,9 @@ def _log(self, msg): """ Adds the passed message to the conversion_log attribute and prints the message to the command line. + + :param msg: string that is appended to the conversion log and + printed to the command line. """ self.conversion_log.append(msg) print(msg) @@ -471,6 +493,11 @@ def convert(self, backend="XML"): This method returns the content of the provided file object converted to odML version 1.1 as a string object which is directly consumable by the odml.tools.ODMLReader. + Will raise an Exception, if the backend format is not supported. + + :param backend: File format of the source file. 'JSON', 'YAML' and 'XML' are + supported. Default backend is 'XML'. + :returns an odML v1.1 document as an XML string """ if backend.upper() == "JSON": old_tree = self._parse_json() @@ -488,6 +515,7 @@ def write_to_file(self, filename, backend="XML"): """ This method converts the content of the provided converter file object to odML version 1.1 and writes the results to `filename`. + :param filename: Output file. :param backend: Format of the source file, default is XML. """ diff --git a/odml/tools/dict_parser.py b/odml/tools/dict_parser.py index d6ab3a76..35e0f2fc 100644 --- a/odml/tools/dict_parser.py +++ b/odml/tools/dict_parser.py @@ -1,6 +1,6 @@ """ -Dict parser converts the content of a dictionary -into a proper and verified odML document. +The dict_parser module provides access to the DictWriter and DictReader class. +Both handle the conversion of odML documents from and to Python dictionary objects. """ from .. import format as odmlfmt @@ -10,13 +10,20 @@ class DictWriter: """ - A writer to parse an odML document to a Python dictionary object equivalent. + A writer to parse an odml.Document to a Python dictionary object equivalent. """ def __init__(self): self.doc = None # odML document def to_dict(self, odml_document): + """ + Parses a full odml.Document to a Python dict object. Will also parse any + contained odml.Sections, their subsections and odml.Properties. + + :param odml_document: an odml.Document. + :return: parsed odml.Document as a Python dict object. + """ self.doc = odml_document parsed_doc = {} @@ -39,6 +46,13 @@ def to_dict(self, odml_document): return parsed_doc def get_sections(self, section_list): + """ + Parses a list of odml.Sections to a Python dict object. Will also parse any + contained subsections and odml.Properties. + + :param section_list: list of odml.Sections. + :return: list of parsed odml.Sections as a single Python dict object. + """ section_seq = [] for section in section_list: @@ -70,6 +84,12 @@ def get_sections(self, section_list): @staticmethod def get_properties(props_list): + """ + Parses a list of odml.Properties to a Python dict object. + + :param props_list: list of odml.Properties. + :return: list of parsed odml.Properties as a single Python dict object. + """ props_seq = [] for prop in props_list: @@ -100,7 +120,7 @@ def get_properties(props_list): class DictReader: """ - A reader to parse dictionaries with odML content into a proper odML document. + A reader to parse dictionaries with odML content into an odml.Document. 
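For reference, the public convert and write_to_file methods of the VersionConverter described above are typically used as in the sketch below; the file names are placeholders, the source is assumed to be odML v1.0 XML, and the converter is assumed to be constructed with the path of the source file as in current odml releases:

from odml.tools.converters import VersionConverter

converter = VersionConverter("metadata_v1_0.xml")

# Write the converted odML v1.1 document to disk and inspect what was omitted.
converter.write_to_file("metadata_v1_1.xml", backend="XML")
print("\n".join(converter.conversion_log))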
""" def __init__(self, show_warnings=True): @@ -114,6 +134,16 @@ def __init__(self, show_warnings=True): self.warnings = [] def is_valid_attribute(self, attr, fmt): + """ + Checks whether a provided attribute is valid for a provided odml class + (Document, Section, Property). + + :param attr: Python dictionary tag that will be checked if it is a valid + attribute for the provided format class. + :param fmt: required odml format class format.Document, format.Section or + format.Property against which the attribute is checked. + :returns: the attribute if the attribute is valid, None otherwise. + """ if attr in fmt.arguments_keys: return attr @@ -124,19 +154,30 @@ def is_valid_attribute(self, attr, fmt): self.warnings.append(msg) if self.show_warnings: print(msg) + return None def to_odml(self, parsed_doc): + """ + Parses a Python dictionary object containing an odML document to an odml.Document. + Will raise a ParserException if the Python dictionary does not contain a valid + odML document. Also raises an InvalidVersionException if the odML document + is of a previous odML format version. + + :param parsed_doc: Python dictionary object containing an odML document. + :returns: parsed odml.Document. + """ self.parsed_doc = parsed_doc # Parse only odML documents of supported format versions. if 'Document' not in self.parsed_doc: msg = "Missing root element 'Document'" raise ParserException(msg) - elif 'odml-version' not in self.parsed_doc: + + if 'odml-version' not in self.parsed_doc: raise ParserException("Invalid odML document: Could not find odml-version.") - elif self.parsed_doc.get('odml-version') != FORMAT_VERSION: + if self.parsed_doc.get('odml-version') != FORMAT_VERSION: msg = ("Cannot parse odML document with format version '%s'. \n" "\tUse the 'VersionConverter' from 'odml.tools.converters' " "to import previous odML formats." @@ -163,6 +204,13 @@ def to_odml(self, parsed_doc): return doc def parse_sections(self, section_list): + """ + Parses a list of Python dictionary objects containing odML sections to the + odml.Section equivalents including any subsections and properties. + + :param section_list: list of Python dictionary objects containing odML sections. + :returns: list of parsed odml.Sections + """ odml_sections = [] for section in section_list: @@ -192,6 +240,13 @@ def parse_sections(self, section_list): return odml_sections def parse_properties(self, props_list): + """ + Parses a list of Python dictionary objects containing odML properties to the + odml.Property equivalents. + + :param props_list: list of Python dictionary objects containing odML properties. + :returns: list of parsed odml.Properties + """ odml_props = [] for _property in props_list: diff --git a/odml/tools/doc_inherit.py b/odml/tools/doc_inherit.py index f6e19512..02f394c3 100644 --- a/odml/tools/doc_inherit.py +++ b/odml/tools/doc_inherit.py @@ -12,6 +12,14 @@ def allow_inherit_docstring(cls): + """ + The base classes of a provided class will be used to copy and inherit the first + docstring it finds. + + :param cls: class the decorator function will be used on to inherit the docstring + from its base classes. + :returns: class with the inherited docstring. + """ bases = cls.__bases__ for attr, attribute in cls.__dict__.items(): if hasattr(attribute, "inherit_docstring"): @@ -27,5 +35,9 @@ def allow_inherit_docstring(cls): def inherit_docstring(obj): + """ + Sets the inherit_docstring attribute of an object to True and returns the object. 
+ """ obj.inherit_docstring = True + return obj diff --git a/odml/tools/dumper.py b/odml/tools/dumper.py index 4f8e1b21..bcde2815 100644 --- a/odml/tools/dumper.py +++ b/odml/tools/dumper.py @@ -1,47 +1,73 @@ """ -Dumps odML-Structures +The dumper module provides functions to dump odML objects; +Document, Section, Property; to the command line. """ from .xmlparser import to_csv def get_props(obj, props): + """ + Retrieves the values of a list of provided properties + from an object and returns all values as a concatenated string. + + :param obj: odml object from which to retrieve specific property values. + :param props: list of properties + :returns: the obj property values as a concatenated string + """ out = [] - for p in props: - if hasattr(obj, p): - x = getattr(obj, p) - if x is not None: - if isinstance(x, list) or isinstance(x, tuple): - out.append("%s=%s" % (p, to_csv(x))) + for prop in props: + if hasattr(obj, prop): + curr = getattr(obj, prop) + if curr is not None: + if isinstance(curr, (list, tuple)): + out.append("%s=%s" % (prop, to_csv(curr))) else: - out.append("%s=%s" % (p, repr(x))) + out.append("%s=%s" % (prop, repr(curr))) return ", ".join(out) -def dumpProperty(property, indent=1): - # TODO : (PEP8) Find a better way to split the following line - print("%*s:%s (%s)" % (indent, " ", property.name, - get_props(property, ["definition", "values", "uncertainty", "unit", - "dtype", "value_reference", "dependency", - "dependencyValue"]))) +def dump_property(prop, indent=1): + """ + Prints the content of an odml.Property. + + :param prop: odml.Property + :param indent: number of prepended whitespaces. Default is 1. + """ + prop_list = ["definition", "values", "uncertainty", "unit", "dtype", + "value_reference", "dependency", "dependencyValue"] + prop_string = get_props(prop, prop_list) + print("%*s:%s (%s)" % (indent, " ", prop.name, prop_string)) -def dumpSection(section, indent=1): +def dump_section(section, indent=1): + """ + Prints the content of an odml.Section including any subsections + and odml.Properties. + + :param section: odml.Section + :param indent: number of prepended whitespaces. Default is 1. + """ if section is None: return - # TODO : (PEP8) Find a better way to split the following line - print("%*s*%s (%s)" % (indent, " ", section.name, - get_props(section, ["type", "definition", "link", - "include", "repository"]))) + prop_list = ["type", "definition", "link", "include", "repository"] + prop_string = get_props(section, prop_list) + print("%*s*%s (%s)" % (indent, " ", section.name, prop_string)) for prop in section.properties: - dumpProperty(prop, indent + 1) + dump_property(prop, indent + 1) for sub in section.sections: - dumpSection(sub, indent * 2) + dump_section(sub, indent * 2) + +def dump_doc(doc): + """ + Prints the content of an odml.Document including any subsections + and odml.Properties. -def dumpDoc(doc): + :param doc: odml.Section + """ for sec in doc: - dumpSection(sec) + dump_section(sec) diff --git a/odml/tools/odmlparser.py b/odml/tools/odmlparser.py index 186a8eb1..e2cf2d18 100644 --- a/odml/tools/odmlparser.py +++ b/odml/tools/odmlparser.py @@ -1,17 +1,17 @@ #!/usr/bin/env python """ -A generic odML parsing module. - -Parses odML files and documents. +A generic odML parsing module. It parses odML files and documents. +All supported formats can be found in parser_utils.SUPPORTED_PARSERS. """ import datetime import json import sys -import yaml from os.path import basename +import yaml + from . 
import xmlparser from .dict_parser import DictWriter, DictReader from ..info import FORMAT_VERSION @@ -28,7 +28,8 @@ class ODMLWriter: """ - A generic odML document writer, for XML, YAML and JSON. + A generic odML document writer for JSON, XML, YAML and RDF. + The output format is specified on init. Usage: xml_writer = ODMLWriter(parser='XML') @@ -45,6 +46,16 @@ def __init__(self, parser='XML'): self.parser = parser def write_file(self, odml_document, filename): + """ + Writes an odml.Document to a file using the format + defined in the ODMLWriter.parser property. Supported formats are + JSON, XML, YAML and RDF. + Will raise a ParserException if the odml.Document is not valid. + + :param odml_document: odml.Document. + :param filename: path and filename of the output file. + """ + # Write document only if it does not contain validation errors. validation = Validation(odml_document) msg = "" @@ -63,12 +74,21 @@ def write_file(self, odml_document, filename): file.write(self.to_string(odml_document)) def to_string(self, odml_document): + """ + Parses an odml.Document to a string in the file format + defined in the ODMLWriter.parser property. Supported formats are + JSON, XML, YAML and RDF. + + :param odml_document: odml.Document. + :return: string containing the content of the odml.Document in the + specified format. + """ string_doc = '' if self.parser == 'XML': string_doc = unicode(xmlparser.XMLWriter(odml_document)) elif self.parser == "RDF": - # Use turtle as default output format for now. + # Use XML as default output format for now. string_doc = RDFWriter(odml_document).get_rdf_str("xml") else: self.parsed_doc = DictWriter().to_dict(odml_document) @@ -77,7 +97,7 @@ def to_string(self, odml_document): 'odml-version': FORMAT_VERSION} if self.parser == 'YAML': - yaml.add_representer(datetime.time, YAMLTimeSerializer) + yaml.add_representer(datetime.time, yaml_time_serializer) string_doc = yaml.dump(odml_output, default_flow_style=False) elif self.parser == 'JSON': string_doc = json.dumps(odml_output, indent=4, @@ -89,13 +109,19 @@ def to_string(self, odml_document): return string_doc -# Required to serialize datetime.time as string objects -def YAMLTimeSerializer(dumper, data): +def yaml_time_serializer(dumper, data): + """ + This function is required to serialize datetime.time as string objects + when working with YAML as output format. + """ return dumper.represent_scalar('tag:yaml.org,2002:str', str(data)) -# Required to serialize datetime values with JSON. class JSONDateTimeSerializer(json.JSONEncoder): + """ + Required to serialize datetime objects as string objects when working with JSON + as output format. + """ def default(self, o): if isinstance(o, (datetime.datetime, datetime.date, datetime.time)): return str(o) @@ -128,7 +154,18 @@ def __init__(self, parser='XML', show_warnings=True): self.warnings = [] def from_file(self, file, doc_format=None): - + """ + Loads an odML document from a file. The ODMLReader.parser specifies the + input file format. If the input file is an RDF file, the specific RDF format + has to be provided as well. + Available RDF formats: 'xml', 'n3', 'turtle', 'nt', 'pretty-xml', + 'trix', 'trig', 'nquads'. + + :param file: file path to load an odML document from. + :param doc_format: Required for RDF files only and provides the specific format + of an RDF file. 
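The ODMLWriter/ODMLReader pair documented above can be combined as in this sketch (the file name is a placeholder; the document only needs to pass validation without hard errors):

import odml
from odml.tools.odmlparser import ODMLWriter, ODMLReader

doc = odml.Document()
odml.Section(name="session", type="recording", parent=doc)

# The output format is fixed when the writer or reader is created.
ODMLWriter(parser='JSON').write_file(doc, "session.json")
loaded = ODMLReader(parser='JSON').from_file("session.json")
print(loaded.sections['session'].type)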
+ :return: parsed odml.Document + """ if self.parser == 'XML': par = xmlparser.XMLReader(ignore_errors=True, show_warnings=self.show_warnings) @@ -139,13 +176,12 @@ def from_file(self, file, doc_format=None): elif self.parser == 'YAML': with open(file) as yaml_data: try: - yaml.SafeLoader.add_constructor( - "tag:yaml.org,2002:python/unicode", - UnicodeLoaderConstructor) + yaml.SafeLoader.add_constructor("tag:yaml.org,2002:python/unicode", + unicode_loader_constructor) self.parsed_doc = yaml.safe_load(yaml_data) except yaml.parser.ParserError as err: print(err) - return + return None par = DictReader(show_warnings=self.show_warnings) self.doc = par.to_odml(self.parsed_doc) @@ -159,7 +195,7 @@ def from_file(self, file, doc_format=None): self.parsed_doc = json.load(json_data) except ValueError as err: # Python 2 does not support JSONDecodeError print("JSON Decoder Error: %s" % err) - return + return None par = DictReader(show_warnings=self.show_warnings) self.doc = par.to_odml(self.parsed_doc) @@ -175,6 +211,18 @@ def from_file(self, file, doc_format=None): return self.doc def from_string(self, string, doc_format=None): + """ + Loads an odML document from a string object. The ODMLReader.parser specifies the + input file format. If the input string contains an RDF format, + the specific RDF format has to be provided as well. + Available RDF formats: 'xml', 'n3', 'turtle', 'nt', 'pretty-xml', + 'trix', 'trig', 'nquads'. + + :param string: file path to load an odML document from. + :param doc_format: Required for RDF files only and provides the specific format + of an RDF file. + :return: parsed odml.Document + """ if self.parser == 'XML': self.doc = xmlparser.XMLReader().from_string(string) @@ -208,7 +256,9 @@ def from_string(self, string, doc_format=None): return self.doc -# Constructor for PyYAML to load unicode characters # Needed only for < Python 3 -def UnicodeLoaderConstructor(loader, node): +def unicode_loader_constructor(_, node): + """ + Constructor for PyYAML to load unicode characters + """ return node.value diff --git a/odml/tools/version_converter.py b/odml/tools/version_converter.py index a419935b..91f82730 100644 --- a/odml/tools/version_converter.py +++ b/odml/tools/version_converter.py @@ -1,3 +1,8 @@ +""" +This module provides backwards compatibility for the VersionConverter class. +It is deprecated and will be removed in future versions. +""" + from .converters import VersionConverter print("[DEPRECATION WARNING] The VersionConverter file has been moved to " diff --git a/odml/tools/xmlparser.py b/odml/tools/xmlparser.py index 64a63bb2..f400e9c6 100644 --- a/odml/tools/xmlparser.py +++ b/odml/tools/xmlparser.py @@ -1,23 +1,26 @@ #!/usr/bin/env python """ -The XML parsing module. -Parses odML files. Can be invoked standalone: +The xmlparser module provides access to the XMLWriter and XMLReader classes. +Both handle the conversion of odML documents from and to XML files and strings. + +The parser can be invoked standalone: python -m odml.tools.xmlparser file.odml """ import csv import sys + +from os.path import basename + from lxml import etree as ET from lxml.builder import E # this is needed for py2exe to include lxml completely from lxml import _elementpath as _dummy -from os.path import basename - try: from StringIO import StringIO except ImportError: from io import StringIO -from .. import format +from .. 
import format as ofmt from ..info import FORMAT_VERSION from .parser_utils import InvalidVersionException, ParserException @@ -38,6 +41,12 @@ def to_csv(val): + """ + Modifies odML values for serialization to strings and files. + + :param val: odML value. + :return: modified value string. + """ # Make sure all individual values do not contain # leading or trailing whitespaces. unicode_values = list(map(unicode.strip, map(unicode, val))) @@ -53,6 +62,12 @@ def to_csv(val): def from_csv(value_string): + """ + Reads a string containing odML values and parses them into a list. + + :param value_string: string of odML values. + :return: list of values. + """ if not value_string: return [] if value_string[0] == "[" and value_string[-1] == "]": @@ -73,7 +88,7 @@ def from_csv(value_string): class XMLWriter: """ - Creates XML nodes storing the information of an odML Document + Creates XML nodes storing the information of an odML Document. """ header = "%s\n%s\n" % (XML_HEADER, EXTERNAL_STYLE_HEADER) @@ -81,38 +96,42 @@ def __init__(self, odml_document): self.doc = odml_document @staticmethod - def save_element(e): + def save_element(curr_el): """ - returns an xml node for the odML object e + Returns an XML node for the odML object curr_el. + + :param curr_el: odML object. Supported objects are odml.Document, odml.Section, + odml.Property. + :returns: parsed XML Node. """ - fmt = e.format() + fmt = curr_el.format() cur = E(fmt.name) # generate attributes - if isinstance(fmt, format.Document.__class__): + if isinstance(fmt, ofmt.Document.__class__): cur.attrib['version'] = FORMAT_VERSION # generate elements for k in fmt.arguments_keys: - if not hasattr(e, fmt.map(k)): + if not hasattr(curr_el, fmt.map(k)): continue - val = getattr(e, fmt.map(k)) + val = getattr(curr_el, fmt.map(k)) if val is None: continue - if isinstance(fmt, format.Property.__class__) and k == "value": + if isinstance(fmt, ofmt.Property.__class__) and k == "value": # Custom odML tuples require special handling for save loading from file. - if e.dtype and e.dtype.endswith("-tuple") and len(val) > 0: + if curr_el.dtype and curr_el.dtype.endswith("-tuple") and len(val) > 0: ele = E(k, "(%s)" % ";".join(val[0])) else: ele = E(k, to_csv(val)) cur.append(ele) else: if isinstance(val, list): - for v in val: - if v is None: + for curr_val in val: + if curr_val is None: continue - ele = XMLWriter.save_element(v) + ele = XMLWriter.save_element(curr_val) cur.append(ele) else: if sys.version_info < (3,): @@ -131,6 +150,7 @@ def __unicode__(self): def write_file(self, filename, local_style=False, custom_template=None): """ write_file saves the XMLWriters odML document to an XML file. + :param filename: location and name where the file will be written to. :param local_style: Optional boolean. By default an odML XML document is saved with a default header containing an external stylesheet for @@ -172,14 +192,15 @@ def write_file(self, filename, local_style=False, custom_template=None): def load(filename): """ - shortcut function for XMLReader().from_file(filename) + Shortcut function for XMLReader().from_file(filename). """ return XMLReader().from_file(filename) class XMLReader(object): """ - A reader to parse xml-files or strings into odml data structures + A reader to parse XML files or strings into odML data structures. + Usage: >>> doc = XMLReader().from_file("file.odml") """ @@ -196,7 +217,7 @@ def __init__(self, ignore_errors=False, show_warnings=True, filename=None): :param filename: Path to an odml file. 
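To illustrate the XMLWriter/XMLReader API touched above (the file name is a placeholder; local_style embeds the stylesheet instead of referencing the external one):

import odml
from odml.tools.xmlparser import XMLWriter, XMLReader, load

doc = odml.Document()
odml.Section(name="setup", type="setup", parent=doc)

XMLWriter(doc).write_file("setup.odml.xml", local_style=True)

# Either explicitly via XMLReader or through the load() shortcut.
same_doc = XMLReader().from_file("setup.odml.xml")
same_doc = load("setup.odml.xml")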
""" self.parser = ET.XMLParser(remove_comments=True) - self.tags = dict([(obj.name, obj) for obj in format.__all__]) + self.tags = dict([(obj.name, obj) for obj in ofmt.__all__]) self.ignore_errors = ignore_errors self.show_warnings = show_warnings self.filename = filename @@ -205,8 +226,11 @@ def __init__(self, ignore_errors=False, show_warnings=True, filename=None): @staticmethod def _handle_version(root): """ - Check if the odML version of a handed in parsed lxml.etree is supported - by the current library and raise an Exception otherwise. + Checks if the odML version of a handed in parsed lxml.etree is supported + by the current library and raise a ParserException otherwise. If the + lxml.etree contains an XML file of a previous odML format version, + an InvalidVersionException is raised. + :param root: Root node of a parsed lxml.etree. The root tag has to contain a supported odML version number, otherwise it is not accepted as a valid odML file. @@ -225,15 +249,18 @@ def _handle_version(root): def from_file(self, xml_file): """ - parse the datastream from a file like object *xml_file* - and return an odml data structure + Parses the datastream from a file like object and return an odML data structure. + If the file cannot be parsed, a ParserException is raised. + + :param xml_file: file path to an XML input file or file like object. + :returns: a parsed odml.Document. """ try: root = ET.parse(xml_file, self.parser).getroot() if hasattr(xml_file, "close"): xml_file.close() - except ET.XMLSyntaxError as e: - raise ParserException(e.msg) + except ET.XMLSyntaxError as exc: + raise ParserException(exc.msg) self._handle_version(root) doc = self.parse_element(root) @@ -244,27 +271,61 @@ def from_file(self, xml_file): return doc def from_string(self, string): + """ + Parses an XML string and return an odML data structure. + If the string cannot be parsed, a ParserException is raised. + + :param string: XML string. + :returns: a parsed odml.Document. + """ try: root = ET.XML(string, self.parser) - except ET.XMLSyntaxError as e: - raise ParserException(e.msg) + except ET.XMLSyntaxError as exc: + raise ParserException(exc.msg) self._handle_version(root) return self.parse_element(root) - def check_mandatory_arguments(self, data, ArgClass, tag_name, node): - for k, v in ArgClass.arguments: - if v != 0 and not ArgClass.map(k) in data: + def check_mandatory_arguments(self, data, arg_class, tag_name, node): + """ + Checks if a passed attribute is required for a specific odML class. + If the attribute is required and not present in the data, the + parsers error method is called. + + :param data: list of mandatory arguments. + :param arg_class: odML class corresponding to the content of the parent node. + :param tag_name: name of the current XML node. + :param node: XML node. + """ + for k, val in arg_class.arguments: + if val != 0 and not arg_class.map(k) in data: self.error("missing element <%s> within <%s> tag\n" % (k, tag_name) + repr(data), node) - def is_valid_argument(self, tag_name, ArgClass, parent_node, child=None): - if tag_name not in ArgClass.arguments_keys: + def is_valid_argument(self, tag_name, arg_class, parent_node, child=None): + """ + Checks if an argument is valid in the scope of a specific odML class. + If the attribute is not valid, the parsers error method is called. + + :param tag_name: string containing the name of the current XML node. + :param arg_class: odML class corresponding to the content of the parent node. + :param parent_node: the parent XML node. 
+ :param child: current XML node. + """ + if tag_name not in arg_class.arguments_keys: self.error("Invalid element <%s> inside <%s> tag\n" % (tag_name, parent_node.tag), parent_node if child is None else child) def error(self, msg, elem): + """ + If the parsers ignore_errors property is set to False, a ParserException + will be raised. Otherwise the message is passed to the parsers warning + method. + + :param msg: Error message. + :param elem: XML node corresponding to the error. + """ if elem is not None: msg += " (line %d)" % elem.sourceline if self.ignore_errors: @@ -272,6 +333,14 @@ def error(self, msg, elem): raise ParserException(msg) def warn(self, msg, elem): + """ + Adds a message to the parsers warnings property. If the parsers show_warnings + property is set to True, an additional error message will be written + to sys.stderr. + + :param msg: Warning message. + :param elem: XML node corresponding to the warning. + """ if elem is not None: msg = "warning[%s:%d:<%s>]: %s\n" % ( self.filename, elem.sourceline, elem.tag, msg) @@ -283,6 +352,14 @@ def warn(self, msg, elem): sys.stderr.write(msg) def parse_element(self, node): + """ + Identifies the odML object corresponding to the current XML node e.g. + odml.Document, odml.Section or odml.Property. It will call the + parsers method corresponding to the identified odML object e.g. parse_odML, + parse_section, parse_property and return the results. + + :param node: XML node. + """ if node.tag not in self.tags: self.error("Invalid element <%s> " % node.tag, node) return None # won't be able to parse this one @@ -303,14 +380,14 @@ def parse_tag(self, root, fmt, insert_children=True): extra_args = {} children = [] - for k, v in root.attrib.iteritems(): + for k, val in root.attrib.iteritems(): k = k.lower() # 'version' is currently the only supported XML attribute. if k == 'version' and root.tag == 'odML': continue # We currently do not support XML attributes. - self.error("Attribute not supported, ignoring '%s=%s' " % (k, v), root) + self.error("Attribute not supported, ignoring '%s=%s' " % (k, val), root) for node in root: node.tag = node.tag.lower() @@ -350,8 +427,8 @@ def parse_tag(self, root, fmt, insert_children=True): obj = fmt.create() try: obj = fmt.create(**arguments) - except Exception as e: - self.error(str(e), root) + except Exception as exc: + self.error(str(exc), root) if insert_children: for child in children: @@ -359,14 +436,39 @@ def parse_tag(self, root, fmt, insert_children=True): return obj + # function 'parse_element' requires the captialisation of 'parse_odML' + # to properly parse the root of an odML document. def parse_odML(self, root, fmt): + """ + Parses the content of an XML node into an odml.Document including all + subsections and odml.Properties. + + :param root: XML node + :param fmt: odML class corresponding to the content of the XML node. + :return: parsed odml.Document + """ doc = self.parse_tag(root, fmt) return doc def parse_section(self, root, fmt): + """ + Parses the content of an XML node into an odml.Section including all subsections + and odml.Properties. + + :param root: XML node + :param fmt: odML class corresponding to the content of the XML node. + :return: parsed odml.Section + """ return self.parse_tag(root, fmt) def parse_property(self, root, fmt): + """ + Parses the content of an XML node into an odml.Property. + + :param root: XML node + :param fmt: odML class corresponding to the content of the XML node. 
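The error and warn methods above determine how strict parsing is; the sketch below shows lenient parsing that collects warnings instead of raising (the input file is a placeholder, and the warnings attribute is assumed to behave as described in the warn docstring):

from odml.tools.xmlparser import XMLReader

reader = XMLReader(ignore_errors=True, show_warnings=False, filename="legacy.xml")
doc = reader.from_file("legacy.xml")

# With ignore_errors=True parser problems are downgraded to warnings
# and collected on the reader instead of raising ParserException.
for warning in reader.warnings:
    print(warning)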
+ :return: parsed odml.Property + """ return self.parse_tag(root, fmt, insert_children=False) @@ -380,4 +482,4 @@ def parse_property(self, root, fmt): if len(args) < 1: parser.print_help() else: - dumper.dumpDoc(load(args[0])) + dumper.dump_doc(load(args[0])) diff --git a/odml/validation.py b/odml/validation.py index 2e9852d7..9b9561ff 100644 --- a/odml/validation.py +++ b/odml/validation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 """ -Generic odML validation framework +Generic odML validation framework. """ LABEL_ERROR = 'error' @@ -9,11 +9,10 @@ class ValidationError(object): """ - Represents an error found in the validation process + Represents an error found in the validation process. - The error is bound to an odML-object (*obj*) or a list of - those and contains a message and a rank which may be one of: - 'error', 'warning'. + The error is bound to an odML-object (*obj*) or a list of those and contains + a message and a rank which may be one of: 'error', 'warning'. """ def __init__(self, obj, msg, rank=LABEL_ERROR): @@ -23,14 +22,23 @@ def __init__(self, obj, msg, rank=LABEL_ERROR): @property def is_warning(self): + """ + :returns: Boolean whether the current ValidationError has rank 'Warning'. + """ return self.rank == LABEL_WARNING @property def is_error(self): + """ + :returns: Boolean whether the current ValidationError has rank 'Error'. + """ return self.rank == LABEL_ERROR @property def path(self): + """ + :returns: The absolute path to the odml object the ValidationError is bound to. + """ return self.obj.get_path() def __repr__(self): @@ -40,27 +48,29 @@ def __repr__(self): class Validation(object): + """ + Validation provides a set of default validations that can used to validate + an odml.Document. Custom validations can be added via the 'register_handler' method. + + :param doc: odml.Document that the validation will be applied to. + """ _handlers = {} @staticmethod def register_handler(klass, handler): """ - Add a validation handler for a odml class. - *type* may be one of the following: - * odML - * section - * property - - And is called in the validation process for each corresponding - object. The *handler* is assumed to be a generator function - yielding all ValidationErrors it finds: + Adds a validation handler for an odml class. The handler is called in the + validation process for each corresponding object. + The *handler* is assumed to be a generator function yielding + all ValidationErrors it finds. - handler(obj) + Section handlers are only called for sections and not for the document node. + If both are required, the handler needs to be registered twice. - The section handlers are only called for sections and not for - the document node. If both are required, you need to register - the handler twice. + :param klass: string corresponding to an odml class. Valid strings are + 'odML', 'section' and 'property'. + :param handler: validation function applied to the odml class. """ Validation._handlers.setdefault(klass, set()).add(handler) @@ -75,6 +85,12 @@ def __init__(self, doc): self.validate(prop) def validate(self, obj): + """ + Runs all registered handlers that are applicable to a provided odml class instance. + Occurring validation errors will be collected in the Validation.error attribute. + + :param obj: odml class instance. 
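A custom rule can be attached through register_handler as described above; the handler below is a made-up example that flags Sections without a definition:

import odml
from odml.validation import Validation, ValidationError, LABEL_WARNING

def section_needs_definition(sec):
    # Custom rule: yield a warning for every Section without a definition.
    if not sec.definition:
        yield ValidationError(sec, "Section has no definition", LABEL_WARNING)

Validation.register_handler('section', section_needs_definition)

doc = odml.Document()
odml.Section(name="cell", type="cell", parent=doc)
for err in Validation(doc).errors:
    print(err)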
+ """ handlers = self._handlers.get(obj.format().name, []) for handler in handlers: for err in handler(obj): @@ -82,12 +98,14 @@ def validate(self, obj): def error(self, validation_error): """ - Register an error found during the validation process + Registers an error found during the validation process. """ self.errors.append(validation_error) def __getitem__(self, obj): - """return a list of the errors for a certain object""" + """ + Return a list of the errors for a certain object. + """ errors = [] for err in self.errors: if err.obj is obj: @@ -99,7 +117,11 @@ def __getitem__(self, obj): # validation rules def section_type_must_be_defined(sec): - """test that no section has an undefined type""" + """ + Tests that no Section has an undefined type. + + :param sec: odml.Section. + """ if sec.type is None or sec.type == '' or sec.type == 'undefined': yield ValidationError(sec, 'Section type undefined', LABEL_WARNING) @@ -114,23 +136,20 @@ def section_repository_present(sec): """ repo = sec.get_repository() if repo is None: - yield ValidationError(sec, - 'A section should have an associated repository', - LABEL_WARNING) + msg = "A section should have an associated repository" + yield ValidationError(sec, msg, LABEL_WARNING) return try: tsec = sec.get_terminology_equivalent() except Exception as exc: - yield ValidationError(sec, - 'Could not load terminology: %s' % exc, - LABEL_WARNING) + msg = "Could not load terminology: %s" % exc + yield ValidationError(sec, msg, LABEL_WARNING) return if tsec is None: - yield ValidationError(sec, - "Section type '%s' not found in terminology" % sec.type, - LABEL_WARNING) + msg = "Section type '%s' not found in terminology" % sec.type + yield ValidationError(sec, msg, LABEL_WARNING) Validation.register_handler('section', section_repository_present) @@ -141,8 +160,7 @@ def document_unique_ids(doc): Traverse an odML Document and check whether all assigned ids are unique within the document. - Yields all duplicate odML object id entries - that are encountered. + Yields all duplicate odML object id entries that are encountered. :param doc: odML document """ @@ -156,12 +174,10 @@ def section_unique_ids(parent, id_map=None): Traverse a parent (odML Document or Section) and check whether all assigned ids are unique. - A "id":"odML object / path" dictionary of additional - 'to-be-excluded' ids may be handed in via the - *id_map* attribute. + A "id":"odML object / path" dictionary of additional 'to-be-excluded' ids may be + handed in via the *id_map* attribute. - Yields all duplicate odML object id entries - that are encountered. + Yields all duplicate odML object id entries that are encountered. :param parent: odML Document or Section :param id_map: "id":"odML object / path" dictionary @@ -174,8 +190,8 @@ def section_unique_ids(parent, id_map=None): yield i if sec.id in id_map: - yield ValidationError(sec, "Duplicate id in Section '%s' and %s" % - (sec.get_path(), id_map[sec.id])) + msg = "Duplicate id in Section '%s' and %s" % (sec.get_path(), id_map[sec.id]) + yield ValidationError(sec, msg) else: id_map[sec.id] = "Section '%s'" % sec.get_path() @@ -185,15 +201,12 @@ def section_unique_ids(parent, id_map=None): def property_unique_ids(section, id_map=None): """ - Check whether all ids assigned to the odML - Properties of an odML Section are unique. + Checks whether all ids assigned to the odML Properties of an odML Section are unique. - A "id":"odML object / path" dictionary of additional - 'to-be-excluded' ids may be handed in via the - *id_map* attribute. 
+    An "id":"odML object / path" dictionary of additional 'to-be-excluded' ids may be
+    handed in via the *id_map* attribute.
 
-    Yields all duplicate odML object id entries
-    that are encountered.
+    Yields all duplicate odML object id entries that are encountered.
 
     :param section: odML Section
     :param id_map: "id":"odML object / path" dictionary
@@ -203,8 +216,9 @@ def property_unique_ids(section, id_map=None):
 
     for prop in section.properties:
         if prop.id in id_map:
-            yield ValidationError(prop, "Duplicate id in Property '%s' and %s" %
-                                  (prop.get_path(), id_map[prop.id]))
+            msg = "Duplicate id in Property '%s' and %s" % (prop.get_path(),
+                                                            id_map[prop.id])
+            yield ValidationError(prop, msg)
         else:
             id_map[prop.id] = "Property '%s'" % prop.get_path()
 
@@ -215,13 +229,13 @@ def property_unique_ids(section, id_map=None):
 def object_unique_names(obj, children, attr=lambda x: x.name,
                         msg="Object names must be unique"):
     """
-    Test that object names within one section are unique
-
-    *attr* is a function, that returns the item that needs to be unique
+    Tests that object names within one Section are unique.
 
-    *children* is a function, that returns the children to be
-    considered. This is to be able to use the same function
-    for sections and properties
+    :param obj: odml class instance the validation is applied on.
+    :param children: a function that returns the children to be considered.
+                     This is to be able to use the same function for sections and properties.
+    :param attr: a function that returns the item that needs to be unique.
+    :param msg: error message that will be registered upon a ValidationError.
     """
     names = set(map(attr, children(obj)))
     if len(names) == len(children(obj)):
@@ -234,6 +248,11 @@ def object_unique_names(obj, children, attr=lambda x: x.name,
 
 
 def section_unique_name_type(obj):
+    """
+    Tests that the values of names and types within the scope of a Section are unique.
+
+    :param obj: odml class instance the validation is applied on.
+    """
     for i in object_unique_names(
             obj, attr=lambda x: (x.name, x.type),
@@ -243,6 +262,11 @@ def section_unique_name_type(obj):
 
 
 def property_unique_names(obj):
+    """
+    Tests that the values of Property names within the scope of a Section are unique.
+
+    :param obj: odml class instance the validation is applied on.
+    """
     for i in object_unique_names(obj, lambda x: x.properties):
         yield i
 
@@ -254,11 +278,9 @@ def property_unique_names(obj):
 
 def property_terminology_check(prop):
     """
-    Executes a couple of checks:
-
-    1. warn, if there are properties that do not occur in the terminology
+    1. warn, if there are properties that do not occur in the terminology.
     2. warn, if there are multiple values with different units or the unit does
-       not match the one in the terminology
+       not match the one in the terminology.
     """
     tsec = prop.parent.get_terminology_equivalent()
     if tsec is None:
@@ -266,9 +288,8 @@ def property_terminology_check(prop):
     try:
         tsec.properties[prop.name]
     except KeyError:
-        yield ValidationError(prop,
-                              "Property '%s' not found in terminology" % prop.name,
-                              LABEL_WARNING)
+        msg = "Property '%s' not found in terminology" % prop.name
+        yield ValidationError(prop, msg, LABEL_WARNING)
 
 
 Validation.register_handler('property', property_terminology_check)
@@ -276,8 +297,8 @@ def property_terminology_check(prop):
 
 def property_dependency_check(prop):
     """
-    Warn, if the dependency attribute refers to a non-existent attribute
-    or the dependency_value does not match
+    Produces a warning if the dependency attribute refers to a non-existent attribute
+    or the dependency_value does not match.
     """
     dep = prop.dependency
     if dep is None:
@@ -286,14 +307,13 @@ def property_dependency_check(prop):
     try:
         dep_obj = prop.parent[dep]
     except KeyError:
-        yield ValidationError(prop,
-                              "Property refers to a non-existent dependency object",
-                              LABEL_WARNING)
+        msg = "Property refers to a non-existent dependency object"
+        yield ValidationError(prop, msg, LABEL_WARNING)
         return
 
     if prop.dependency_value not in dep_obj.values[0]:
-        yield ValidationError(prop, "Dependency-value is not equal to value of"
-                                    " the property's dependency", LABEL_WARNING)
+        msg = "Dependency-value is not equal to value of the property's dependency"
+        yield ValidationError(prop, msg, LABEL_WARNING)
 
 
 Validation.register_handler('property', property_dependency_check)
diff --git a/test/test_dumper.py b/test/test_dumper.py
index 2a326ad4..da879980 100644
--- a/test/test_dumper.py
+++ b/test/test_dumper.py
@@ -38,7 +38,7 @@ def setUp(self):
     def test_dump_doc(self):
         # This test dumps the whole document and checks it word by word.
         # If possible, maybe some better way of testing this ?
-        odml.tools.dumper.dumpDoc(self.doc)
+        odml.tools.dumper.dump_doc(self.doc)
         output = [x.strip() for x in self.captured_stdout.getvalue().split('\n') if x]
         expected_output = []
         expected_output.append("*Cell ()")
diff --git a/test/test_links.py b/test/test_links.py
index 0b75517d..64beeea0 100644
--- a/test/test_links.py
+++ b/test/test_links.py
@@ -6,7 +6,7 @@ class TestLinks(unittest.TestCase):
 
     def setUp(self):
         self.doc = samplefile.SampleFileCreator().create_document()
-        # for s in self.doc: xmlparser.dumpSection(s)
+        # for s in self.doc: xmlparser.dump_section(s)
 
     def test_link_creation(self):
         obj = self.doc.sections[0].sections[0]
@@ -28,14 +28,14 @@ def no_test_circles(self):
         obj = self.doc.sections[0].sections[0]
         dst = self.doc.sections[0]
 
-        samplefile.dumper.dumpSection(dst)
+        samplefile.dumper.dump_section(dst)
         obj.link = "/sec 0"
 
         # self.assertEqual(obj.sections, dst.sections) # this will FAIL
         # self.assertEqual(obj.properties, dst.properties)
 
         obj.clean()
-        samplefile.dumper.dumpSection(dst)
+        samplefile.dumper.dump_section(dst)
 
     def test_merge(self):
         obj = self.doc.sections[0].sections[0] # must be an empty section
diff --git a/test/test_samplefile.py b/test/test_samplefile.py
index 6fc2d25b..ffa95133 100644
--- a/test/test_samplefile.py
+++ b/test/test_samplefile.py
@@ -104,7 +104,7 @@ class SampleFileCreatorTest(unittest.TestCase):
 
     def test_samplefile(self):
         doc = SampleFileCreator().create_document()
-        # dumper.dumpDoc(doc)
+        # dumper.dump_doc(doc)
 
 
 class SampleFileOperationTest(unittest.TestCase):
@@ -181,10 +181,10 @@ def test_restore(self):
 #        self.assertEqual(a, b)
 #        print "A ---------------------------------"
 #        for sec in doc.sections:
-#            xmlparser.dumpSection(sec)
+#            xmlparser.dump_section(sec)
 #        print "B ---------------------------------"
 #        for sec in self.doc.sections:
-#            xmlparser.dumpSection(sec)
+#            xmlparser.dump_section(sec)
 #        print "-----------------------------------"
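
Note: the following is a minimal usage sketch of the Validation API documented above, not part of the changeset itself. The custom handler, the document contents, and all names below are illustrative assumptions; only register_handler, ValidationError, and the errors attribute come from the code in this diff.

import odml
from odml.validation import Validation, ValidationError, LABEL_WARNING


def section_has_properties(sec):
    # Custom rule (illustrative): warn about sections that carry no properties.
    # Handlers are generator functions yielding ValidationError objects.
    if not sec.properties:
        yield ValidationError(sec, "Section has no properties", LABEL_WARNING)


# Handlers registered for 'section' are applied to every section of a document.
Validation.register_handler('section', section_has_properties)

# Build a small example document; contents are made up for the sketch.
doc = odml.Document(author="jdoe")
odml.Section(name="empty", parent=doc)

# Instantiating Validation runs all registered handlers over the document
# and collects the results in the 'errors' attribute.
validation = Validation(doc)
for err in validation.errors:
    print(err.path, err.rank)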