From 9c469b59015bfc37543420d8ea85758bb13a64ee Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Wed, 16 Oct 2024 10:20:38 +1000 Subject: [PATCH] Redo XSD Datetime, Date, Time, Duration parser and serializers (#2929) * New xsd_datetime module, with parsers and serializers for XSD_Duration, XSD_Date, XSD_DateTime, XSD_Time, XSD_gYear, XSD_gYearMonth. Based on isodate for Python <3.11, and builtin fromisoformat for Python 3.11+ * ruff fixes in test suite changes * Fix a failing test * Add missing exports to xsd_datetime * Fix some version constraints to help CI tests pass * Fix generating negative duration strings. This fixes the broken doctest. * Fix black formatting in xsd_datetime _again_. * Add isodate back into the dockerfile requirements so that it can still build correctly with RDFLib v7.0 * correctly calculate total years in Duration constructor. * Fix some docstring generation errors * For documentation-generation reasons, don't re-export builtin parsers as xsd parsers. * Add ashleysommer to contributors list on the xsd_datetime module. * Fix wording in xsd_datetime header. 
--------- Co-authored-by: Nicholas Car --- devtools/constraints.min | 4 +- docker/latest/requirements.in | 6 +- docker/latest/requirements.txt | 8 +- docs/rdf_terms.rst | 3 +- poetry.lock | 25 +- pyproject.toml | 2 +- rdflib/plugins/sparql/operators.py | 19 +- rdflib/plugins/sparql/sparql.py | 4 +- rdflib/term.py | 32 +- rdflib/xsd_datetime.py | 677 +++++++++++++++++++ test/test_literal/test_datetime.py | 17 +- test/test_literal/test_duration.py | 9 +- test/test_literal/test_literal.py | 30 +- test/test_sparql/test_datetime_processing.py | 4 +- test/test_sparql/test_functions.py | 2 +- 15 files changed, 752 insertions(+), 90 deletions(-) create mode 100644 rdflib/xsd_datetime.py diff --git a/devtools/constraints.min b/devtools/constraints.min index 0034ea304..66deec089 100644 --- a/devtools/constraints.min +++ b/devtools/constraints.min @@ -1,11 +1,11 @@ # This file selects minimum versions to ensure that the test suite passes on # these versions. The file's extension (`.min`) is chosen to evade Dependabot # which operates on `*.{txt,in}` files. -isodate==0.6.0 +isodate==0.7.2; python_version < "3.11" pyparsing==2.1.0 importlib-metadata==4.0.0 berkeleydb==18.1.2 networkx==2.0 -html5lib==1.0.1 +html5lib-modern==1.2.0 lxml==4.3.0 orjson==3.9.14 diff --git a/docker/latest/requirements.in b/docker/latest/requirements.in index 99c4ce22d..4083467e9 100644 --- a/docker/latest/requirements.in +++ b/docker/latest/requirements.in @@ -1,4 +1,6 @@ -# This file is used for building a docker image of hte latest rdflib release. It +# This file is used for building a docker image of the latest rdflib release. It # will be updated by dependabot when new releases are made. rdflib==7.0.0 -html5lib +html5lib-modern==1.2.0 +# isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases. 
+isodate==0.7.2 diff --git a/docker/latest/requirements.txt b/docker/latest/requirements.txt index 80c3106b0..31ad73154 100644 --- a/docker/latest/requirements.txt +++ b/docker/latest/requirements.txt @@ -6,12 +6,12 @@ # html5lib-modern==1.2 # via -r docker/latest/requirements.in -isodate==0.6.1 +isodate==0.7.2; python_version < "3.11" # via rdflib pyparsing==3.0.9 # via rdflib rdflib==7.0.0 # via -r docker/latest/requirements.in -six==1.16.0 - # via - # isodate +# isodate is required to allow the Dockerfile to build on with pre-RDFLib-7.1 releases. +isodate==0.7.2 + # via -r docker/latest/requirements.in diff --git a/docs/rdf_terms.rst b/docs/rdf_terms.rst index b44b0a584..f83127da8 100644 --- a/docs/rdf_terms.rst +++ b/docs/rdf_terms.rst @@ -207,7 +207,8 @@ rdf:HTML :class:`xml.dom.minidom.DocumentFragment` .. [#f1] plain literals map directly to value space .. [#f2] Date, time and datetime literals are mapped to Python - instances using the `isodate `_ + instances using the RDFlib xsd_datetime module, that is based + on the `isodate `_ package). .. 
[#f3] this is a bit dirty - by accident the ``html5lib`` parser diff --git a/poetry.lock b/poetry.lock index eca17dbac..6cc708b96 100644 --- a/poetry.lock +++ b/poetry.lock @@ -384,18 +384,15 @@ files = [ [[package]] name = "isodate" -version = "0.6.1" +version = "0.7.2" description = "An ISO 8601 date/time/duration parser and formatter" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, + {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, + {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, ] -[package.dependencies] -six = "*" - [[package]] name = "jinja2" version = "3.1.2" @@ -1039,7 +1036,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = 
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1138,17 +1134,6 @@ core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.te doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - [[package]] name = "snowballstemmer" version = "2.2.0" @@ -1406,4 +1391,4 @@ orjson = ["orjson"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "8ad16d001c8cbd7ecd6516ee5997432868618f4dc31e89d646a54a065919269f" +content-hash = "9a2198d4a9d403531f401138a609d2c5d38899b56f4ec0af7de3b606d1e1e62e" diff --git a/pyproject.toml b/pyproject.toml index 62a0085b1..e4b431df1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-39,7 +39,7 @@ rdfgraphisomorphism = 'rdflib.tools.graphisomorphism:main' [tool.poetry.dependencies] python = "^3.8.1" -isodate = "^0.6.0" +isodate = {version=">=0.7.2,<1.0.0", python = "<3.11"} pyparsing = ">=2.1.0,<4" berkeleydb = {version = "^18.1.0", optional = true} networkx = {version = ">=2,<4", optional = true} diff --git a/rdflib/plugins/sparql/operators.py b/rdflib/plugins/sparql/operators.py index 93f748220..e4d19f664 100644 --- a/rdflib/plugins/sparql/operators.py +++ b/rdflib/plugins/sparql/operators.py @@ -21,7 +21,6 @@ from typing import Any, Callable, Dict, NoReturn, Optional, Tuple, Union, overload from urllib.parse import quote -import isodate from pyparsing import ParseResults from rdflib.namespace import RDF, XSD @@ -47,6 +46,7 @@ URIRef, Variable, ) +from rdflib.xsd_datetime import Duration, parse_datetime # type: ignore[attr-defined] def Builtin_IRI(expr: Expr, ctx: FrozenBindings) -> URIRef: @@ -521,8 +521,13 @@ def Builtin_TZ(e: Expr, ctx) -> Literal: if not d.tzinfo: return Literal("") n = d.tzinfo.tzname(d) - if n == "UTC": + if n is None: + n = "" + elif n == "UTC": n = "Z" + elif n.startswith("UTC"): + # Replace tzname like "UTC-05:00" with simply "-05:00" to match Jena tz fn + n = n[3:] return Literal(n) @@ -687,7 +692,7 @@ def default_cast(e: Expr, ctx: FrozenBindings) -> Literal: # type: ignore[retur if x.datatype and x.datatype not in (XSD.dateTime, XSD.string): raise SPARQLError("Cannot cast %r to XSD:dateTime" % x.datatype) try: - return Literal(isodate.parse_datetime(x), datatype=e.iri) + return Literal(parse_datetime(x), datatype=e.iri) except: # noqa: E722 raise SPARQLError("Cannot interpret '%r' as datetime" % x) @@ -1085,7 +1090,7 @@ def dateTimeObjects(expr: Literal) -> Any: def isCompatibleDateTimeDatatype( # type: ignore[return] obj1: Union[py_datetime.date, py_datetime.datetime], dt1: URIRef, - obj2: Union[isodate.Duration, py_datetime.timedelta], + obj2: Union[Duration, py_datetime.timedelta], dt2: URIRef, ) -> bool: """ 
@@ -1098,7 +1103,7 @@ def isCompatibleDateTimeDatatype( # type: ignore[return] return True elif dt2 == XSD.dayTimeDuration or dt2 == XSD.Duration: # checking if the dayTimeDuration has no Time Component - # else it wont be compatible with Date Literal + # else it won't be compatible with Date Literal if "T" in str(obj2): return False else: @@ -1110,7 +1115,7 @@ def isCompatibleDateTimeDatatype( # type: ignore[return] elif dt2 == XSD.dayTimeDuration or dt2 == XSD.Duration: # checking if the dayTimeDuration has no Date Component # (by checking if the format is "PT...." ) - # else it wont be compatible with Time Literal + # else it won't be compatible with Time Literal if "T" == str(obj2)[1]: return True else: @@ -1139,7 +1144,7 @@ def calculateDuration( def calculateFinalDateTime( obj1: Union[py_datetime.date, py_datetime.datetime], dt1: URIRef, - obj2: Union[isodate.Duration, py_datetime.timedelta], + obj2: Union[Duration, py_datetime.timedelta], dt2: URIRef, operation: str, ) -> Literal: diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py index 235e2dc37..8249a0ee8 100644 --- a/rdflib/plugins/sparql/sparql.py +++ b/rdflib/plugins/sparql/sparql.py @@ -19,8 +19,6 @@ Union, ) -import isodate - import rdflib.plugins.sparql from rdflib.graph import ConjunctiveGraph, Dataset, Graph from rdflib.namespace import NamespaceManager @@ -302,7 +300,7 @@ def __init__( @property def now(self) -> datetime.datetime: if self._now is None: - self._now = datetime.datetime.now(isodate.tzinfo.UTC) + self._now = datetime.datetime.now(datetime.timezone.utc) return self._now def clone( diff --git a/rdflib/term.py b/rdflib/term.py index 0a5f7c52c..bdc0e9732 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -38,7 +38,6 @@ "Literal", "Variable", ] - import logging import math import warnings @@ -67,19 +66,22 @@ from uuid import uuid4 import html5lib -from isodate import ( + +import rdflib +import rdflib.util +from rdflib.compat import long_type + +from 
.xsd_datetime import ( # type: ignore[attr-defined] Duration, duration_isoformat, - parse_date, parse_datetime, - parse_duration, parse_time, + parse_xsd_date, + parse_xsd_duration, + parse_xsd_gyear, + parse_xsd_gyearmonth, ) -import rdflib -import rdflib.util -from rdflib.compat import long_type - if TYPE_CHECKING: from .namespace import NamespaceManager from .paths import AlternativePath, InvPath, NegatedPath, Path, SequencePath @@ -1424,7 +1426,7 @@ def eq(self, other: Any) -> bool: ): return self.value == other # NOTE for type ignore: bool is a subclass of int so this won't ever run. - elif isinstance(other, bool): # type: ignore[unreachable] + elif isinstance(other, bool): # type: ignore[unreachable, unused-ignore] if self.datatype == _XSD_BOOLEAN: return self.value == other @@ -2030,13 +2032,13 @@ def _castPythonToLiteral( # noqa: N802 XSDToPython: Dict[Optional[str], Optional[Callable[[str], Any]]] = { None: None, # plain literals map directly to value space URIRef(_XSD_PFX + "time"): parse_time, - URIRef(_XSD_PFX + "date"): parse_date, - URIRef(_XSD_PFX + "gYear"): parse_date, - URIRef(_XSD_PFX + "gYearMonth"): parse_date, + URIRef(_XSD_PFX + "date"): parse_xsd_date, + URIRef(_XSD_PFX + "gYear"): parse_xsd_gyear, + URIRef(_XSD_PFX + "gYearMonth"): parse_xsd_gyearmonth, URIRef(_XSD_PFX + "dateTime"): parse_datetime, - URIRef(_XSD_PFX + "duration"): parse_duration, - URIRef(_XSD_PFX + "dayTimeDuration"): parse_duration, - URIRef(_XSD_PFX + "yearMonthDuration"): parse_duration, + URIRef(_XSD_PFX + "duration"): parse_xsd_duration, + URIRef(_XSD_PFX + "dayTimeDuration"): parse_xsd_duration, + URIRef(_XSD_PFX + "yearMonthDuration"): parse_xsd_duration, URIRef(_XSD_PFX + "hexBinary"): _unhexlify, URIRef(_XSD_PFX + "string"): None, URIRef(_XSD_PFX + "normalizedString"): None, diff --git a/rdflib/xsd_datetime.py b/rdflib/xsd_datetime.py new file mode 100644 index 000000000..bc3bebd67 --- /dev/null +++ b/rdflib/xsd_datetime.py @@ -0,0 +1,677 @@ +""" +Large parts of 
this module are taken from the ``isodate`` package. +https://pypi.org/project/isodate/ +Modifications are made to isodate features to allow compatibility with +XSD dates and durations that are not necessarily valid ISO8601 strings. + +Copyright (c) 2024, Ashley Sommer, and RDFLib contributors +Copyright (c) 2021, Hugo van Kemenade and contributors +Copyright (c) 2009-2018, Gerhard Weis and contributors +Copyright (c) 2009, Gerhard Weis +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of the <organization> nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + +from __future__ import annotations + +import re +import sys +from datetime import date, datetime, time, timedelta +from decimal import ROUND_FLOOR, Decimal +from typing import List, Tuple, Union, cast + +if sys.version_info[:3] < (3, 11, 0): + from isodate import parse_date, parse_datetime, parse_time +else: + # On python 3.11, use the built-in parsers + parse_date = date.fromisoformat + parse_datetime = datetime.fromisoformat + parse_time = time.fromisoformat + + +def fquotmod( + val: Decimal, low: Union[Decimal, int], high: Union[Decimal, int] +) -> Tuple[int, Decimal]: + """ + A divmod function with boundaries. + + """ + # assumes that all the maths is done with Decimals. + # divmod for Decimal uses truncate instead of floor as builtin + # divmod, so we have to do it manually here. + a: Decimal = val - low + b: Union[Decimal, int] = high - low + div: Decimal = (a / b).to_integral(ROUND_FLOOR) + mod: Decimal = a - div * b + # if we were not using Decimal, it would look like this. + # div, mod = divmod(val - low, high - low) + mod += low + return int(div), mod + + +def max_days_in_month(year: int, month: int) -> int: + """ + Determines the number of days of a specific month in a specific year. + """ + if month in (1, 3, 5, 7, 8, 10, 12): + return 31 + if month in (4, 6, 9, 11): + return 30 + if month < 1 or month > 12: + raise ValueError("Month must be in 1..12") + # Month is February + if ((year % 400) == 0) or ((year % 100) != 0) and ((year % 4) == 0): + return 29 + return 28 + + +class Duration: + """ + A class which represents a duration. + + The difference to datetime.timedelta is, that this class handles also + differences given in years and months. + A Duration treats differences given in year, months separately from all + other components. 
+ + A Duration can be used almost like any timedelta object, however there + are some restrictions: + - It is not really possible to compare Durations, because it is unclear, + whether a duration of 1 year is bigger than 365 days or not. + - Equality is only tested between the two (year, month vs. timedelta) + basic components. + + A Duration can also be converted into a datetime object, but this requires + a start date or an end date. + + The algorithm to add a duration to a date is defined at + http://www.w3.org/TR/xmlschema-2/#adding-durations-to-dateTimes + """ + + def __init__( + self, + days: float = 0, + seconds: float = 0, + microseconds: float = 0, + milliseconds: float = 0, + minutes: float = 0, + hours: float = 0, + weeks: float = 0, + months: Union[Decimal, float, int, str] = 0, + years: Union[Decimal, float, int, str] = 0, + ): + """ + Initialise this Duration instance with the given parameters. + """ + if not isinstance(months, Decimal): + months = Decimal(str(months)) + if not isinstance(years, Decimal): + years = Decimal(str(years)) + new_years, months = fquotmod(months, 0, 12) + self.months = months + self.years = Decimal(years + new_years) + self.tdelta = timedelta( + days, seconds, microseconds, milliseconds, minutes, hours, weeks + ) + if self.years < 0 and self.tdelta.days < 0: + raise ValueError("Duration cannot have negative years and negative days") + + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, state): + self.__dict__.update(state) + + def __getattr__(self, name): + """ + Provide direct access to attributes of included timedelta instance. + """ + return getattr(self.tdelta, name) + + def __str__(self): + """ + Return a string representation of this duration similar to timedelta. 
+ """ + params = [] + if self.years: + params.append("%d years" % self.years) + if self.months: + fmt = "%d months" + if self.months <= 1: + fmt = "%d month" + params.append(fmt % self.months) + params.append(str(self.tdelta)) + return ", ".join(params) + + def __repr__(self): + """ + Return a string suitable for repr(x) calls. + """ + return "%s.%s(%d, %d, %d, years=%s, months=%s)" % ( + self.__class__.__module__, + self.__class__.__name__, + self.tdelta.days, + self.tdelta.seconds, + self.tdelta.microseconds, + str(self.years), + str(self.months), + ) + + def __hash__(self): + """ + Return a hash of this instance so that it can be used in, for + example, dicts and sets. + """ + return hash((self.tdelta, self.months, self.years)) + + def __neg__(self): + """ + A simple unary minus. + + Returns a new Duration instance with all it's negated. + """ + negduration = Duration(years=-self.years, months=-self.months) + negduration.tdelta = -self.tdelta + return negduration + + def __add__(self, other: Union[Duration, timedelta, date, datetime]): + """ + Durations can be added with Duration, timedelta, date and datetime + objects. 
+ """ + if isinstance(other, Duration): + newduration = Duration( + years=self.years + other.years, months=self.months + other.months + ) + newduration.tdelta = self.tdelta + other.tdelta + return newduration + elif isinstance(other, timedelta): + newduration = Duration(years=self.years, months=self.months) + newduration.tdelta = self.tdelta + other + return newduration + try: + # try anything that looks like a date or datetime + # 'other' has attributes year, month, day + # and relies on 'timedelta + other' being implemented + if not (float(self.years).is_integer() and float(self.months).is_integer()): + raise ValueError( + "fractional years or months not supported for date calculations" + ) + newmonth: Decimal = Decimal(other.month) + self.months + carry, newmonth = fquotmod(newmonth, 1, 13) + newyear: int = other.year + int(self.years) + carry + maxdays: int = max_days_in_month(newyear, int(newmonth)) + newday: Union[int, float] + if other.day > maxdays: + newday = maxdays + else: + newday = other.day + newdt = other.replace(year=newyear, month=int(newmonth), day=newday) + # does a timedelta + date/datetime + return self.tdelta + newdt + except AttributeError: + # other probably was not a date/datetime compatible object + pass + # we have tried everything .... return a NotImplemented + return NotImplemented + + __radd__ = __add__ + + def __mul__(self, other): + if isinstance(other, int): + newduration = Duration(years=self.years * other, months=self.months * other) + newduration.tdelta = self.tdelta * other + return newduration + return NotImplemented + + __rmul__ = __mul__ + + def __sub__(self, other: Union[Duration, timedelta]): + """ + It is possible to subtract Duration and timedelta objects from Duration + objects. 
+ """ + if isinstance(other, Duration): + newduration = Duration( + years=self.years - other.years, months=self.months - other.months + ) + newduration.tdelta = self.tdelta - other.tdelta + return newduration + try: + # do maths with our timedelta object .... + newduration = Duration(years=self.years, months=self.months) + newduration.tdelta = self.tdelta - other + return newduration + except TypeError: + # looks like timedelta - other is not implemented + pass + return NotImplemented + + def __rsub__(self, other: Union[timedelta, date, datetime]): + """ + It is possible to subtract Duration objects from date, datetime and + timedelta objects. + """ + # TODO: there is some weird behaviour in date - timedelta ... + # if timedelta has seconds or microseconds set, then + # date - timedelta != date + (-timedelta) + # for now we follow this behaviour to avoid surprises when mixing + # timedeltas with Durations, but in case this ever changes in + # the stdlib we can just do: + # return -self + other + # instead of all the current code + + if isinstance(other, timedelta): + tmpdur = Duration() + tmpdur.tdelta = other + return tmpdur - self + try: + # check if other behaves like a date/datetime object + # does it have year, month, day and replace? 
+ if not (float(self.years).is_integer() and float(self.months).is_integer()): + raise ValueError( + "fractional years or months not supported for date calculations" + ) + newmonth: Decimal = Decimal(other.month) - self.months + carry, newmonth = fquotmod(newmonth, 1, 13) + newyear: int = other.year - int(self.years) + carry + maxdays: int = max_days_in_month(newyear, int(newmonth)) + newday: Union[int, float] + if other.day > maxdays: + newday = maxdays + else: + newday = other.day + newdt = other.replace(year=newyear, month=int(newmonth), day=newday) + return newdt - self.tdelta + except AttributeError: + # other probably was not compatible with data/datetime + pass + return NotImplemented + + def __eq__(self, other): + """ + If the years, month part and the timedelta part are both equal, then + the two Durations are considered equal. + """ + if isinstance(other, Duration): + if (self.years * 12 + self.months) == ( + other.years * 12 + other.months + ) and self.tdelta == other.tdelta: + return True + return False + # check if other con be compared against timedelta object + # will raise an AssertionError when optimisation is off + if self.years == 0 and self.months == 0: + return self.tdelta == other + return False + + def __ne__(self, other): + """ + If the years, month part or the timedelta part is not equal, then + the two Durations are considered not equal. + """ + if isinstance(other, Duration): + if (self.years * 12 + self.months) != ( + other.years * 12 + other.months + ) or self.tdelta != other.tdelta: + return True + return False + # check if other can be compared against timedelta object + # will raise an AssertionError when optimisation is off + if self.years == 0 and self.months == 0: + return self.tdelta != other + return True + + def totimedelta(self, start=None, end=None): + """ + Convert this duration into a timedelta object. + + This method requires a start datetime or end datetime, but raises + an exception if both are given. 
+ """ + if start is None and end is None: + raise ValueError("start or end required") + if start is not None and end is not None: + raise ValueError("only start or end allowed") + if start is not None: + return (start + self) - start + return end - (end - self) + + +ISO8601_PERIOD_REGEX = re.compile( + r"^(?P<sign>[+-])?" + r"P(?!\b)" + r"(?P<years>[0-9]+([,.][0-9]+)?Y)?" + r"(?P<months>[0-9]+([,.][0-9]+)?M)?" + r"(?P<weeks>[0-9]+([,.][0-9]+)?W)?" + r"(?P<days>[0-9]+([,.][0-9]+)?D)?" + r"((?P<separator>T)(?P<hours>[0-9]+([,.][0-9]+)?H)?" + r"(?P<minutes>[0-9]+([,.][0-9]+)?M)?" + r"(?P<seconds>[0-9]+([,.][0-9]+)?S)?)?$" +) +# regular expression to parse ISO duration strings. + + +def parse_xsd_duration( + dur_string: str, as_timedelta_if_possible: bool = True +) -> Union[Duration, timedelta]: + """ + Parses an ISO 8601 durations into datetime.timedelta or Duration objects. + + If the ISO date string does not contain years or months, a timedelta + instance is returned, else a Duration instance is returned. + + The following duration formats are supported: + -``PnnW`` duration in weeks + -``PnnYnnMnnDTnnHnnMnnS`` complete duration specification + -``PYYYYMMDDThhmmss`` basic alternative complete date format + -``PYYYY-MM-DDThh:mm:ss`` extended alternative complete date format + -``PYYYYDDDThhmmss`` basic alternative ordinal date format + -``PYYYY-DDDThh:mm:ss`` extended alternative ordinal date format + + The '-' is optional. + + Limitations: ISO standard defines some restrictions about where to use + fractional numbers and which component and format combinations are + allowed. This parser implementation ignores all those restrictions and + returns something when it is able to find all necessary components. + In detail: + - it does not check, whether only the last component has fractions. + - it allows weeks specified with all other combinations + The alternative format does not support durations with years, months or + days set to 0. 
+ """ + if not isinstance(dur_string, str): + raise TypeError(f"Expecting a string: {dur_string!r}") + match = ISO8601_PERIOD_REGEX.match(dur_string) + if not match: + # try alternative format: + if dur_string.startswith("P"): + durdt = parse_datetime(dur_string[1:]) + if as_timedelta_if_possible and durdt.year == 0 and durdt.month == 0: + # FIXME: currently not possible in alternative format + # create timedelta + return timedelta( + days=durdt.day, + seconds=durdt.second, + microseconds=durdt.microsecond, + minutes=durdt.minute, + hours=durdt.hour, + ) + else: + # create Duration + return Duration( + days=durdt.day, + seconds=durdt.second, + microseconds=durdt.microsecond, + minutes=durdt.minute, + hours=durdt.hour, + months=durdt.month, + years=durdt.year, + ) + raise ValueError("Unable to parse duration string " + dur_string) + groups = match.groupdict() + for key, val in groups.items(): + if key not in ("separator", "sign"): + if val is None: + groups[key] = "0n" + # print groups[key] + if key in ("years", "months"): + groups[key] = Decimal(groups[key][:-1].replace(",", ".")) + else: + # these values are passed into a timedelta object, + # which works with floats. 
+ groups[key] = float(groups[key][:-1].replace(",", ".")) + ret: Union[Duration, timedelta] + if as_timedelta_if_possible and groups["years"] == 0 and groups["months"] == 0: + ret = timedelta( + days=groups["days"], # type: ignore[arg-type] + hours=groups["hours"], # type: ignore[arg-type] + minutes=groups["minutes"], # type: ignore[arg-type] + seconds=groups["seconds"], # type: ignore[arg-type] + weeks=groups["weeks"], # type: ignore[arg-type] + ) + if groups["sign"] == "-": + ret = timedelta(0) - ret + else: + ret = Duration( + years=cast(Decimal, groups["years"]), + months=cast(Decimal, groups["months"]), + days=groups["days"], # type: ignore[arg-type] + hours=groups["hours"], # type: ignore[arg-type] + minutes=groups["minutes"], # type: ignore[arg-type] + seconds=groups["seconds"], # type: ignore[arg-type] + weeks=groups["weeks"], # type: ignore[arg-type] + ) + if groups["sign"] == "-": + ret = Duration(0) - ret + + return ret + + +def duration_isoformat(tdt: Union[Duration, timedelta], in_weeks: bool = False) -> str: + if not in_weeks: + ret: List[str] = [] + minus = False + has_year_or_month = False + if isinstance(tdt, Duration): + if tdt.years == 0 and tdt.months == 0: + pass # don't do anything, we have no year or month + else: + has_year_or_month = True + months = tdt.years * 12 + tdt.months + if months < 0: + minus = True + months = abs(months) + # We can use divmod instead of fquotmod here because its month_count + # not month_index, and we don't have any negative months at this point. 
+ new_years, new_months = divmod(months, 12) + if new_years: + ret.append(str(new_years) + "Y") + if tdt.months: + ret.append(str(new_months) + "M") + tdt = tdt.tdelta + usecs: int = ((tdt.days * 86400) + tdt.seconds) * 1000000 + tdt.microseconds + if usecs < 0: + if minus: + raise ValueError( + "Duration cannot have negative years and negative days" + ) + elif has_year_or_month: + raise ValueError( + "Duration cannot have positive years and months but negative days" + ) + minus = True + usecs = abs(usecs) + if usecs == 0: + # No delta parts other than years and months + pass + else: + seconds, usecs = divmod(usecs, 1000000) + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + days, hours = divmod(hours, 24) + if days: + ret.append(str(days) + "D") + if hours or minutes or seconds or usecs: + ret.append("T") + if hours: + ret.append(str(hours) + "H") + if minutes: + ret.append(str(minutes) + "M") + if seconds or usecs: + if usecs: + ret.append(("%d.%06d" % (seconds, usecs)).rstrip("0")) + else: + ret.append("%d" % seconds) + ret.append("S") + if ret: + return ("-P" if minus else "P") + "".join(ret) + else: + # at least one component has to be there. 
+ return "-P0D" if minus else "P0D" + else: + if tdt.days < 0: + return f"-P{abs(tdt.days // 7)}W" + return f"P{tdt.days // 7}W" + + +def xsd_datetime_isoformat(dt: datetime): + if dt.microsecond == 0: + no_tz_str = dt.strftime("%Y-%m-%dT%H:%M:%S") + else: + no_tz_str = dt.strftime("%Y-%m-%dT%H:%M:%S.%f") + if dt.tzinfo is None: + return no_tz_str + else: + offset_string = dt.strftime("%z") + if offset_string == "+0000": + return no_tz_str + "Z" + first_char = offset_string[0] + if first_char == "+" or first_char == "-": + offset_string = offset_string[1:] + sign = first_char + else: + sign = "+" + tz_part = sign + offset_string[:2] + ":" + offset_string[2:] + return no_tz_str + tz_part + + +def parse_xsd_date(date_string: str): + """ + XSD Dates have more features than ISO8601 dates, specifically + XSD allows timezones on dates, that must be stripped off. + Also, XSD requires dashed separators, while ISO8601 is optional. + RDFLib test suite has some date strings with times, the times are expected + to be dropped during parsing. + """ + if date_string.endswith("Z") or date_string.endswith("z"): + date_string = date_string[:-1] + if date_string.startswith("-"): + date_string = date_string[1:] + minus = True + else: + minus = False + if "T" in date_string: + # RDFLib test suite has some strange date strings, with times. + # this has the side effect of also dropping the + # TZ part, that is not wanted anyway for a date. 
+ date_string = date_string.split("T")[0] + else: + has_plus = date_string.rfind("+") + if has_plus > 0: + # Drop the +07:00 timezone part + date_string = date_string[:has_plus] + else: + split_parts = date_string.rsplit("-", 1) + if len(split_parts) > 1 and ":" in split_parts[-1]: + # Drop the -09:00 timezone part + date_string = split_parts[0] + if "-" not in date_string: + raise ValueError("XSD Date string must contain at least two dashes") + return parse_date(date_string if not minus else ("-" + date_string)) + + +def parse_xsd_gyear(gyear_string: str): + """ + XSD gYear has more features than ISO8601 dates, specifically + XSD allows timezones on a gYear, that must be stripped off. + """ + if gyear_string.endswith("Z") or gyear_string.endswith("z"): + gyear_string = gyear_string[:-1] + if gyear_string.startswith("-"): + gyear_string = gyear_string[1:] + minus = True + else: + minus = False + has_plus = gyear_string.rfind("+") + if has_plus > 0: + # Drop the +07:00 timezone part + gyear_string = gyear_string[:has_plus] + else: + split_parts = gyear_string.rsplit("-", 1) + if len(split_parts) > 1 and ":" in split_parts[-1]: + # Drop the -09:00 timezone part + gyear_string = split_parts[0] + if len(gyear_string) < 4: + raise ValueError("gYear string must be at least 4 numerals in length") + gyear_string = gyear_string.lstrip("0") # strip all leading zeros + try: + y = int(gyear_string if not minus else ("-" + gyear_string)) + except ValueError: + raise ValueError("gYear string must be a valid integer") + return date(y, 1, 1) + + +def parse_xsd_gyearmonth(gym_string: str): + """ + XSD gYearMonth has more features than ISO8601 dates, specifically + XSD allows timezones on a gYearMonth, that must be stripped off. 
+ """ + if gym_string.endswith("Z") or gym_string.endswith("z"): + gym_string = gym_string[:-1] + if gym_string.startswith("-"): + gym_string = gym_string[1:] + minus = True + else: + minus = False + has_plus = gym_string.rfind("+") + if has_plus > 0: + # Drop the +07:00 timezone part + gym_string = gym_string[:has_plus] + else: + split_parts = gym_string.rsplit("-", 1) + if len(split_parts) > 1 and ":" in split_parts[-1]: + # Drop the -09:00 timezone part + gym_string = split_parts[0] + year_month_parts = gym_string.split("-", 1) + if len(year_month_parts) < 2: + raise ValueError("XSD gYearMonth string must contain one dash") + + if len(year_month_parts[0]) < 4: + raise ValueError("gYearMonth Year part must be at least 4 numerals in length") + elif len(year_month_parts[1]) < 2: + raise ValueError("gYearMonth Month part must be exactly 2 numerals in length") + year_string = year_month_parts[0].lstrip("0") # strip all leading zeros + month_string = year_month_parts[1].lstrip("0") # strip all leading zeros + try: + y = int(year_string if not minus else ("-" + year_string)) + except ValueError: + raise ValueError("gYearMonth Year part must be a valid integer") + try: + m = int(month_string) + except ValueError: + raise ValueError("gYearMonth Month part must be a valid integer") + return date(y, m, 1) + + +# Parse XSD Datetime is the same as ISO8601 Datetime +# It uses datetime.fromisoformat for python 3.11 and above +# or isodate.parse_datetime for older versions +# parse_xsd_datetime = parse_datetime + +# Parse XSD Time is the same as ISO8601 Time +# It uses time.fromisoformat for python 3.11 and above +# or isodate.parse_time for older versions +# parse_xsd_time = parse_time diff --git a/test/test_literal/test_datetime.py b/test/test_literal/test_datetime.py index f97f22318..be1e9f2d8 100644 --- a/test/test_literal/test_datetime.py +++ b/test/test_literal/test_datetime.py @@ -1,10 +1,8 @@ -from datetime import datetime - -from isodate import UTC, datetime_isoformat 
-from isodate.isostrf import DATE_EXT_COMPLETE, TZ_EXT +from datetime import datetime, timezone from rdflib.namespace import XSD from rdflib.term import Literal, URIRef +from rdflib.xsd_datetime import xsd_datetime_isoformat class TestRelativeBase: @@ -43,12 +41,7 @@ def test_timezone_z(self): ) assert isinstance(l.toPython(), datetime) - assert ( - datetime_isoformat( - l.toPython(), DATE_EXT_COMPLETE + "T" + "%H:%M:%S.%f" + TZ_EXT - ) - == dt - ) + assert xsd_datetime_isoformat(l.toPython()) == dt assert l.toPython().isoformat() == "2008-12-01T18:02:00.522630+00:00" def test_timezone_offset(self): @@ -66,8 +59,8 @@ def test_timezone_offset_to_utc(self): dt, datatype=URIRef("http://www.w3.org/2001/XMLSchema#dateTime") ) - utc_dt = l.toPython().astimezone(UTC) - assert datetime_isoformat(utc_dt) == "2010-02-10T09:36:00Z" + utc_dt = l.toPython().astimezone(timezone.utc) + assert xsd_datetime_isoformat(utc_dt) == "2010-02-10T09:36:00Z" def test_timezone_offset_millisecond(self): dt = "2011-01-16T19:39:18.239743+01:00" diff --git a/test/test_literal/test_duration.py b/test/test_literal/test_duration.py index 8973e6b65..e8ebfea2d 100644 --- a/test/test_literal/test_duration.py +++ b/test/test_literal/test_duration.py @@ -1,26 +1,25 @@ from datetime import timedelta -from isodate import Duration, parse_duration - from rdflib.namespace import XSD from rdflib.term import Literal +from rdflib.xsd_datetime import Duration, parse_xsd_duration class TestDuration: def test_to_python_timedelta(self): l = Literal("P4DT5H6M7S", datatype=XSD.dayTimeDuration) # noqa: E741 assert isinstance(l.toPython(), timedelta) - assert l.toPython() == parse_duration("P4DT5H6M7S") + assert l.toPython() == parse_xsd_duration("P4DT5H6M7S") def test_to_python_ym_duration(self): l = Literal("P1Y2M", datatype=XSD.yearMonthDuration) # noqa: E741 assert isinstance(l.toPython(), Duration) - assert l.toPython() == parse_duration("P1Y2M") + assert l.toPython() == parse_xsd_duration("P1Y2M") def 
test_to_python_ymdhms_duration(self): l = Literal("P1Y2M4DT5H6M7S", datatype=XSD.duration) # noqa: E741 assert isinstance(l.toPython(), Duration) - assert l.toPython() == parse_duration("P1Y2M4DT5H6M7S") + assert l.toPython() == parse_xsd_duration("P1Y2M4DT5H6M7S") def test_equalityself(self): x = Literal("P1Y2M3W4DT5H6M7S", datatype=XSD.duration) diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index e31e02b26..bc2bc3b45 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -6,6 +6,7 @@ from decimal import Decimal from typing import Any, Callable, Generator, Optional, Type, Union +from rdflib.xsd_datetime import Duration from test.utils import affix_tuples from test.utils.literal import LiteralChecker, literal_idfn from test.utils.namespace import EGDC @@ -28,7 +29,6 @@ except ImportError: _HAVE_HTML5LIB = False -import isodate import pytest import rdflib # needed for eval(repr(...)) below @@ -397,10 +397,10 @@ def test_ill_typed_literals( Literal("3.2", datatype=_XSD_DOUBLE), ), ( - Literal(isodate.Duration(hours=1)), - Literal(isodate.Duration(hours=1)), + Literal(Duration(hours=1)), + Literal(Duration(hours=1)), "aplusb", - Literal(isodate.Duration(hours=2)), + Literal(Duration(hours=2)), ), ( Literal(datetime.timedelta(days=1)), @@ -410,19 +410,19 @@ def test_ill_typed_literals( ), ( Literal(datetime.time.fromisoformat("04:23:01.000384")), - Literal(isodate.Duration(hours=1)), + Literal(Duration(hours=1)), "aplusb", Literal("05:23:01.000384", datatype=XSD.time), ), ( Literal(datetime.date.fromisoformat("2011-11-04")), - Literal(isodate.Duration(days=1)), + Literal(Duration(days=1)), "aplusb", Literal("2011-11-05", datatype=XSD.date), ), ( Literal(datetime.datetime.fromisoformat("2011-11-04 00:05:23.283+00:00")), - Literal(isodate.Duration(days=1)), + Literal(Duration(days=1)), "aplusb", Literal("2011-11-05T00:05:23.283000+00:00", datatype=XSD.dateTime), ), @@ -446,19 +446,19 @@ def 
test_ill_typed_literals( ), ( Literal(datetime.time.fromisoformat("04:23:01.000384")), - Literal(isodate.Duration(hours=1)), + Literal(Duration(hours=1)), "aminusb", Literal("03:23:01.000384", datatype=XSD.time), ), ( Literal(datetime.date.fromisoformat("2011-11-04")), - Literal(isodate.Duration(days=1)), + Literal(Duration(days=1)), "aminusb", Literal("2011-11-03", datatype=XSD.date), ), ( Literal(datetime.datetime.fromisoformat("2011-11-04 00:05:23.283+00:00")), - Literal(isodate.Duration(days=1)), + Literal(Duration(days=1)), "aminusb", Literal("2011-11-03T00:05:23.283000+00:00", datatype=XSD.dateTime), ), @@ -578,7 +578,7 @@ def test_ill_typed_literals( ), *affix_tuples( ( - Literal(isodate.Duration(days=4)), + Literal(Duration(days=4)), Literal(datetime.timedelta(days=1)), ), [ @@ -599,12 +599,12 @@ def test_ill_typed_literals( ), *affix_tuples( ( - Literal(isodate.Duration(days=4)), - Literal(isodate.Duration(days=1)), + Literal(Duration(days=4)), + Literal(Duration(days=1)), ), [ - ("aplusb", Literal(isodate.Duration(days=5))), - ("aminusb", Literal(isodate.Duration(days=3))), + ("aplusb", Literal(Duration(days=5))), + ("aminusb", Literal(Duration(days=3))), ], None, ), diff --git a/test/test_sparql/test_datetime_processing.py b/test/test_sparql/test_datetime_processing.py index c934e9543..7fd0b2a6c 100644 --- a/test/test_sparql/test_datetime_processing.py +++ b/test/test_sparql/test_datetime_processing.py @@ -94,8 +94,8 @@ def test_datetime_duration_subs(): SELECT (?d - ?duration AS ?next_year) WHERE { VALUES (?duration ?d) { - ("P1Y"^^xsd:yearMonthDuration"2019-05-28T12:14:45Z"^^xsd:dateTime) - ("P1Y"^^xsd:yearMonthDuration"2019-05-28"^^xsd:date) + ("P1Y"^^xsd:yearMonthDuration "2019-05-28T12:14:45Z"^^xsd:dateTime) + ("P1Y"^^xsd:yearMonthDuration "2019-05-28"^^xsd:date) } } """ diff --git a/test/test_sparql/test_functions.py b/test/test_sparql/test_functions.py index fb544142c..77b43e820 100644 --- a/test/test_sparql/test_functions.py +++ 
b/test/test_sparql/test_functions.py @@ -167,7 +167,7 @@ def test_function(expression: str, expected_result: Identifier) -> None: if isinstance(expected_result, type): assert isinstance(actual_result, expected_result) else: - assert expected_result == actual_result + assert actual_result == expected_result @pytest.mark.parametrize(