diff --git a/README.rst b/README.rst index c5aeb62..7be2dda 100644 --- a/README.rst +++ b/README.rst @@ -37,11 +37,11 @@ To use # Derive Python date objects # lower and upper bounds that strictly adhere to the given range - >>> e.lower_strict(), e.upper_strict() - (datetime.date(1979, 8, 1), datetime.date(1979, 8, 31)) + >>> e.lower_strict()[:3], e.upper_strict()[:3] + ((1979, 8, 1), (1979, 8, 31)) # lower and upper bounds that are padded if there's indicated uncertainty - >>> e.lower_fuzzy(), e.upper_fuzzy() - (datetime.date(1979, 7, 1), datetime.date(1979, 9, 30)) + >>> e.lower_fuzzy()[:3], e.upper_fuzzy()[:3] + ((1979, 7, 1), (1979, 9, 30)) # Date intervals >>> interval = parse_edtf("1979-08~/open") @@ -50,9 +50,9 @@ To use # Intervals have lower and upper EDTF objects. >>> interval.lower, interval.upper (UncertainOrApproximate: '1979-08~', UncertainOrApproximate: 'open') - >>> interval.lower.upper_strict() - datetime.date(1979, 8, 31) - >>> interval.upper.lower_strict() #'open' is interpreted to mean 'still happening'. + >>> interval.lower.upper_strict()[:3] + (1979, 8, 31) + >>> interval.upper.lower_strict() # 'open' is interpreted to mean 'still happening'. [Today's date] # Date collections @@ -296,6 +296,31 @@ few different Python dates, depending on the circumstance. Generally, Python dates are used for sorting and filtering, and are not displayed directly to users. + +``struct_time`` date representation +----------------------------------- + +Because Python's ``datetime`` module does not support dates outside the range +1 AD to 9999 AD, we return dates as ``time.struct_time`` objects by default +instead of the ``datetime.date`` or ``datetime.datetime`` objects you might +expect. 
+ + The ``struct_time`` representation is more difficult to work with, but can be +sorted as-is, which is the primary use-case, and can be converted relatively +easily to ``date`` or ``datetime`` objects (provided the year is within 1 to +9999 AD) or to date objects in more flexible libraries like +`astropy.time <https://docs.astropy.org/en/stable/time/>`_ +for years outside these bounds. + +If you are sure you are working with dates within the range supported by +Python's ``datetime`` module, you can get these more convenient objects using +the ``edtf.struct_time_to_date`` and ``edtf.struct_time_to_datetime`` +functions. + +NOTE: This library previously did return ``date`` and ``datetime`` objects +from methods by default before we switched to ``struct_time``. See ticket +`<https://github.com/ixc/python-edtf/issues/26>`_. + ``lower_strict`` and ``upper_strict`` ------------------------------------- @@ -308,9 +333,21 @@ natural sort order. In a descending sort (most recent first), sort by ``upper_strict``:: >>> e = parse_edtf('1912-04~') - >>> e.lower_strict() + + >>> e.lower_strict() # Returns struct_time + time.struct_time(tm_year=1912, tm_mon=4, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=0, tm_isdst=-1) + + >>> e.lower_strict()[:3] # Show only interesting parts of struct_time + (1912, 4, 1) + + >>> from edtf import struct_time_to_date + >>> struct_time_to_date(e.lower_strict()) # Convert to date datetime.date(1912, 4, 01) - >>> e.upper_strict() + + >>> e.upper_strict()[:3] + (1912, 4, 30) + + >>> struct_time_to_date(e.upper_strict()) datetime.date(1912, 4, 30) ``lower_fuzzy`` and ``upper_fuzzy`` @@ -330,16 +367,16 @@ is, if a date is approximate at the month scale, it is padded by a month. 
If it is approximate at the year scale, it is padded by a year:: >>> e = parse_edtf('1912-04~') - >>> e.lower_fuzzy() # padding is 100% of a month - datetime.date(1912, 3, 1) - >>> e.upper_fuzzy() - datetime.date(1912, 5, 30) + >>> e.lower_fuzzy()[:3] # padding is 100% of a month + (1912, 3, 1) + >>> e.upper_fuzzy()[:3] + (1912, 5, 30) >>> e = parse_edtf('1912~') - >>> e.lower_fuzzy() # padding is 100% of a year - datetime.date(1911, 1, 1) - >>> e.upper_fuzzy() - datetime.date(1913, 12, 31) + >>> e.lower_fuzzy()[:3] # padding is 100% of a year + (1911, 1, 1) + >>> e.upper_fuzzy()[:3] + (1913, 12, 31) One can interpret uncertain or approximate dates as 'plus or minus a [level of precision]'. @@ -347,16 +384,6 @@ One can interpret uncertain or approximate dates as 'plus or minus a If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'. -Long years ----------- - -Since EDTF covers a much greater timespan than Python ``date`` objects, it is -easy to exceed the bounds of valid Python ``date``s. In this case, the returned -dates are clamped to ``date.MIN`` and ``date.MAX``. - -Future revisions will include numerical interpretations of dates for better -sortability. - Seasons ------- @@ -381,9 +408,17 @@ the ``natural_text_field`` parameter of your ``EDTFField``. When your model is saved, the ``natural_text_field`` value will be parsed to set the ``date_edtf`` value, and the underlying EDTF object will set the -``_earliest`` and ``_latest`` fields on the model. +``_earliest`` and ``_latest`` fields on the model to a float value representing +the Julian Date. -:: + +**WARNING**: The conversion to and from Julian Date numerical values can be +inaccurate, especially for ancient dates back to thousands of years BC. Ideally +Julian Date values should be used for range and ordering operations only where +complete accuracy is not required. 
They should **not** be used for definitive +storage or for display after roundtrip conversions. + +Example usage:: from django.db import models from edtf.fields import EDTFField @@ -405,11 +440,11 @@ the ``date_edtf`` value, and the underlying EDTF object will set the null=True, ) # use for filtering - date_earliest = models.DateField(blank=True, null=True) - date_latest = models.DateField(blank=True, null=True) + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) # use for sorting - date_sort_ascending = models.DateField(blank=True, null=True) - date_sort_descending = models.DateField(blank=True, null=True) + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = models.FloatField(blank=True, null=True) Since the ``EDTFField`` and the ``_earliest`` and ``_latest`` field values are diff --git a/changelog.rst b/changelog.rst index 8a30865..ea5b6fa 100644 --- a/changelog.rst +++ b/changelog.rst @@ -1,6 +1,38 @@ Changelog ========= +In development +-------------- + + +4.0 (2018-05-31) +---------------- + +* Remove 1 AD - 9999 AD restriction on date ranges imposed by Python's + ``datetime`` module (#26). + + **WARNING**: This involves a breaking API change where the following methods + return a ``time.struct_time`` object instead of ``datetime.date`` or + ``datetime.datetime`` objects:: + + lower_strict() + upper_strict() + lower_fuzzy() + upper_fuzzy() + +* Add ``jdutil`` library code by Matt Davis at + `<https://gist.github.com/jiffyclub/1294443>`_ to convert dates to numerical + float representations. + +* Update ``EDTFField`` to store derived upper/lower strict/fuzzy date values as + numerical values in Django's ``FloatField`` fields, when available, to permit + storage of arbitrary date/time values. 
+ + The older approach where `DateField` fields are used instead is still + supported but not recommended, since this usage will break for date/time + values outside the range 1 AD to 9999 AD. + + 3.0 (2018-02-13) ---------------- diff --git a/edtf/__init__.py b/edtf/__init__.py index d0efde5..a86232f 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -1,3 +1,6 @@ from edtf.parser.grammar import parse_edtf from edtf.natlang import text_to_edtf from edtf.parser.parser_classes import * +from edtf.convert import dt_to_struct_time, struct_time_to_date, \ + struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ + jd_to_struct_time diff --git a/edtf/convert.py b/edtf/convert.py new file mode 100644 index 0000000..c1bfd3a --- /dev/null +++ b/edtf/convert.py @@ -0,0 +1,145 @@ +from time import struct_time +from datetime import date, datetime + +from edtf import jdutil + + +TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec +TIME_EMPTY_EXTRAS = [0, 0, -1] # tm_wday, tm_yday, tm_isdst + + +def dt_to_struct_time(dt): + """ + Convert a `datetime.date` or `datetime.datetime` to a `struct_time` + representation *with zero values* for data fields that we cannot always + rely on for ancient or far-future dates: tm_wday, tm_yday, tm_isdst + + NOTE: If it wasn't for the requirement that the extra fields are unset + we could use the `timetuple()` method instead of this function. + """ + if isinstance(dt, datetime): + return struct_time( + [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] + + TIME_EMPTY_EXTRAS + ) + elif isinstance(dt, date): + return struct_time( + [dt.year, dt.month, dt.day] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS + ) + else: + raise NotImplementedError( + "Cannot convert %s to `struct_time`" % type(dt)) + + +def struct_time_to_date(st): + """ + Return a `datetime.date` representing the provided `struct_time. + + WARNING: This will fail for dates with years before 1 AD or after 9999 AD. 
+ """ + return date(*st[:3]) + + +def struct_time_to_datetime(st): + """ + Return a `datetime.datetime` representing the provided `struct_time. + + WARNING: This will fail for dates with years before 1 AD or after 9999 AD. + """ + return datetime(*st[:6]) + + +def trim_struct_time(st, strip_time=False): + """ + Return a `struct_time` based on the one provided but with the extra fields + `tm_wday`, `tm_yday`, and `tm_isdst` reset to default values. + + If `strip_time` is set to true the time value are also set to zero: + `tm_hour`, `tm_min`, and `tm_sec`. + """ + if strip_time: + return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + else: + return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) + + +def struct_time_to_jd(st): + """ + Return a float number representing the Julian Date for the given + `struct_time`. + + NOTE: extra fields `tm_wday`, `tm_yday`, and `tm_isdst` are ignored. + """ + year, month, day = st[:3] + hours, minutes, seconds = st[3:6] + + # Convert time of day to fraction of day + day += jdutil.hmsm_to_days(hours, minutes, seconds) + + return jdutil.date_to_jd(year, month, day) + + +def jd_to_struct_time(jd): + """ + Return a `struct_time` converted from a Julian Date float number. + + WARNING: Conversion to then from Julian Date value to `struct_time` can be + inaccurate and lose or gain time, especially for BC (negative) years. + + NOTE: extra fields `tm_wday`, `tm_yday`, and `tm_isdst` are set to default + values, not real ones. + """ + year, month, day = jdutil.jd_to_date(jd) + + # Convert time of day from fraction of day + day_fraction = day - int(day) + hour, minute, second, ms = jdutil.days_to_hmsm(day_fraction) + day = int(day) + + # This conversion can return negative values for items we do not want to be + # negative: month, day, hour, minute, second. 
+ year, month, day, hour, minute, second = _roll_negative_time_fields( + year, month, day, hour, minute, second) + + return struct_time( + [year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS + ) + + +def _roll_negative_time_fields(year, month, day, hour, minute, second): + """ + Fix date/time fields which have nonsense negative values for any field + except for year by rolling the overall date/time value backwards, treating + negative values as relative offsets of the next higher unit. + + For example minute=5, second=-63 becomes minute=3, second=57 (5 minutes + less 63 seconds) + + This is very unsophisticated handling of negative values which we would + ideally do with `dateutil.relativedelta` but cannot because that class does + not support arbitrary dates, especially not negative years which is the + only case where these nonsense values are likely to occur anyway. + + NOTE: To greatly simplify the logic we assume all months are 30 days long. + """ + if second < 0: + minute += int(second / 60.0) # Adjust by whole minute in secs + minute -= 1 # Subtract 1 for negative second + second %= 60 # Convert negative second to positive remainder + if minute < 0: + hour += int(minute / 60.0) # Adjust by whole hour in minutes + hour -= 1 # Subtract 1 for negative minutes + minute %= 60 # Convert negative minute to positive remainder + if hour < 0: + day += int(hour / 24.0) # Adjust by whole day in hours + day -= 1 # Subtract 1 for negative minutes + hour %= 24 # Convert negative hour to positive remainder + if day < 0: + month += int(day / 30.0) # Adjust by whole month in days (assume 30) + month -= 1 # Subtract 1 for negative minutes + day %= 30 # Convert negative day to positive remainder + if month < 0: + year += int(month / 12.0) # Adjust by whole year in months + year -= 1 # Subtract 1 for negative minutes + month %= 12 # Convert negative month to positive remainder + return (year, month, day, hour, minute, second) diff --git a/edtf/fields.py b/edtf/fields.py 
# Note: The Python datetime module assumes an infinitely valid Gregorian calendar.
#       The Gregorian calendar took effect after 10-15-1582 and the dates 10-05 through
#       10-14-1582 never occurred. Python datetime objects will produce incorrect
#       time deltas if one date is from before 10-15-1582.

def mjd_to_jd(mjd):
    """
    Convert Modified Julian Day to Julian Day.

    Parameters
    ----------
    mjd : float
        Modified Julian Day

    Returns
    -------
    jd : float
        Julian Day

    """
    return mjd + 2400000.5


def jd_to_mjd(jd):
    """
    Convert Julian Day to Modified Julian Day

    Parameters
    ----------
    jd : float
        Julian Day

    Returns
    -------
    mjd : float
        Modified Julian Day

    """
    return jd - 2400000.5


def date_to_jd(year, month, day):
    """
    Convert a date to Julian Day.

    Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet',
        4th ed., Duffet-Smith and Zwart, 2011.

    Parameters
    ----------
    year : int
        Year as integer. Years preceding 1 A.D. should be 0 or negative.
        The year before 1 A.D. is 0, 10 B.C. is year -9.

    month : int
        Month as integer, Jan = 1, Feb. = 2, etc.

    day : float
        Day, may contain fractional part.

    Returns
    -------
    jd : float
        Julian Day

    Examples
    --------
    Convert 6 a.m., February 17, 1985 to Julian Day

    >>> date_to_jd(1985,2,17.25)
    2446113.75

    """
    # January and February are counted as months 13 and 14 of the previous
    # year by this algorithm.
    if month == 1 or month == 2:
        yearp = year - 1
        monthp = month + 12
    else:
        yearp = year
        monthp = month

    # this checks where we are in relation to October 15, 1582, the beginning
    # of the Gregorian calendar.
    if ((year < 1582) or
        (year == 1582 and month < 10) or
        (year == 1582 and month == 10 and day < 15)):
        # before start of Gregorian calendar
        B = 0
    else:
        # after start of Gregorian calendar
        A = math.trunc(yearp / 100.)
        B = 2 - A + math.trunc(A / 4.)

    if yearp < 0:
        C = math.trunc((365.25 * yearp) - 0.75)
    else:
        C = math.trunc(365.25 * yearp)

    D = math.trunc(30.6001 * (monthp + 1))

    jd = B + C + D + day + 1720994.5

    return jd


def jd_to_date(jd):
    """
    Convert Julian Day to date.

    Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet',
        4th ed., Duffet-Smith and Zwart, 2011.

    Parameters
    ----------
    jd : float
        Julian Day

    Returns
    -------
    year : int
        Year as integer. Years preceding 1 A.D. should be 0 or negative.
        The year before 1 A.D. is 0, 10 B.C. is year -9.

    month : int
        Month as integer, Jan = 1, Feb. = 2, etc.

    day : float
        Day, may contain fractional part.

    Examples
    --------
    Convert Julian Day 2446113.75 to year, month, and day.

    >>> jd_to_date(2446113.75)
    (1985, 2, 17.25)

    """
    jd = jd + 0.5

    # F is the fractional part of the day, I the whole-day count.
    F, I = math.modf(jd)
    I = int(I)

    A = math.trunc((I - 1867216.25)/36524.25)

    # 2299160 is the threshold the algorithm uses to switch between the
    # pre-Gregorian and Gregorian branches.
    if I > 2299160:
        B = I + 1 + A - math.trunc(A / 4.)
    else:
        B = I

    C = B + 1524

    D = math.trunc((C - 122.1) / 365.25)

    E = math.trunc(365.25 * D)

    G = math.trunc((C - E) / 30.6001)

    day = C - E + F - math.trunc(30.6001 * G)

    if G < 13.5:
        month = G - 1
    else:
        month = G - 13

    if month > 2.5:
        year = D - 4716
    else:
        year = D - 4715

    return year, month, day


def hmsm_to_days(hour=0, min=0, sec=0, micro=0):
    """
    Convert hours, minutes, seconds, and microseconds to fractional days.

    Parameters
    ----------
    hour : int, optional
        Hour number. Defaults to 0.

    min : int, optional
        Minute number. Defaults to 0.

    sec : int, optional
        Second number. Defaults to 0.

    micro : int, optional
        Microsecond number. Defaults to 0.

    Returns
    -------
    days : float
        Fractional days.

    Examples
    --------
    >>> hmsm_to_days(hour=6)
    0.25

    """
    # Fold each unit into the next larger one: seconds, then minutes, then
    # hours, and finally express the hours as a fraction of a day.
    days = sec + (micro / 1.e6)

    days = min + (days / 60.)

    days = hour + (days / 60.)

    return days / 24.


def days_to_hmsm(days):
    """
    Convert fractional days to hours, minutes, seconds, and microseconds.
    Precision beyond microseconds is rounded to the nearest microsecond.

    The value is rounded once to a whole number of microseconds and then
    split with integer arithmetic, so rounding can never produce an
    out-of-range field such as 1000000 microseconds.

    Parameters
    ----------
    days : float
        A fractional number of days. Expected to be less than 1; this is
        not validated. A negative value yields non-positive fields.

    Returns
    -------
    hour : int
        Hour number.

    min : int
        Minute number.

    sec : int
        Second number.

    micro : int
        Microsecond number.

    Examples
    --------
    >>> days_to_hmsm(0.1)
    (2, 24, 0, 0)

    """
    # Work on the magnitude and re-apply the sign at the end so that negative
    # input keeps producing negative (truncated-toward-zero) fields.
    sign = -1 if days < 0 else 1

    total_micro = int(round(abs(days) * 24. * 3600. * 1.e6))

    total_sec, micro = divmod(total_micro, 1000000)
    total_min, sec = divmod(total_sec, 60)
    hour, minute = divmod(total_min, 60)

    return sign * hour, sign * minute, sign * sec, sign * micro


def datetime_to_jd(date):
    """
    Convert a `datetime.datetime` object to Julian Day.

    Parameters
    ----------
    date : `datetime.datetime` instance

    Returns
    -------
    jd : float
        Julian day.

    Examples
    --------
    >>> d = datetime.datetime(1985,2,17,6)
    >>> d
    datetime.datetime(1985, 2, 17, 6, 0)
    >>> jdutil.datetime_to_jd(d)
    2446113.75

    """
    days = date.day + hmsm_to_days(
        date.hour, date.minute, date.second, date.microsecond)

    return date_to_jd(date.year, date.month, days)


def jd_to_datetime(jd):
    """
    Convert a Julian Day to an `jdutil.datetime` object.

    Parameters
    ----------
    jd : float
        Julian day.

    Returns
    -------
    dt : `jdutil.datetime` object
        `jdutil.datetime` equivalent of Julian day.

    Examples
    --------
    >>> jd_to_datetime(2446113.75)
    datetime(1985, 2, 17, 6, 0)

    """
    year, month, day = jd_to_date(jd)

    frac_days, day = math.modf(day)
    day = int(day)

    hour, min, sec, micro = days_to_hmsm(frac_days)

    # `datetime` here is this module's own `datetime` subclass, not
    # `datetime.datetime` (which is reachable as `dt.datetime`).
    return datetime(year, month, day, hour, min, sec, micro)


def timedelta_to_days(td):
    """
    Convert a `datetime.timedelta` object to a total number of days.

    Parameters
    ----------
    td : `datetime.timedelta` instance

    Returns
    -------
    days : float
        Total number of days in the `datetime.timedelta` object.

    Examples
    --------
    >>> td = datetime.timedelta(4.5)
    >>> td
    datetime.timedelta(4, 43200)
    >>> timedelta_to_days(td)
    4.5

    """
    seconds_in_day = 24. * 3600.

    # Microseconds are millionths of a second, so divide by 1e6 to convert
    # them to seconds before adding them to the seconds field.
    days = td.days + (td.seconds + (td.microseconds / 1.e6)) / seconds_in_day

    return days
def days_in_month(year, month):
    """
    Return the number of days in the given year and month, where month is
    1=January to 12=December, and respecting leap years as identified by
    `calendar.isleap()`

    Raises `KeyError` if `month` is not in the range 1 to 12.
    """
    return {
        1: 31,
        2: 29 if calendar.isleap(year) else 28,
        3: 31,
        4: 30,
        5: 31,
        6: 30,
        7: 31,
        8: 31,
        9: 30,
        10: 31,
        11: 30,
        12: 31,
    }[month]


def apply_delta(op, time_struct, delta):
    """
    Apply a `relativedelta` to a `struct_time` data structure.

    `op` is an operator function, probably always `add` or `sub`tract to
    correspond to `a_date + a_delta` and `a_date - a_delta`.

    This function is required because we cannot use standard `datetime` module
    objects for conversion when the date/time is, or will become, outside the
    boundary years 1 AD to 9999 AD.

    Returns `time_struct` itself, unchanged, when `delta` is falsy; otherwise
    returns a new `struct_time` whose `tm_wday`, `tm_yday` and `tm_isdst`
    fields hold the `TIME_EMPTY_EXTRAS` placeholder values.
    """
    if not delta:
        return time_struct  # No work to do

    try:
        dt_result = op(datetime(*time_struct[:6]), delta)
        return dt_to_struct_time(dt_result)
    except (OverflowError, ValueError):
        # Year is not within supported 1 to 9999 AD range
        pass

    # Here we fake the year to one in the acceptable range to avoid having to
    # write our own date rolling logic

    # Leap years repeat on a 400-year cycle under the rules implemented by
    # `calendar.isleap()` (which `days_in_month()` relies on), so map the year
    # onto the equivalent year of the 2000-2399 cycle. The stand-in year then
    # has exactly the same leap status as the real one; shifting by a
    # non-multiple of 400 could turn a valid 29 February into an invalid date.
    actual_year = time_struct.tm_year
    adjusted_year = 2000 + actual_year % 400
    year_diff = adjusted_year - actual_year
    # Apply delta to the date/time with adjusted year
    dt = datetime(*(adjusted_year,) + tuple(time_struct[1:6]))
    dt_result = op(dt, delta)
    # Convert result year back to its original 400-year cycle
    final_year = dt_result.year - year_diff
    return struct_time(
        (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS))
return False def __ne__(self, other): @@ -110,34 +172,44 @@ def __ne__(self, other): return str(self) != str(other) elif isinstance(other, date): return str(self) != other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() != trim_struct_time(other) return True def __gt__(self, other): if isinstance(other, EDTFObject): return self.lower_strict() > other.lower_strict() elif isinstance(other, date): - return self.lower_strict() > other + return self.lower_strict() > dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() > trim_struct_time(other) raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) def __ge__(self, other): if isinstance(other, EDTFObject): return self.lower_strict() >= other.lower_strict() elif isinstance(other, date): - return self.lower_strict() >= other + return self.lower_strict() >= dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() >= trim_struct_time(other) raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) def __lt__(self, other): if isinstance(other, EDTFObject): return self.lower_strict() < other.lower_strict() elif isinstance(other, date): - return self.lower_strict() < other + return self.lower_strict() < dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() < trim_struct_time(other) raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) def __le__(self, other): if isinstance(other, EDTFObject): return self.lower_strict() <= other.lower_strict() elif isinstance(other, date): - return self.lower_strict() <= other + return self.lower_strict() <= dt_to_struct_time(other) + elif isinstance(other, struct_time): + return self.lower_strict() <= trim_struct_time(other) raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) @@ -191,13 +263,7 @@ def 
isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - year = int(re.sub(r'[xu]', r'0', self.year)) - # Don't return 0 as year because this isn't acceptable elsewhere in - # this code and will get coerced to 1 anyway - if year == 0: - return 1 - else: - return year + return int(re.sub(r'[xu]', r'0', self.year)) else: return int(re.sub(r'[xu]', r'9', self.year)) @@ -210,49 +276,28 @@ def _precise_month(self, lean): else: return 1 if lean == EARLIEST else 12 - @staticmethod - def _days_in_month(yr, month): - return calendar.monthrange(int(yr), int(month))[1] - def _precise_day(self, lean): if not self.day or self.day == 'uu': if lean == EARLIEST: return 1 else: - return self._days_in_month( + return days_in_month( self._precise_year(LATEST), self._precise_month(LATEST) ) else: return int(self.day) def _strict_date(self, lean): - py = self._precise_year(lean) - if py < 1: # year is not positive - return date.min - - parts = { - 'year': py, - 'month': self._precise_month(lean), - 'day': self._precise_day(lean), - } - - isoish = "%(year)04d-%(month)02d-%(day)02d" % parts - - try: - dt = parse( - isoish, - fuzzy=True, - yearfirst=True, - dayfirst=False, - default=date.max if lean == LATEST else date.min - ) - return dt - - except ValueError: # year is out of range - if isoish < date.min.isoformat(): - return date.min - else: - return date.max + """ + Return a `time.struct_time` representation of the date. 
+ """ + return struct_time( + ( + self._precise_year(lean), + self._precise_month(lean), + self._precise_day(lean), + ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) @property def precision(self): @@ -280,11 +325,15 @@ def _strict_date(self, lean): def __eq__(self, other): if isinstance(other, datetime): return self.isoformat() == other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() == trim_struct_time(other) return super(DateAndTime, self).__eq__(other) def __ne__(self, other): if isinstance(other, datetime): return self.isoformat() != other.isoformat() + elif isinstance(other, struct_time): + return self._strict_date() != trim_struct_time(other) return super(DateAndTime, self).__ne__(other) @@ -305,7 +354,7 @@ def _strict_date(self, lean): return r except AttributeError: # it's a string, or no date. Result depends on the upper date upper = self.upper._strict_date(LATEST) - return upper - appsettings.DELTA_IF_UNKNOWN + return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: try: r = self.upper._strict_date(lean) @@ -314,10 +363,10 @@ def _strict_date(self, lean): return r except AttributeError: # an 'unknown' or 'open' string - depends on the lower date if self.upper and (self.upper == "open" or self.upper.date == "open"): - return date.today() # it's still happening + return dt_to_struct_time(date.today()) # it's still happening else: lower = self.lower._strict_date(EARLIEST) - return lower + appsettings.DELTA_IF_UNKNOWN + return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) # (* ************************** Level 1 *************************** *) @@ -366,7 +415,7 @@ def __str__(self): def _strict_date(self, lean): if self.date == "open": - return date.today() + return dt_to_struct_time(date.today()) if self.date =="unknown": return None # depends on the other date return self.date._strict_date(lean) @@ -412,15 +461,12 @@ def _precise_year(self): def _strict_date(self, lean): py = self._precise_year() - 
if py >= date.max.year: - return date.max - if py <= date.min.year: - return date.min - if lean == EARLIEST: - return date(py, 1, 1) + return struct_time( + [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) else: - return date(py, 12, 31) + return struct_time( + [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) class Season(Date): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 33b3430..f9dde42 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,11 +1,11 @@ import unittest - -from datetime import date - import sys +from datetime import date +from time import struct_time from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ + TIME_EMPTY_EXTRAS from edtf.parser.edtf_exceptions import EDTFParseException # Example object types and attributes. @@ -23,9 +23,6 @@ # - If there are four other values, then the first value is the lower strict, # the second value is the upper strict; the third value is the lower fuzzy date # and the final value is the upper fuzzy date. 
- - - EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day @@ -35,9 +32,9 @@ # year ('2008', '2008-01-01', '2008-12-31'), # a negative year - ('-0999', date.min.isoformat(), date.min.isoformat()), + ('-0999', '-0999-01-01', '-0999-12-31'), # year zero - ('0000', date.min.isoformat(), date.min.isoformat()), + ('0000', '0000-01-01', '0000-12-31'), # DateTimes ('2001-02-03T09:30:01', '2001-02-03'), ('2004-01-01T10:10:10Z', '2004-01-01'), @@ -77,11 +74,10 @@ # some day in 1999 ('1999-uu-uu', '1999-01-01', '1999-12-31'), - # Uncertain/Approximate lower boundary dates (BCE) -- forced to `date.min` - # becase we cannot represent BCE dates in Python at all :( - ('-0275~', date.min.isoformat(), date.min.isoformat()), - ('-0001~', date.min.isoformat(), date.min.isoformat()), - ('0000~', date.min.isoformat(), date.min.isoformat()), + # Uncertain/Approximate lower boundary dates (BCE) + ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), + ('-0001~', '-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31'), + ('0000~', '0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31'), # L1 Extended Interval # beginning unknown, end 2006 @@ -102,9 +98,9 @@ ('1984-06-02?/unknown', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), # Year exceeding 4 digits # the year 170000002 - ('y170000002', date.max.isoformat(), date.max.isoformat()), + ('y170000002', '170000002-01-01', '170000002-12-31'), # the year -170000002 - ('y-170000002', date.min.isoformat(), date.min.isoformat()), + ('y-170000002', '-170000002-01-01', '-170000002-12-31'), # Seasons # Spring, 2001 ('2001-21', '2001-03-01', '2001-05-31'), @@ -155,7 +151,7 @@ ('1560-uu-25', '1560-01-25', '1560-12-25'), ('15uu-12-uu', '1500-12-01', '1599-12-31'), # Day specified, year and month unspecified - ('uuuu-uu-23', '0001-01-23', '9999-12-23'), + ('uuuu-uu-23', '0000-01-23', '9999-12-23'), # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 
(('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), @@ -184,11 +180,12 @@ ('2004-06-uu/2004-07-03', '2004-06-01', '2004-07-03'), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('y17e7', date.max.isoformat(), date.max.isoformat()), + ('y17e7', '170000000-01-01', '170000000-12-31'), # the year -170000000 - ('y-17e7', date.min.isoformat(), date.min.isoformat()), - # Some year between 171000000 and 171999999, estimated to be 171010000 ('p3' indicates a precision of 3 significant digits.) - ('y17101e4p3', date.max.isoformat(), date.max.isoformat()), + ('y-17e7', '-170000000-01-01', '-170000000-12-31'), + # Some year between 171010000 and 171999999, estimated to be 171010000 ('p3' indicates a precision of 3 significant digits.) + # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 + # ('y17101e4p3', '171010000-01-01', '171999999-12-31'), ) BAD_EXAMPLES = ( @@ -249,11 +246,30 @@ def test_date_values(self): if len(e) == 1: continue + def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings to time structs """ + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time( + [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + # Convert string date representations into `struct_time`s + expected_lower_strict = iso_to_struct_time(expected_lower_strict) + expected_upper_strict = iso_to_struct_time(expected_upper_strict) + expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) + expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + try: - self.assertEqual(f.lower_strict().isoformat(), expected_lower_strict) - self.assertEqual(f.upper_strict().isoformat(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy().isoformat(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy().isoformat(), 
expected_upper_fuzzy) + self.assertEqual(f.lower_strict(), expected_lower_strict) + self.assertEqual(f.upper_strict(), expected_upper_strict) + self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) + self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) except Exception as x: # Write to stdout for manual debugging, I guess sys.stdout.write(str(x)) diff --git a/edtf/tests.py b/edtf/tests.py new file mode 100644 index 0000000..0e49e67 --- /dev/null +++ b/edtf/tests.py @@ -0,0 +1,134 @@ +import unittest + +from time import struct_time +from datetime import datetime, date + +from edtf import convert + + +class TestConversions(unittest.TestCase): + + def test_dt_to_struct_time_for_datetime(self): + now = datetime.now() + st = convert.dt_to_struct_time(now) + # Check equal year, month, day, hours, minutes, seconds + self.assertEqual(st[:6], now.timetuple()[:6]) + # Confirm 'extra' fields are set to defaults + self.assertEqual(st[6:], (0, 0, -1)) + + def test_dt_to_struct_time_for_date(self): + today = date.today() + st = convert.dt_to_struct_time(today) + # Check equal year, month, day + self.assertEqual(st[:3], today.timetuple()[:3]) + # Confirm time fields are zeroed + self.assertEqual(st[3:6], (0, 0, 0)) + # Confirm 'extra' fields are set to defaults + self.assertEqual(st[6:], (0, 0, -1)) + + def test_struct_time_to_date(self): + st = struct_time( + [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + d = date(*st[:3]) + self.assertEqual(d, convert.struct_time_to_date(st)) + + def test_struct_time_to_datetime(self): + st = struct_time( + [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + dt = datetime(*st[:6]) + converted_dt = convert.struct_time_to_datetime(st) + self.assertEqual(dt, converted_dt) + # Note that 'extra' fields are auto-populated by `datetime` module + self.assertEqual(converted_dt.timetuple()[6:], (3, 109, -1)) + + def test_trim_struct_time(self): + now = datetime.now() + st = now.timetuple() + trimmed_st = 
convert.trim_struct_time(st) + # Confirm trimmed `struct_time` has expected date/time values + self.assertEqual( + trimmed_st[:6], + (now.year, now.month, now.day, now.hour, now.minute, now.second) + ) + # Confirm 'extra' fields are set to defaults + self.assertEqual(trimmed_st[6:], (0, 0, -1)) + # Confirm 'extra' fields in untrimmed `struct_time` has real values + self.assertNotEqual(st[6:], (0, 0, -1)) + + def test_struct_time_to_jd(self): + # Check conversion of AD date & time to Julian Date number + st_ad = struct_time( + [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_ad = 2458227.9263194446 + self.assertEqual(jd_ad, convert.struct_time_to_jd(st_ad)) + # Check conversion of BC date & time to Julian Date number + st_bc = struct_time( + [-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_bc = 984091.9263194444 + self.assertEqual(jd_bc, convert.struct_time_to_jd(st_bc)) + + def test_jd_to_struct_time(self): + # Check conversion of Julian Date number to AD date & time + jd_ad = 2458227.9263194446 # As in `test_struct_time_to_jd` + st_ad = struct_time( + [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + self.assertEqual(st_ad, convert.jd_to_struct_time(jd_ad)) + # Check conversion of Julian Date number to BC date & time + # WARNING: Converted time is off by 1 second, 53 not 54 + jd_bc = 984091.9263194444 # As in `test_struct_time_to_jd` + st_bc = struct_time( + [-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) + self.assertEqual(st_bc, convert.jd_to_struct_time(jd_bc)) + + def test_jd_round_trip_for_extreme_future(self): + original_st = struct_time( + [999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + jd = convert.struct_time_to_jd(original_st) + converted_st = convert.jd_to_struct_time(jd) + # Confirm that year, month, day, hour, minute are correct (not second) + self.assertEqual(original_st[:5], converted_st[:5]) + # WARNING: Seconds are off by 1, should be 3 but is 2 + self.assertEqual(3 - 1, 
converted_st[5]) + + def test_jd_round_trip_for_extreme_past(self): + original_st = struct_time( + [-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time( + convert.struct_time_to_jd(original_st)) + # WARNING: We have lost a year of accuracy + self.assertEqual( + (-999999 + 1, # Year off by 1 + 8, 4, 21, 15, 3, 0, 0, -1), + tuple(converted_st)) + + def test_jd_round_trip_for_zero_year_aka_1_bc(self): + original_st = struct_time( + [0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time( + convert.struct_time_to_jd(original_st)) + self.assertEqual( + (0, 9, 5, 4, 58, 59, 0, 0, -1), + tuple(converted_st)) + + def test_jd_round_trip_for_2_bc(self): + original_st = struct_time( + [-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time( + convert.struct_time_to_jd(original_st)) + self.assertEqual( + (-1, 12, 5, 4, 58, 59, 0, 0, -1), + tuple(converted_st)) + + def test_roll_negative_time_fields(self): + # Confirm time value is adjusted as expected + year = -100 + month = -17 # More than 1 year + day = -34 # More than 1 month + hour = -25 # More than 1 day + minute = -74 # More than 1 hour + second = -253 # More than 1 minute + self.assertEqual( + (-102, 5, 24, 21, 41, 47), + convert._roll_negative_time_fields( + year, month, day, hour, minute, second) + )