Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return numpy.datetime64 arrays for non-standard calendars #126

Merged
merged 8 commits into from
May 16, 2014
60 changes: 52 additions & 8 deletions test/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ def test_cf_datetime(self):

@requires_netCDF4
def test_decoded_cf_datetime_array(self):
import netCDF4 as nc4

actual = conventions.DecodedCFDatetimeArray(
[0, 1, 2], 'days since 1900-01-01', 'standard')
expected = pd.date_range('1900-01-01', periods=3).values
Expand All @@ -125,13 +123,59 @@ def test_decoded_cf_datetime_array(self):
self.assertEqual(actual.dtype, np.dtype('datetime64[ns]'))
self.assertArrayEqual(actual, expected)

num_dates = [722000, 720000.5]
units = 'days since 0001-01-01 0:0:0'
@requires_netCDF4
def test_decode_non_standard_calendar(self):
import netCDF4 as nc4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could you please move this last bit to a separate test? Your new test isn't testing DecodedCFDatetimeArray anymore.


for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap',
'366_day']:
units = 'days since 0001-01-01'
times = pd.date_range('2001-04-01-00', end='2001-04-30-23',
freq='H')
noleap_time = nc4.date2num(times.to_pydatetime(), units,
calendar=calendar)
expected = times.values
actual = conventions.decode_cf_datetime(noleap_time, units,
calendar=calendar)
self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_decode_non_standard_calendar_multidim_time(self):
import netCDF4 as nc4

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this test should actually still be filtering "All-NaN"... notice the warning that appears in Travis: https://travis-ci.org/xray-pydata/xray/jobs/25267676

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: the reason why I used a warnings filter here instead of verifying the message in a test is that this warning comes upstream from numpy. It's not something issued directly by xray.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right you are.

calendar = 'noleap'
actual = conventions.DecodedCFDatetimeArray(num_dates, units, calendar)
expected = nc4.num2date(num_dates, units, calendar)
self.assertEqual(actual.dtype, np.dtype('O'))
self.assertArrayEqual(actual, expected)
units = 'days since 0001-01-01'
times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D')
times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D')
noleap_time1 = nc4.date2num(times1.to_pydatetime(), units,
calendar=calendar)
noleap_time2 = nc4.date2num(times2.to_pydatetime(), units,
calendar=calendar)
mdim_time = np.empty((len(noleap_time1), 2), )
mdim_time[:, 0] = noleap_time1
mdim_time[:, 1] = noleap_time2

expected1 = times1.values
expected2 = times2.values
actual = conventions.decode_cf_datetime(mdim_time, units,
calendar=calendar)
self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
self.assertArrayEqual(actual[:, 0], expected1)
self.assertArrayEqual(actual[:, 1], expected2)

@requires_netCDF4
def test_decode_non_calendar_fallback(self):
    """Check '360_day' decoding against netCDF4 for several start years.

    For each reference year, 100 consecutive day offsets are decoded with
    ``conventions.decode_cf_datetime``; the result must have object dtype
    and equal what ``netCDF4.num2date`` returns for the same inputs.
    """
    import netCDF4 as nc4

    calendar = '360_day'
    for start_year in (2010, 2011, 2012, 2013, 2014):
        units = 'days since {0}-01-01'.format(start_year)
        day_offsets = np.arange(100)
        # Reference values come straight from netCDF4 itself.
        reference = nc4.num2date(day_offsets, units, calendar)
        decoded = conventions.decode_cf_datetime(
            day_offsets, units, calendar=calendar)
        self.assertEqual(decoded.dtype, np.dtype('O'))
        self.assertArrayEqual(decoded, reference)

@requires_netCDF4
def test_cf_datetime_nan(self):
Expand Down
29 changes: 22 additions & 7 deletions xray/conventions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
import warnings
from collections import defaultdict, OrderedDict
from datetime import datetime

Expand Down Expand Up @@ -88,7 +89,17 @@ def nan_safe_num2date(num):
if ((calendar not in _STANDARD_CALENDARS
or min_date.year < 1678 or max_date.year >= 2262)
and min_date is not pd.NaT):

dates = nc4.num2date(num_dates, units, calendar)

if min_date.year >= 1678 and max_date.year < 2262:
try:
dates = nctime_to_nptime(dates)
except ValueError as e:
warnings.warn('Unable to decode time axis into full '
'numpy.datetime64 objects, continuing using '
'dummy netCDF4.datetime objects instead, reason:'
'{0}'.format(e), RuntimeWarning, stacklevel=2)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is indeed better to issue a single warning -- that way it can be filtered more easily.

else:
# we can safely use np.datetime64 with nanosecond precision (pandas
# likes ns precision so it can directly make DatetimeIndex objects)
Expand Down Expand Up @@ -122,6 +133,7 @@ def nan_safe_num2date(num):
+ np.datetime64(min_date))
# restore original shape and ensure dates are given in ns
dates = dates.reshape(num_dates.shape).astype('M8[ns]')

return dates


Expand All @@ -144,6 +156,15 @@ def guess_time_units(dates):
return '%s since %s' % (time_unit, dates[0])


def nctime_to_nptime(times):
"""Given an array of netCDF4.datetime objects, return an array of
numpy.datetime64 objects of the same size"""
new = np.empty(times.shape, dtype='M8[ns]')
for i, t in np.ndenumerate(times):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I didn't realize ndenumerate made this so easy.

new[i] = np.datetime64(datetime(*t.timetuple()[:6]))
return new


def encode_cf_datetime(dates, units=None, calendar=None):
"""Given an array of datetime objects, returns the tuple `(num, units,
calendar)` suitable for a CF compliant time variable.
Expand Down Expand Up @@ -246,13 +267,7 @@ def __init__(self, array, units, calendar=None):

@property
def dtype(self):
if self.calendar is None or self.calendar in _STANDARD_CALENDARS:
# TODO: return the proper dtype (object) for a standard calendar
# that can't be expressed in ns precision. Perhaps we could guess
# this from the units?
return np.dtype('datetime64[ns]')
else:
return np.dtype('O')
return np.dtype('datetime64[ns]')

def __getitem__(self, key):
return decode_cf_datetime(self.array, units=self.units,
Expand Down