Skip to content

Commit

Permalink
fix!: use pandas.NaT for missing values in dbdate and dbtime dtypes (#67)
Browse files Browse the repository at this point in the history

* fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes

This makes them consistent with other date/time dtypes, as well as internally
consistent with the advertised `dtype.na_value`.

* adjust pandas version support for median

BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values

Release-As: 0.4.0
  • Loading branch information
tswast authored Feb 2, 2022
1 parent e9d41d1 commit f903c2c
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 60 deletions.
8 changes: 4 additions & 4 deletions db_dtypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def _datetime(
raise TypeError("Invalid value type", scalar)

def _box_func(self, x):
if pandas.isnull(x):
return None
if pandas.isna(x):
return pandas.NaT

try:
return x.astype("<M8[us]").astype(datetime.datetime).time()
Expand Down Expand Up @@ -250,8 +250,8 @@ def _datetime(
raise TypeError("Invalid value type", scalar)

def _box_func(self, x):
if pandas.isnull(x):
return None
if pandas.isna(x):
return pandas.NaT
try:
return x.astype("<M8[us]").astype(datetime.datetime).date()
except AttributeError:
Expand Down
5 changes: 2 additions & 3 deletions db_dtypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import numpy
import pandas
from pandas import NaT
import pandas.api.extensions
from pandas.api.types import is_dtype_equal, is_list_like, pandas_dtype

Expand All @@ -27,8 +26,8 @@


class BaseDatetimeDtype(pandas.api.extensions.ExtensionDtype):
na_value = NaT
kind = "o"
na_value = pandas.NaT
kind = "O"
names = None

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion db_dtypes/pandas_backports.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
numpy_validate_max = pandas.compat.numpy.function.validate_max
numpy_validate_min = pandas.compat.numpy.function.validate_min

if pandas_release >= (1, 2):
if pandas_release >= (1, 3):
nanmedian = pandas.core.nanops.nanmedian
numpy_validate_median = pandas.compat.numpy.function.validate_median

Expand Down
3 changes: 2 additions & 1 deletion testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
sqlalchemy>=1.4.13
# Make sure we test with pandas 1.3.0. The Python version isn't that relevant.
pandas==1.3.0
27 changes: 27 additions & 0 deletions tests/unit/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

# To register the types.
import db_dtypes # noqa
from db_dtypes import pandas_backports


@pytest.mark.parametrize(
Expand Down Expand Up @@ -65,3 +66,29 @@ def test_date_parsing(value, expected):
def test_date_parsing_errors(value, error):
with pytest.raises(ValueError, match=error):
pandas.Series([value], dtype="dbdate")


@pytest.mark.skipif(
    not hasattr(pandas_backports, "numpy_validate_median"),
    reason="median not available with this version of pandas",
)
@pytest.mark.parametrize(
    "values, expected",
    [
        # Three valid dates: the expected median is the middle one.
        (["1970-01-01", "1900-01-01", "2000-01-01"], datetime.date(1970, 1, 1)),
        # A single valid date surrounded by several spellings of "missing";
        # the expected median is that one date.
        (
            [
                None,
                "1900-01-01",
                # Falls back to None when this pandas has no ``pandas.NA``.
                getattr(pandas, "NA", None),
                pandas.NaT,
                float("nan"),
            ],
            datetime.date(1900, 1, 1),
        ),
        # Two valid dates: the expected median is the day halfway between.
        (["2222-02-01", "2222-02-03"], datetime.date(2222, 2, 2)),
    ],
)
def test_date_median(values, expected):
    """Check that ``Series.median()`` on a ``dbdate`` series gives ``expected``.

    The test is skipped when ``pandas_backports`` does not expose
    ``numpy_validate_median``.
    """
    median = pandas.Series(values, dtype="dbdate").median()
    assert median == expected
Loading

0 comments on commit f903c2c

Please sign in to comment.