Commit 9c3856d
Merge remote-tracking branch 'upstream/master' into sparse-astype-fill-value
2 parents: 7454e31 + adc54fe

38 files changed: +567 additions, -329 deletions

.circleci/config.yml

Lines changed: 1 addition & 11 deletions

@@ -1,10 +1,6 @@
 version: 2
 jobs:
-
-  # --------------------------------------------------------------------------
-  # 1. py36_locale
-  # --------------------------------------------------------------------------
-  py36_locale:
+  build:
     docker:
       - image: continuumio/miniconda:latest
     # databases configuration
@@ -34,9 +30,3 @@ jobs:
      - run:
          name: test
          command: ./ci/circle/run_circle.sh --skip-slow --skip-network
-
-workflows:
-  version: 2
-  build_and_test:
-    jobs:
-      - py36_locale

doc/source/groupby.rst

Lines changed: 27 additions & 0 deletions

@@ -995,6 +995,33 @@ Note that ``df.groupby('A').colname.std().`` is more efficient than
 is only interesting over one column (here ``colname``), it may be filtered
 *before* applying the aggregation function.

+.. note::
+   Any object column, even if it contains numerical values such as ``Decimal``
+   objects, is considered a "nuisance" column. Nuisance columns are excluded
+   from aggregate functions automatically in groupby.
+
+   If you do wish to include decimal or object columns in an aggregation with
+   other non-nuisance data types, you must do so explicitly.
+
+.. ipython:: python
+
+   from decimal import Decimal
+   df_dec = pd.DataFrame(
+       {'id': [1, 2, 1, 2],
+        'int_column': [1, 2, 3, 4],
+        'dec_column': [Decimal('0.50'), Decimal('0.15'),
+                       Decimal('0.25'), Decimal('0.40')]}
+   )
+
+   # Decimal columns can be summed explicitly by themselves...
+   df_dec.groupby(['id'])[['dec_column']].sum()
+
+   # ...but cannot be combined with standard data types or they will be excluded
+   df_dec.groupby(['id'])[['int_column', 'dec_column']].sum()
+
+   # Use .agg to aggregate over standard and "nuisance" data types at once
+   df_dec.groupby(['id']).agg({'int_column': 'sum', 'dec_column': 'sum'})
+
 .. _groupby.observed:

 Handling of (un)observed Categorical values
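The ``.agg`` path shown in the doc addition above can be run standalone; this is a minimal sketch (names like ``df_dec`` come from the example itself) showing that an explicit per-column aggregation spec keeps both the standard and the object-dtype column in the result:

```python
from decimal import Decimal

import pandas as pd

df_dec = pd.DataFrame(
    {'id': [1, 2, 1, 2],
     'int_column': [1, 2, 3, 4],
     'dec_column': [Decimal('0.50'), Decimal('0.15'),
                    Decimal('0.25'), Decimal('0.40')]}
)

# Naming each column in .agg aggregates standard and object
# ("nuisance") dtypes in one pass.
result = df_dec.groupby('id').agg({'int_column': 'sum', 'dec_column': 'sum'})
print(result)
```

For group ``id=1`` this sums rows 0 and 2, giving ``int_column`` 4 and ``dec_column`` ``Decimal('0.75')``.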

doc/source/io.rst

Lines changed: 30 additions & 0 deletions

@@ -4806,6 +4806,36 @@ default ``Text`` type for string columns:
 Because of this, reading the database table back in does **not** generate
 a categorical.

+.. _io.sql_datetime_data:
+
+Datetime data types
+'''''''''''''''''''
+
+Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing
+datetime data that is timezone naive or timezone aware. However, the resulting
+data stored in the database ultimately depends on the supported data type
+for datetime data of the database system being used.
+
+The following table lists supported data types for datetime data for some
+common databases. Other database dialects may have different data types for
+datetime data.
+
+===========  =============================================  ================
+Database     SQL Datetime Types                             Timezone Support
+===========  =============================================  ================
+SQLite       ``TEXT``                                       No
+MySQL        ``TIMESTAMP`` or ``DATETIME``                  No
+PostgreSQL   ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE``  Yes
+===========  =============================================  ================
+
+When writing timezone aware data to databases that do not support timezones,
+the data will be written as timezone naive timestamps that are in local time
+with respect to the timezone.
+
+:func:`~pandas.read_sql_table` is also capable of reading datetime data that is
+timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas
+will convert the data to UTC.
+
 Reading Tables
 ''''''''''''''

doc/source/whatsnew/v0.24.0.txt

Lines changed: 6 additions & 0 deletions

@@ -222,6 +222,7 @@ Other Enhancements
 - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`)
 - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`).
   The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
+- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See :ref:`io.sql_datetime_data` for implications (:issue:`9086`).
 - :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`)
 - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
 - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
@@ -853,6 +854,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
+- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
 - :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`).
 - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`).
@@ -1245,6 +1247,9 @@ MultiIndex
 I/O
 ^^^

+- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`)
+- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`)
+
 .. _whatsnew_0240.bug_fixes.nan_with_str_dtype:

 Proper handling of `np.NaN` in a string data-typed column with the Python engine
@@ -1292,6 +1297,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
 - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
 - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
+- Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)

 Plotting
 ^^^^^^^^

pandas/_libs/intervaltree.pxi.in

Lines changed: 4 additions & 3 deletions

@@ -105,7 +105,7 @@ cdef class IntervalTree(IntervalMixin):
         self.root.query(result, key)
         if not result.data.n:
             raise KeyError(key)
-        return result.to_array()
+        return result.to_array().astype('intp')

     def _get_partial_overlap(self, key_left, key_right, side):
         """Return all positions corresponding to intervals with the given side
@@ -155,7 +155,7 @@ cdef class IntervalTree(IntervalMixin):
                 raise KeyError(
                     'indexer does not intersect a unique set of intervals')
             old_len = result.data.n
-        return result.to_array()
+        return result.to_array().astype('intp')

     def get_indexer_non_unique(self, scalar_t[:] target):
         """Return the positions corresponding to intervals that overlap with
@@ -175,7 +175,8 @@
                 result.append(-1)
                 missing.append(i)
             old_len = result.data.n
-        return result.to_array(), missing.to_array()
+        return (result.to_array().astype('intp'),
+                missing.to_array().astype('intp'))

     def __repr__(self):
         return ('<IntervalTree[{dtype},{closed}]: '
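The ``IntervalTree`` methods patched above back ``IntervalIndex.get_indexer``; the user-visible effect of the ``astype('intp')`` casts is that indexer positions come back as platform-native ``intp`` integers. A minimal sketch of that behavior:

```python
import numpy as np
import pandas as pd

# Three intervals (0, 1], (1, 2], (2, 3] (closed='right' by default).
idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3])

# Positions of the intervals containing each target value.
indexer = idx.get_indexer([0.5, 1.5, 2.5])
print(indexer)        # [0 1 2]
print(indexer.dtype)  # intp (prints as int64 on most 64-bit platforms)
```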

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 3 additions & 3 deletions

@@ -733,11 +733,11 @@ class Timestamp(_Timestamp):
         if ts.value == NPY_NAT:
             return NaT

-        if is_string_object(freq):
-            freq = to_offset(freq)
-        elif not is_offset_object(freq):
+        if freq is None:
             # GH 22311: Try to extract the frequency of a given Timestamp input
             freq = getattr(ts_input, 'freq', None)
+        elif not is_offset_object(freq):
+            freq = to_offset(freq)

         return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
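The reordered branches above check ``freq is None`` first (inheriting the input's frequency) and otherwise normalize any non-offset ``freq`` through ``to_offset``. A small sketch of that string-to-offset normalization, using the public ``to_offset`` helper:

```python
import pandas as pd
from pandas.tseries.frequencies import to_offset

# A frequency given as a string normalizes to the same offset object
# you would get by constructing it directly.
off = to_offset('3D')
print(off)  # <3 * Days>

assert off == pd.offsets.Day(3)
```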

pandas/core/generic.py

Lines changed: 15 additions & 6 deletions

@@ -10,7 +10,7 @@
 import numpy as np
 import pandas as pd

-from pandas._libs import tslib, properties
+from pandas._libs import properties, Timestamp, iNaT
 from pandas.core.dtypes.common import (
     ensure_int64,
     ensure_object,
@@ -2397,6 +2397,15 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
         --------
         pandas.read_sql : read a DataFrame from a table

+        Notes
+        -----
+        Timezone aware datetime columns will be written as
+        ``Timestamp with timezone`` type with SQLAlchemy if supported by the
+        database. Otherwise, the datetimes will be stored as timezone unaware
+        timestamps local to the original timezone.
+
+        .. versionadded:: 0.24.0
+
         References
         ----------
         .. [1] http://docs.sqlalchemy.org
@@ -5091,7 +5100,7 @@ def get_ftype_counts(self):
         1  b  2  2.0
         2  c  3  3.0

-        >>> df.get_ftype_counts()
+        >>> df.get_ftype_counts()  # doctest: +SKIP
         float64:dense    1
         int64:dense      1
         object:dense     1
@@ -9273,9 +9282,9 @@ def describe_categorical_1d(data):
             tz = data.dt.tz
             asint = data.dropna().values.view('i8')
             names += ['top', 'freq', 'first', 'last']
-            result += [tslib.Timestamp(top, tz=tz), freq,
-                       tslib.Timestamp(asint.min(), tz=tz),
-                       tslib.Timestamp(asint.max(), tz=tz)]
+            result += [Timestamp(top, tz=tz), freq,
+                       Timestamp(asint.min(), tz=tz),
+                       Timestamp(asint.max(), tz=tz)]
         else:
             names += ['top', 'freq']
             result += [top, freq]
@@ -10613,7 +10622,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
                 issubclass(y.dtype.type, (np.datetime64, np.timedelta64))):
             result = accum_func(y, axis)
             mask = isna(self)
-            np.putmask(result, mask, tslib.iNaT)
+            np.putmask(result, mask, iNaT)
         elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
             mask = isna(self)
             np.putmask(y, mask, mask_a)
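The ``cum_func`` hunk above masks ``NaT`` positions back into cumulative results using the ``iNaT`` sentinel. The user-visible behavior it implements can be sketched with a datetime Series containing a missing value:

```python
import pandas as pd

s = pd.Series(pd.to_datetime(['2018-01-02', None, '2018-01-01']))

# With skipna=True (the default), the NaT position stays NaT while
# the running maximum continues past it.
out = s.cummax()
print(out)
```

Here ``out`` is ``[2018-01-02, NaT, 2018-01-02]``: the missing value is preserved and the cumulative maximum carries through.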

pandas/core/indexes/base.py

Lines changed: 5 additions & 9 deletions

@@ -1875,35 +1875,31 @@ def get_duplicates(self):

         Works on different Index of types.

-        >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()
+        >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()  # doctest: +SKIP
         [2, 3]
-        >>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates()
-        [2.0, 3.0]
-        >>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates()
-        ['b', 'c']

         Note that for a DatetimeIndex, it does not return a list but a new
         DatetimeIndex:

         >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
         ...                         '2018-01-03', '2018-01-04', '2018-01-04'],
         ...                        format='%Y-%m-%d')
-        >>> pd.Index(dates).get_duplicates()
+        >>> pd.Index(dates).get_duplicates()  # doctest: +SKIP
         DatetimeIndex(['2018-01-03', '2018-01-04'],
                       dtype='datetime64[ns]', freq=None)

         Sorts duplicated elements even when indexes are unordered.

-        >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()
+        >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()  # doctest: +SKIP
         [2, 3]

         Return empty array-like structure when all elements are unique.

-        >>> pd.Index([1, 2, 3, 4]).get_duplicates()
+        >>> pd.Index([1, 2, 3, 4]).get_duplicates()  # doctest: +SKIP
         []
         >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
         ...                        format='%Y-%m-%d')
-        >>> pd.Index(dates).get_duplicates()
+        >>> pd.Index(dates).get_duplicates()  # doctest: +SKIP
         DatetimeIndex([], dtype='datetime64[ns]', freq=None)
         """
         warnings.warn("'get_duplicates' is deprecated and will be removed in "
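Since the hunk above is part of deprecating ``get_duplicates`` (its doctests are being skipped ahead of removal), the stable way to get the same answer is through ``Index.duplicated``, which is what the deprecation message in pandas points users toward:

```python
import pandas as pd

idx = pd.Index([1, 2, 2, 3, 3, 3, 4])

# Equivalent of the deprecated idx.get_duplicates(): keep one copy of
# each value that appears more than once.
dupes = idx[idx.duplicated()].unique()
print(list(dupes))  # [2, 3]
```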

pandas/core/indexes/period.py

Lines changed: 4 additions & 4 deletions

@@ -22,11 +22,11 @@
 )
 from pandas.core.tools.datetimes import parse_time_string, DateParseError

-from pandas._libs import tslib, index as libindex
+from pandas._libs import index as libindex
 from pandas._libs.tslibs.period import (Period, IncompatibleFrequency,
                                         DIFFERENT_FREQ_INDEX)

-from pandas._libs.tslibs import resolution
+from pandas._libs.tslibs import resolution, NaT, iNaT

 from pandas.core.algorithms import unique1d
 import pandas.core.arrays.datetimelike as dtl
@@ -336,7 +336,7 @@ def _box_func(self):
         # places outside of indexes/period.py are calling this _box_func,
         # but passing data that's already boxed.
         def func(x):
-            if isinstance(x, Period) or x is tslib.NaT:
+            if isinstance(x, Period) or x is NaT:
                 return x
             else:
                 return Period._from_ordinal(ordinal=x, freq=self.freq)
@@ -726,7 +726,7 @@ def get_loc(self, key, method=None, tolerance=None):
             raise KeyError(key)

         try:
-            ordinal = iNaT if key is NaT else key.ordinal
+            ordinal = iNaT if key is NaT else key.ordinal
             if tolerance is not None:
                 tolerance = self._convert_tolerance(tolerance,
                                                     np.asarray(key))
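The identity comparisons in the hunks above (``x is NaT``) are safe because ``NaT`` is a singleton; every reference to it is the same object, so ``is`` checks are equivalent to, and cheaper than, equality-based missing-value tests:

```python
import pandas as pd

# NaT is a singleton: the module-level object and any other reference
# to it are the same instance.
x = pd.NaT
print(x is pd.NaT)  # True

# Note that equality does NOT work for this check, since NaT != NaT.
print(x == pd.NaT)  # False
```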

pandas/core/internals/blocks.py

Lines changed: 1 addition & 1 deletion

@@ -35,9 +35,9 @@
     is_numeric_v_string_like, is_extension_type,
     is_extension_array_dtype,
     is_list_like,
-    is_sparse,
     is_re,
     is_re_compilable,
+    is_sparse,
     pandas_dtype)
 from pandas.core.dtypes.cast import (
     maybe_downcast_to_dtype,
