Skip to content

REGR: Row series are broken after applying to_datetime() #36785

Closed
@krassowski

Description

@krassowski
  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • (optional) I have confirmed this bug exists on the master branch of pandas.


Code Sample, a copy-pastable example

from io import StringIO
from pandas import read_csv, to_datetime, options

df = read_csv(StringIO("""\
,A,B,C,D,E,F
P0,,2020-10-01 08:00:00+00:00,,,,2020-10-16 00:01:00+00:00
"""), index_col=0)

# works
options.display.max_rows = 6
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)

# raises
options.display.max_rows = 5
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)

# raises
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: x.dropna().apply(lambda y: getattr(y, 'year')), axis=1)
# in v 1.0.4 would return:
#        B     F
# P0  2020  2020

Problem description

Assigning the output of pd.to_datetime to a column of a dataframe, although not demonstrated in the documentation, is a popular use of this helper function. In previous versions of pandas (1.0.x) it was possible to convert multiple columns using to_datetime in combination with DataFrame.apply. It is still possible in 1.1.2 and master:

>>>df.apply(lambda d: to_datetime(d, utc=True), axis=0).dtypes
A    datetime64[ns, UTC]
B    datetime64[ns, UTC]
            ...         
E    datetime64[ns, UTC]
F    datetime64[ns, UTC]
Length: 6, dtype: object

However, the row Series passed to a subsequent row-wise apply (axis=1) are broken for certain operations. For example, trying to print them out raises: TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid. This was not the case in pandas 1.0.4.

X in Y
     10 
     11 # raises
---> 12 df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)

/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
   7545             kwds=kwds,
   7546         )
-> 7547         return op.get_result()
   7548 
   7549     def applymap(self, func) -> "DataFrame":

/pandas/core/apply.py in get_result(self)
    178             return self.apply_raw()
    179 
--> 180         return self.apply_standard()
    181 
    182     def apply_empty_result(self):

/pandas/core/apply.py in apply_standard(self)
    253 
    254     def apply_standard(self):
--> 255         results, res_index = self.apply_series_generator()
    256 
    257         # wrap results

/pandas/core/apply.py in apply_series_generator(self)
    282                 for i, v in enumerate(series_gen):
    283                     # ignore SettingWithCopy here in case the user mutates
--> 284                     results[i] = self.f(v)
    285                     if isinstance(results[i], ABCSeries):
    286                         # If we have a view on v, we need to make a copy because

X in <lambda>(x)
     10 df.apply(lambda x: str(x), axis=1)
     11 # raises
---> 12 df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)

/pandas/core/series.py in __repr__(self)
   1313         show_dimensions = get_option("display.show_dimensions")
   1314 
-> 1315         self.to_string(
   1316             buf=buf,
   1317             name=self.name,

/pandas/core/series.py in to_string(self, buf, na_rep, float_format, header, index, length, dtype, name, max_rows, min_rows)
   1372             String representation of Series if ``buf=None``, otherwise None.
   1373         """
-> 1374         formatter = fmt.SeriesFormatter(
   1375             self,
   1376             name=name,

/pandas/io/formats/format.py in __init__(self, series, buf, length, header, index, na_rep, name, float_format, dtype, max_rows, min_rows)
    259         self.adj = _get_adjustment()
    260 
--> 261         self._chk_truncate()
    262 
    263     def _chk_truncate(self) -> None:

/pandas/io/formats/format.py in _chk_truncate(self)
    283             else:
    284                 row_num = max_rows // 2
--> 285                 series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
    286             self.tr_row_num = row_num
    287         else:

/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
    272     ValueError: Indexes have overlapping values: ['a']
    273     """
--> 274     op = _Concatenator(
    275         objs,
    276         axis=axis,

/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
    357                     "only Series and DataFrame objs are valid"
    358                 )
--> 359                 raise TypeError(msg)
    360 
    361             # consolidate

TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid

Expected Output

Should not raise.

Output of pd.show_versions()

INSTALLED VERSIONS

commit : 2a7d332
python : 3.8.1.final.0
python-bits : 64
OS : Linux
OS-release : 5.4.0-48-generic
Version : #52-Ubuntu SMP Thu Sep 10 10:58:49 UTC 2020
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_GB.UTF-8
LOCALE : en_GB.UTF-8

pandas : 1.1.2
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 20.2.3
setuptools : 41.2.0
Cython : None
pytest : 5.3.4
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 1.2.8
lxml.etree : 4.4.2
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10.3
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pytables : None
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : None
numba : 0.49.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Apply (Apply, Aggregate, Transform, Map); Regression (Functionality that used to work in a prior pandas version)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions