Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- (optional) I have confirmed this bug exists on the master branch of pandas.
Code Sample, a copy-pastable example
from io import StringIO
from pandas import read_csv, to_datetime, options
df = read_csv(StringIO("""\
,A,B,C,D,E,F
P0,,2020-10-01 08:00:00+00:00,,,,2020-10-16 00:01:00+00:00
"""), index_col=0)
# works
options.display.max_rows = 6
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)
# raises
options.display.max_rows = 5
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)
# raises
df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: x.dropna().apply(lambda y: getattr(y, 'year')), axis=1)
# in v 1.0.4 would return:
# B F
# P0 2020 2020
Problem description
Assigning the output of `pd.to_datetime` to a column of a DataFrame, although not demonstrated in the documentation, is a popular use of this helper function. In previous versions of pandas (1.0.x) it was possible to convert multiple columns using `to_datetime` in combination with `DataFrame.apply`. The conversion itself still works in 1.1.2 and on master:
>>> df.apply(lambda d: to_datetime(d, utc=True), axis=0).dtypes
A datetime64[ns, UTC]
B datetime64[ns, UTC]
...
E datetime64[ns, UTC]
F datetime64[ns, UTC]
Length: 6, dtype: object
However, the rows passed to a subsequent row-wise `apply` (`axis=1`) are broken for certain operations. For example, trying to print them raises `TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid`. This was not the case in pandas 1.0.4.
X in Y
10
11 # raises
---> 12 df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)
/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7545 kwds=kwds,
7546 )
-> 7547 return op.get_result()
7548
7549 def applymap(self, func) -> "DataFrame":
/pandas/core/apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
/pandas/core/apply.py in apply_standard(self)
253
254 def apply_standard(self):
--> 255 results, res_index = self.apply_series_generator()
256
257 # wrap results
/pandas/core/apply.py in apply_series_generator(self)
282 for i, v in enumerate(series_gen):
283 # ignore SettingWithCopy here in case the user mutates
--> 284 results[i] = self.f(v)
285 if isinstance(results[i], ABCSeries):
286 # If we have a view on v, we need to make a copy because
X in <lambda>(x)
10 df.apply(lambda x: str(x), axis=1)
11 # raises
---> 12 df.apply(lambda d: to_datetime(d, utc=True), axis=0).apply(lambda x: str(x), axis=1)
/pandas/core/series.py in __repr__(self)
1313 show_dimensions = get_option("display.show_dimensions")
1314
-> 1315 self.to_string(
1316 buf=buf,
1317 name=self.name,
/pandas/core/series.py in to_string(self, buf, na_rep, float_format, header, index, length, dtype, name, max_rows, min_rows)
1372 String representation of Series if ``buf=None``, otherwise None.
1373 """
-> 1374 formatter = fmt.SeriesFormatter(
1375 self,
1376 name=name,
/pandas/io/formats/format.py in __init__(self, series, buf, length, header, index, na_rep, name, float_format, dtype, max_rows, min_rows)
259 self.adj = _get_adjustment()
260
--> 261 self._chk_truncate()
262
263 def _chk_truncate(self) -> None:
/pandas/io/formats/format.py in _chk_truncate(self)
283 else:
284 row_num = max_rows // 2
--> 285 series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
286 self.tr_row_num = row_num
287 else:
/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
272 ValueError: Indexes have overlapping values: ['a']
273 """
--> 274 op = _Concatenator(
275 objs,
276 axis=axis,
/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
357 "only Series and DataFrame objs are valid"
358 )
--> 359 raise TypeError(msg)
360
361 # consolidate
TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid
Expected Output
Should not raise.
Output of pd.show_versions()
INSTALLED VERSIONS
commit : 2a7d332
python : 3.8.1.final.0
python-bits : 64
OS : Linux
OS-release : 5.4.0-48-generic
Version : #52-Ubuntu SMP Thu Sep 10 10:58:49 UTC 2020
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_GB.UTF-8
LOCALE : en_GB.UTF-8
pandas : 1.1.2
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 20.2.3
setuptools : 41.2.0
Cython : None
pytest : 5.3.4
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 1.2.8
lxml.etree : 4.4.2
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10.3
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pytables : None
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : None
numba : 0.49.0