Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
import pytz
from datetime import datetime
df1 = pd.DataFrame([{'date': datetime(2022, 12, 1, 12, 0, 0, 0, pytz.UTC)}])
df2 = pd.DataFrame([{'date': None}])
pd.concat([df1, df2])
Traceback
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [55], in <cell line: 3>()
1 df1 = pd.DataFrame([{'date': datetime(2022, 12, 1, 12, 0, 0, 0, pytz.UTC)}])
2 df2 = pd.DataFrame([{'date': None}])
----> 3 pd.concat([df1, df2])
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/reshape/concat.py:381, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
159 """
160 Concatenate pandas objects along a particular axis.
161
(...)
366 1 3 4
367 """
368 op = _Concatenator(
369 objs,
370 axis=axis,
(...)
378 sort=sort,
379 )
--> 381 return op.get_result()
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/reshape/concat.py:616, in _Concatenator.get_result(self)
612 indexers[ax] = obj_labels.get_indexer(new_labels)
614 mgrs_indexers.append((obj._mgr, indexers))
--> 616 new_data = concatenate_managers(
617 mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
618 )
619 if not self.copy:
620 new_data._consolidate_inplace()
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/internals/concat.py:233, in concatenate_managers(mgrs_indexers, axes, concat_axis, copy)
231 fastpath = blk.values.dtype == values.dtype
232 else:
--> 233 values = _concatenate_join_units(join_units, concat_axis, copy=copy)
234 fastpath = False
236 if fastpath:
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/internals/concat.py:542, in _concatenate_join_units(join_units, concat_axis, copy)
539 has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
540 upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)
--> 542 to_concat = [
543 ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
544 for ju in join_units
545 ]
547 if len(to_concat) == 1:
548 # Only one block, nothing to concatenate.
549 concat_values = to_concat[0]
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/internals/concat.py:543, in <listcomp>(.0)
539 has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
540 upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)
542 to_concat = [
--> 543 ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
544 for ju in join_units
545 ]
547 if len(to_concat) == 1:
548 # Only one block, nothing to concatenate.
549 concat_values = to_concat[0]
File ~/software/mambaforge/envs/dev/lib/python3.10/site-packages/pandas/core/internals/concat.py:468, in JoinUnit.get_reindexed_values(self, empty_dtype, upcasted_na)
464 fill_value = None
466 if isinstance(empty_dtype, DatetimeTZDtype):
467 # NB: exclude e.g. pyarrow[dt64tz] dtypes
--> 468 i8values = np.full(self.shape, fill_value.value)
469 return DatetimeArray(i8values, dtype=empty_dtype)
471 elif is_1d_only_ea_dtype(empty_dtype):
AttributeError: 'NoneType' object has no attribute 'value'
Issue Description
`pd.concat` raises `AttributeError: 'NoneType' object has no attribute 'value'` when it concatenates a column with a timezone-aware datetime dtype with a column of object dtype that contains only `None` values.
Expected Behavior
The concatenation should succeed and represent the missing value as NaT, as it already does with timezone-naive datetimes:
df1 = pd.DataFrame([{'date': datetime(2022, 12, 9, 12, 0, 0, 0)}])
df2 = pd.DataFrame([{'date': None}])
pd.concat([df1, df2])
date
0 2022-12-09 12:00:00
0 NaT
Installed Versions
INSTALLED VERSIONS
commit : 8dab54d
python : 3.10.6.final.0
python-bits : 64
OS : Linux
OS-release : 4.4.0-19041-Microsoft
Version : #1237-Microsoft Sat Sep 11 14:32:00 PST 2021
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.5.2
numpy : 1.23.2
pytz : 2022.2.1
dateutil : 2.8.2
setuptools : 65.3.0
pip : 22.2.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.8.0
html5lib : None
pymysql : None
psycopg2 : 2.9.3
jinja2 : 3.1.2
IPython : 8.4.0
pandas_datareader: None
bs4 : 4.11.1
bottleneck : None
brotli :
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.5.3
numba : None
numexpr : None
odfpy : None
openpyxl : 3.0.9
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.9.3
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : 2.0.1
xlwt : None
zstandard : None
tzdata : 2022.5