Skip to content

BUG: DataFrame.astype(series) with duplicate columns #44417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,7 @@ Styler

Other
^^^^^
- Bug in :meth:`DataFrame.astype` with non-unique columns and a :class:`Series` ``dtype`` argument (:issue:`44417`)
- Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`)
- Bug in :meth:`RangeIndex.union` with another ``RangeIndex`` with matching (even) ``step`` and starts differing by strictly less than ``step / 2`` (:issue:`44019`)
- Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`)
Expand Down
20 changes: 14 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5826,14 +5826,22 @@ def astype(
"Only a column name can be used for the "
"key in a dtype mappings argument."
)

# GH#44417 cast to Series so we can use .iat below, which will be
# robust in case we have duplicate column names
from pandas import Series

dtype_ser = Series(dtype, dtype=object)
dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)

results = []
for col_name, col in self.items():
if col_name in dtype:
results.append(
col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
)
for i, (col_name, col) in enumerate(self.items()):
cdt = dtype_ser.iat[i]
if isna(cdt):
res_col = col.copy() if copy else col
else:
res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
results.append(res_col)

elif is_extension_array_dtype(dtype) and self.ndim > 1:
# GH 18099/22869: columnwise conversion to extension dtype
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,7 @@ def _wrap_applied_output(
result = self.obj._constructor(
index=self.grouper.result_index, columns=data.columns
)
result = result.astype(data.dtypes.to_dict(), copy=False)
result = result.astype(data.dtypes, copy=False)
return result

# GH12824
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,26 @@ def test_astype_duplicate_col(self):
expected = concat([a1_str, b, a2_str], axis=1)
tm.assert_frame_equal(result, expected)

def test_astype_duplicate_col_series_arg(self):
# GH#44417
vals = np.random.randn(3, 4)
df = DataFrame(vals, columns=["A", "B", "C", "A"])
dtypes = df.dtypes
dtypes.iloc[0] = str
dtypes.iloc[2] = "Float64"

result = df.astype(dtypes)
expected = DataFrame(
{
0: vals[:, 0].astype(str),
1: vals[:, 1],
2: pd.array(vals[:, 2], dtype="Float64"),
3: vals[:, 3],
}
)
expected.columns = df.columns
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"dtype",
[
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,6 +2031,16 @@ def get_result():
tm.assert_equal(result, expected)


def test_empty_groupby_apply_nonunique_columns():
    # GH#44417 - an identity ``apply`` on an empty frame with duplicate
    # column labels must preserve every column's dtype
    frame = DataFrame(np.random.randn(0, 4))
    frame[3] = frame[3].astype(np.int64)
    frame.columns = [0, 1, 2, 0]
    grouped = frame.groupby(frame[1])
    result = grouped.apply(lambda g: g)
    assert (result.dtypes == frame.dtypes).all()


def test_tuple_as_grouping():
# https://github.com/pandas-dev/pandas/issues/18314
df = DataFrame(
Expand Down