pandas-dev · mroeschke · Aug 3, 2023 · Aug 1, 2023 · Aug 1, 2023 · Aug 2, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -176,8 +176,8 @@ Other enhancements
 - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
 - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
 - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`)
+- :meth:`DataFrame.to_parquet` and :func:`read_parquet` now write and read :attr:`DataFrame.attrs`, respectively (:issue:`54346`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import io
+import json
 import os
 from typing import (
     TYPE_CHECKING,
@@ -184,6 +185,12 @@ def write(
 
         table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
 
+        if df.attrs:
+            df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
+            existing_metadata = table.schema.metadata
+            merged_metadata = {**existing_metadata, **df_metadata}
+            table = table.replace_schema_metadata(merged_metadata)
+
         path_or_handle, handles, filesystem = _get_path_or_handle(
             path,
             filesystem,
@@ -263,6 +270,11 @@ def read(
 
             if manager == "array":
                 result = result._as_manager("array", copy=False)
+
+            if pa_table.schema.metadata:
+                if b"PANDAS_ATTRS" in pa_table.schema.metadata:
+                    df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"]
+                    result.attrs = json.loads(df_metadata)
             return result
         finally:
             if handles is not None:

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -1065,6 +1065,14 @@ def test_empty_columns(self, pa):
         df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
         check_round_trip(df, pa)
 
+    def test_df_attrs_persistence(self, tmp_path, pa):
+        # attrs set before writing must come back unchanged after reading (engine from ``pa``)
+        expected = pd.DataFrame(data={1: [1]})
+        expected.attrs = {"test_attribute": 1}
+        target = tmp_path / "test_df_metadata.p"
+        expected.to_parquet(target, engine=pa)
+        assert read_parquet(target, engine=pa).attrs == expected.attrs
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):