Skip to content

PR: Add metadata attribute to DataFrame and Column #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add review changes
  • Loading branch information
steff456 committed Jun 25, 2021
commit d72c1ef830f0ecf211babc19fbe3ed7f855b68b7
10 changes: 8 additions & 2 deletions protocol/dataframe_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def null_count(self) -> Optional[int]:
@property
def metadata(self) -> Dict[str, Any]:
"""
Store the metadata specific to the column.
The metadata for the column. See `DataFrame.metadata` for more details.
"""
pass

Expand Down Expand Up @@ -360,7 +360,13 @@ def __dataframe__(self, nan_as_null : bool = False) -> dict:
@property
def metadata(self) -> Dict[str, Any]:
"""
Store the metadata specific to the DataFrame
The metadata for the data frame, as a dictionary with string keys. The
contents of `metadata` may be anything; they are meant for a library
to store information that it needs to, e.g., roundtrip losslessly or
for two implementations to share data that is not (yet) part of the
interchange protocol specification. To avoid collisions with other
entries, please name the keys with the name of the library
followed by a period and the desired name, e.g., ``pandas.indexcol``.
"""
pass

Expand Down
16 changes: 8 additions & 8 deletions protocol/pandas_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def metadata(self) -> Dict[str, Any]:
"""
Store specific metadata of the column.
"""
return {"num_chunks": self.num_chunks()}
return {}

def num_chunks(self) -> int:
"""
Expand Down Expand Up @@ -504,8 +504,7 @@ def __init__(self, df : pd.DataFrame, nan_as_null : bool = False) -> None:

@property
def metadata(self):
return {"num_chunks": self.num_chunks(),
"num_columns": self.num_columns()}
return {"pandas.indexcol": self._df.index.name}

def num_columns(self) -> int:
return len(self._df.columns)
Expand Down Expand Up @@ -591,19 +590,20 @@ def test_categorical_dtype():


def test_metadata():
df = pd.DataFrame(data=dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]))
d = {'A': [1, 2, 3, 4],'B': [1, 2, 3, 4]}
df = pd.DataFrame(d).set_index('A')

# Check the metadata from the dataframe
df_metadata = df.__dataframe__().metadata
excpected = {"num_chunks": 1, "num_columns": 3}
expected = {"pandas.indexcol": 'A'}
for key in df_metadata:
assert df_metadata[key] == excpected[key]
assert df_metadata[key] == expected[key]

# Check the metadata from the column
col_metadata = df.__dataframe__().get_column(0).metadata
expected = {"num_chunks": 1}
expected = {}
for key in col_metadata:
assert col_metadata[key] == excpected[key]
assert col_metadata[key] == expected[key]

df2 = from_dataframe(df)
tm.assert_frame_equal(df, df2)
Expand Down