Skip to content

ENH: Explode multiple columns of DataFrame #28465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Prev Previous commit
Next Next commit
multiple columns for explode method
  • Loading branch information
Kyle Stahl authored Sep 16, 2019
commit 34fef5306ad0735e501a8f0e9d6c32f086a921cf
19 changes: 10 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6261,7 +6261,7 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame":
3 3 1
3 4 1

>>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]],
>>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]],
'B': 1,
'C': [[7,8,9],'bar',[],[8,7]]})
>>> df
Expand All @@ -6270,16 +6270,16 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame":
1 foo 1 bar
2 [] 1 []
3 [3, 4] 1 [8, 7]
>>> df.explode(['A','C'])

>>> df.explode(['A','C'])
B A C
0 1 1 7
0 1 2 8
0 1 3 9
1 1 foo bar
2 1 NaN NaN
3 1 3 8
3 1 4 7
3 1 4 7
"""

# Validate data
Expand All @@ -6295,23 +6295,24 @@ def explode(self, columns: Union[str, List[str]]) -> "DataFrame":
if not all([c in self.columns for c in columns]):
raise ValueError("column name(s) not in index")

tmp = self.iloc[0:0,0:0].copy() # creates empty temp df
tmp = self.iloc[0:0, 0:0].copy() # creates empty temp df
lengths_equal = []

for row in self[columns].iterrows():
# converts non-lists into 1 element lists so len() is valid
r=row[1].apply(lambda x: x if type(x) in (list,tuple) else [x])
r = row[1].apply(lambda x: x if type(x) in (list, tuple) else [x])

# make sure all lists in the same record are the same length
row_is_ok = len(set([len(r[c]) for c in columns])) == 1
lengths_equal.append(row_is_ok)
lengths_equal.append(row_is_ok)

# Explode all columns if lengths match
if all(lengths_equal):
for c in columns:
tmp[c] = self[c].explode()
else:
raise ValueError("Exploded lists from `columns` do not have equivalent length within the same record")
e = "Elements from `columns` do not have equivalent length within in the same row"
raise ValueError(e)

# join in exploded columns
results = self.drop(columns, axis=1).join(tmp)
Expand Down