Skip to content

Commit 80a45b5

Browse files
feat: add __iter__, iterrows, itertuples, keys methods
1 parent 0801d96 commit 80a45b5

File tree

5 files changed

+183
-1
lines changed

5 files changed

+183
-1
lines changed

bigframes/dataframe.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,9 @@ def __len__(self):
303303
rows, _ = self.shape
304304
return rows
305305

306+
def __iter__(self):
    """Return an iterator over ``self.columns`` (the column labels)."""
    column_labels = self.columns
    return iter(column_labels)
308+
306309
def astype(
307310
self,
308311
dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
@@ -1472,12 +1475,27 @@ def isin(self, values) -> DataFrame:
14721475
f"isin(), you passed a [{type(values).__name__}]"
14731476
)
14741477

1478+
def keys(self) -> pandas.Index:
    """Return ``self.columns``, mirroring the pandas ``DataFrame.keys`` API."""
    column_index = self.columns
    return column_index
1480+
14751481
def items(self):
    """Yield one ``(column label, Series)`` pair per value column.

    The block's ``value_columns`` ids are paired positionally with its
    ``column_labels``; each id is selected from the block and wrapped in a
    ``bigframes.series.Series``.
    """
    ids = self._block.value_columns
    labels = self._block.column_labels
    for column_id, label in zip(ids, labels):
        single_column_block = self._block.select_column(column_id)
        yield label, bigframes.series.Series(single_column_block)
14801486

1487+
def iterrows(self) -> Iterable[tuple[typing.Hashable, pandas.Series]]:
    """Iterate over the rows as ``(index, row)`` pairs.

    The rows are produced by calling ``iterrows()`` on every batch returned
    by ``self.to_pandas_batches()``, in batch order.

    Yields:
        tuple: ``(index, row)`` exactly as produced by the batch's own
        ``iterrows()``; the row is a pandas Series (not a bigframes Series),
        which is why the return annotation names ``pandas.Series``.
    """
    for batch in self.to_pandas_batches():
        yield from batch.iterrows()
1491+
1492+
def itertuples(
    self, index: bool = True, name: typing.Optional[str] = "Pandas"
) -> Iterable[tuple[typing.Any, ...]]:
    """Iterate over the rows as tuples, one batch at a time.

    Args:
        index: Forwarded to each batch's ``itertuples`` call.
        name: Forwarded to each batch's ``itertuples`` call.

    Yields:
        The tuples produced by ``itertuples(index=index, name=name)`` on
        every batch from ``self.to_pandas_batches()``, in batch order.
    """
    for batch in self.to_pandas_batches():
        yield from batch.itertuples(index=index, name=name)
1498+
14811499
def dropna(
14821500
self,
14831501
*,

bigframes/series.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import itertools
1920
import numbers
2021
import textwrap
2122
import typing
@@ -148,6 +149,11 @@ def _set_internal_query_job(self, query_job: bigquery.QueryJob):
148149
def __len__(self):
    """Return the first entry of ``self.shape``."""
    first_dimension = self.shape[0]
    return first_dimension
150151

152+
def __iter__(self) -> typing.Iterator:
    """Iterate over the values of the Series, batch by batch.

    Iterating a pandas Series yields its values, not its index labels, so
    each batch from ``self._block.to_pandas_batches()`` is reduced to its
    single value column before the batches are chained together.

    Returns:
        Iterator over the values, in batch order.
    """
    # squeeze(axis=1) collapses only the column axis, so a one-row batch
    # still becomes a Series (not a scalar) and remains iterable.
    # TODO: the Series example in the vendored generic.py ``__iter__``
    # docstring still shows index labels and should be updated to match.
    return itertools.chain.from_iterable(
        batch.squeeze(axis=1) for batch in self._block.to_pandas_batches()
    )
156+
151157
def copy(self) -> Series:
    """Return a new ``Series`` constructed from this object's ``_block``."""
    underlying_block = self._block
    return Series(underlying_block)
153159

tests/system/small/test_dataframe.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,56 @@ def test_apply_series_scalar_callable(
787787
pandas.testing.assert_series_equal(bf_result, pd_result)
788788

789789

790+
def test_df_keys(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """``keys()`` on both fixtures must return equal indexes."""
    bf_keys = scalars_df_index.keys()
    pd_keys = scalars_pandas_df_index.keys()
    pandas.testing.assert_index_equal(bf_keys, pd_keys)
797+
798+
799+
def test_df_iter(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Iterating both fixtures must yield the same labels in the same order."""
    bf_labels = list(scalars_df_index)
    pd_labels = list(scalars_pandas_df_index)
    # Compare the lists directly: pandas.testing.assert_index_equal only
    # accepts Index objects and raises for plain lists. Unlike a zip() loop,
    # list equality also catches a length mismatch.
    assert bf_labels == pd_labels
806+
807+
808+
def test_iterrows(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Rows from both fixtures' ``iterrows()`` must match pairwise."""
    bf_rows = scalars_df_index.iterrows()
    pd_rows = scalars_pandas_df_index.iterrows()
    for bf_row, pd_row in zip(bf_rows, pd_rows):
        bf_index, bf_series = bf_row
        pd_index, pd_series = pd_row
        assert bf_index == pd_index
        pandas.testing.assert_series_equal(bf_series, pd_series)
817+
818+
819+
@pytest.mark.parametrize(
    ("index", "name"),
    [
        (True, "my_df"),
        (False, None),
    ],
)
def test_itertuples(scalars_df_index, index, name):
    """``itertuples`` must match pandas ``itertuples`` on the ``to_pandas()`` frame."""
    # Numeric has slightly different representation as a result of conversions.
    actual_rows = scalars_df_index.itertuples(index, name)
    expected_rows = scalars_df_index.to_pandas().itertuples(index, name)
    for actual, expected in zip(actual_rows, expected_rows):
        assert actual == expected
838+
839+
790840
def test_df_isin_list(scalars_dfs):
791841
scalars_df, scalars_pandas_df = scalars_dfs
792842
values = ["Hello, World!", 55555, 2.51, pd.NA, True]

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,85 @@ def isin(self, values):
947947
"""
948948
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
949949

950+
def keys(self):
    """
    Return the 'info axis' of the object.

    The info axis is the index for a Series and the columns for a DataFrame.

    Returns:
        Index: Info axis.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({
        ...     'A': [1, 2, 3],
        ...     'B': [4, 5, 6],
        ... })
        >>> df.keys()
        Index(['A', 'B'], dtype='object')
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
972+
973+
def iterrows(self):
    """
    Iterate over the rows of the DataFrame as (index, Series) pairs.

    Yields:
        a tuple (index, data) where data contains row values as a Series

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame({
        ...     'A': [1, 2, 3],
        ...     'B': [4, 5, 6],
        ... })
        >>> index, row = next(df.iterrows())
        >>> index
        0
        >>> row
        A    1
        B    4
        Name: 0, dtype: object
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
997+
998+
def itertuples(self, index: bool = True, name: str | None = "Pandas"):
    """
    Iterate over the rows of the DataFrame as namedtuples.

    Args:
        index (bool, default True):
            If True, return the index as the first element of the tuple.
        name (str or None, default "Pandas"):
            The name of the returned namedtuples or None to return regular
            tuples.

    Returns:
        iterator:
            An object to iterate over namedtuples for each row in the
            DataFrame with the first field possibly being the index and
            following fields being the column values.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame({
        ...     'A': [1, 2, 3],
        ...     'B': [4, 5, 6],
        ... })
        >>> next(df.itertuples(name="Pair"))
        Pair(Index=0, A=1, B=4)
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1028+
9501029
def items(self):
9511030
"""
9521031
Iterate over (column name, Series) pairs.

third_party/bigframes_vendored/pandas/core/generic.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
22
from __future__ import annotations
33

4-
from typing import Literal, Optional
4+
from typing import Iterator, Literal, Optional
55

66
from bigframes import constants
77
from third_party.bigframes_vendored.pandas.core import indexing
@@ -35,6 +35,35 @@ def size(self) -> int:
3535
"""
3636
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
3737

38+
def __iter__(self) -> Iterator:
    """
    Iterate over info axis.

    Returns:
        iterator: Info axis as iterator.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({
        ...     'A': [1, 2, 3],
        ...     'B': [4, 5, 6],
        ... })
        >>> for x in df:
        ...     print(x)
        A
        B

        >>> series = bpd.Series(["a", "b", "c"], index=[10, 20, 30])
        >>> for x in series:
        ...     print(x)
        10
        20
        30
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
66+
3867
# -------------------------------------------------------------------------
3968
# Unary Methods
4069

0 commit comments

Comments
 (0)