Skip to content

Commit aefdbb4

Browse files
authored
Merge pull request more-itertools#249 from rovyko/split_into_feature
Add split_into
2 parents 3c6c315 + 10ed5d1 commit aefdbb4

File tree

3 files changed

+206
-0
lines changed

3 files changed

+206
-0
lines changed

docs/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ These tools yield groups of items from a source iterable.
2020
.. autofunction:: split_at
2121
.. autofunction:: split_before
2222
.. autofunction:: split_after
23+
.. autofunction:: split_into
2324
.. autofunction:: bucket
2425
.. autofunction:: unzip
2526

more_itertools/more.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
'split_at',
7575
'split_after',
7676
'split_before',
77+
'split_into',
7778
'spy',
7879
'stagger',
7980
'strip',
@@ -1070,6 +1071,51 @@ def split_after(iterable, pred):
10701071
yield buf
10711072

10721073

1074+
def split_into(iterable, sizes):
    """Yield successive lists of items taken from *iterable*, one list per
    entry in *sizes*; an integer entry ``n`` requests the next ``n`` items.

        >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
        [[1], [2, 3], [4, 5, 6]]

    When *sizes* adds up to less than the length of *iterable*, the leftover
    items are never yielded:

        >>> list(split_into([1,2,3,4,5,6], [2,3]))
        [[1, 2], [3, 4, 5]]

    When *sizes* adds up to more than the length of *iterable*, the list that
    runs past the end comes back short and every later list is empty:

        >>> list(split_into([1,2,3,4], [1,2,3,4]))
        [[1], [2, 3], [4], []]

    A ``None`` entry in *sizes* requests everything that is left in
    *iterable*, the same way a ``None`` stop value behaves for
    :func:`itertools.islice`. Entries of *sizes* after the ``None`` are
    ignored:

        >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
        [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    :func:`split_into` is handy for grouping a flat series of items into
    groups of non-uniform size. For example, in a table row several columns
    may together describe one feature (e.g. a point given as x,y,z) while
    other features span a different number of columns.
    """
    # A single shared iterator lets each islice call resume where the
    # previous one stopped, even when *iterable* is a list or other
    # re-iterable container.
    source = iter(iterable)

    for count in sizes:
        if count is not None:
            yield list(islice(source, count))
            continue

        # None means "take the rest"; nothing after it can be satisfied, so
        # stop without consuming any further entries of *sizes*.
        yield list(source)
        return
1117+
1118+
10731119
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
10741120
"""Yield the elements from *iterable*, followed by *fillvalue*, such that
10751121
at least *n* items are emitted.

more_itertools/tests/test_more.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,165 @@ def test_no_sep(self):
10091009
self.assertEqual(actual, expected)
10101010

10111011

1012+
class SplitIntoTests(TestCase):
    """Tests for ``split_into()``"""

    def test_iterable_just_right(self):
        """The sum of ``sizes`` matches the length of ``iterable`` exactly."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, 4])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7, 8, 9]])

    def test_iterable_too_small(self):
        """``iterable`` is shorter than the sum of ``sizes``, so the last
        list returned comes back short."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7], [2, 3, 4])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7]])

    def test_iterable_too_small_extra(self):
        """``iterable`` is shorter than the sum of ``sizes``, so the
        second-to-last list comes back short and the last list is empty."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7], [2, 3, 4, 5])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7], []])

    def test_iterable_too_large(self):
        """``iterable`` is longer than the sum of ``sizes``, so the trailing
        items of ``iterable`` are not returned."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, 2])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7]])

    def test_using_none_with_leftover(self):
        """``sizes`` ends with ``None`` while items remain in ``iterable``;
        the last list returned holds all of the remaining items."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, None])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7, 8, 9]])

    def test_using_none_without_leftover(self):
        """``sizes`` ends with ``None`` after ``iterable`` has already been
        used up; the last list returned is empty."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, 4, None])
        self.assertEqual(
            list(result), [[1, 2], [3, 4, 5], [6, 7, 8, 9], []]
        )

    def test_using_none_mid_sizes(self):
        """``None`` appears in ``sizes`` before the final entry; the last
        list returned holds all remaining items and every entry of ``sizes``
        after the ``None`` is ignored."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, None, 4])
        self.assertEqual(list(result), [[1, 2], [3, 4, 5], [6, 7, 8, 9]])

    def test_iterable_empty(self):
        """``iterable`` is empty but ``sizes`` is not; one empty list is
        returned for each entry in ``sizes``."""
        result = mi.split_into([], [2, 4, 2])
        self.assertEqual(list(result), [[], [], []])

    def test_iterable_empty_using_none(self):
        """``iterable`` is empty but ``sizes`` is not; one empty list is
        returned for each entry in ``sizes`` up to and including the
        ``None``, and nothing for the entries after it."""
        result = mi.split_into([], [2, 4, None, 2])
        self.assertEqual(list(result), [[], [], []])

    def test_sizes_empty(self):
        """``sizes`` is empty but ``iterable`` is not; nothing is yielded."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [])
        self.assertEqual(list(result), [])

    def test_both_empty(self):
        """Both ``sizes`` and ``iterable`` are empty; nothing is yielded."""
        result = mi.split_into([], [])
        self.assertEqual(list(result), [])

    def test_bool_in_sizes(self):
        """A bool in ``sizes`` is treated as 1 for ``True`` and 0 for
        ``False``, because bool is a subclass of int."""
        result = mi.split_into(
            [1, 2, 3, 4, 5, 6, 7, 8, 9], [3, True, 2, False]
        )
        self.assertEqual(list(result), [[1, 2, 3], [4], [5, 6], []])

    def test_invalid_in_sizes(self):
        """A ValueError is raised when an entry in ``sizes`` is neither
        ``None`` nor an integer."""
        source = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        bad_sizes = [1, [], 3]
        with self.assertRaises(ValueError):
            list(mi.split_into(source, bad_sizes))

    def test_invalid_in_sizes_after_none(self):
        """An invalid entry in ``sizes`` does not raise an error when it
        comes after a ``None`` entry."""
        result = mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], [3, 4, None, []])
        self.assertEqual(list(result), [[1, 2, 3], [4, 5, 6, 7], [8, 9]])

    def test_generator_iterable_integrity(self):
        """When ``iterable`` is an iterator, only as many items as the sum of
        ``sizes`` are consumed from it."""
        source = (i for i in range(10))

        chunks = list(mi.split_into(source, [2, 3]))
        self.assertEqual(chunks, [[0, 1], [2, 3, 4]])

        # Whatever split_into did not take must still be available.
        self.assertEqual(list(source), [5, 6, 7, 8, 9])

    def test_generator_sizes_integrity(self):
        """When ``sizes`` is an iterator, it is consumed only up to and
        including the first ``None`` entry."""
        size_source = (i for i in [1, 2, None, 3, 4])

        chunks = list(
            mi.split_into([1, 2, 3, 4, 5, 6, 7, 8, 9], size_source)
        )
        self.assertEqual(chunks, [[1], [2, 3], [4, 5, 6, 7, 8, 9]])

        # Entries after the None must be left untouched in the iterator.
        self.assertEqual(list(size_source), [3, 4])
1169+
1170+
10121171
class PaddedTest(TestCase):
10131172
"""Tests for ``padded()``"""
10141173

0 commit comments

Comments
 (0)