From 356e46509334f2d028828ca2103666df5f1db38e Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 16 Aug 2012 14:00:54 -0700 Subject: [PATCH] Lots of post-merge cleanups. Refs #8. * Make the new and old routines peers, and import them both into __init__. * PEP 8 * Bump version to 2.1. * Add recipes to documentation. --- README.rst | 3 +- docs/api.rst | 16 +- docs/conf.py | 2 +- docs/index.rst | 3 +- docs/versions.rst | 4 + more_itertools/__init__.py | 202 +----------------- more_itertools/more.py | 197 +++++++++++++++++ more_itertools/recipes.py | 26 ++- .../tests/{tests.py => test_more.py} | 0 9 files changed, 241 insertions(+), 212 deletions(-) create mode 100644 more_itertools/more.py rename more_itertools/tests/{tests.py => test_more.py} (100%) diff --git a/README.rst b/README.rst index f8d45b15..dc2fe17e 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,8 @@ recipes included in its docs do quite what I need. Here I've collected several routines I've reached for but not found. Since they are deceptively tricky to get right, I've wrapped them up into a library. -Enjoy! Any additions are welcome; just file a pull request. +We've also included implementations of the recipes from the itertools +documentation. Enjoy! Any additions are welcome; just file a pull request. Full Documentation diff --git a/docs/api.rst b/docs/api.rst index 44c2ba8d..6b8046e6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -2,10 +2,24 @@ API Reference ============= -.. automodule:: more_itertools +Though these routines are listed as living under the ``more`` and ``recipes`` +submodules, you should just import them from ``more_itertools`` directly. + + +New Routines +============ + +.. automodule:: more_itertools.more .. autofunction:: chunked .. autofunction:: collate(*iterables, key=lambda a: a, reverse=False) .. autofunction:: consumer .. autofunction:: first(iterable[, default]) .. autoclass:: peekable + + +Itertools Recipes +================= + +.. automodule:: more_itertools.recipes + :members: diff --git a/docs/conf.py b/docs/conf.py index d6f610f9..e7e4a5bf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,7 +48,7 @@ # built documents. # # The short X.Y version. -version = '2.0' +version = '2.1' # The full version, including alpha/beta/rc tags. release = version diff --git a/docs/index.rst b/docs/index.rst index fb232984..aa0bc223 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,8 @@ recipes included in its docs do quite what I need. Here I've collected several routines I've reached for but not found. Since they are deceptively tricky to get right, I've wrapped them up into a library. -Enjoy! Any additions are welcome; just file a pull request. +We've also included implementations of the recipes from the itertools +documentation. Enjoy! Any additions are welcome; just file a pull request. Contents diff --git a/docs/versions.rst b/docs/versions.rst index 151ad11d..a12df809 100644 --- a/docs/versions.rst +++ b/docs/versions.rst @@ -2,6 +2,10 @@ Version History =============== +2.1 + * Added (tested!) implementations of the recipes from the itertools + documentation. (Thanks, Chris Lonnen!) + 2.0 * ``chunked`` now returns lists rather than tuples. After all, they're homogeneous. This slightly backward-incompatible change is the reason for diff --git a/more_itertools/__init__.py b/more_itertools/__init__.py index bf277469..5a3467fe 100644 --- a/more_itertools/__init__.py +++ b/more_itertools/__init__.py @@ -1,200 +1,2 @@ -from functools import partial, wraps -from itertools import izip_longest -from recipes import * - -__all__ = ['chunked', 'first', 'peekable', 'collate', 'consumer', 'take', - 'tabulate', 'consume', 'nth', 'quantify', 'padnone', 'ncycles', - 'dotproduct', 'flatten', 'repeatfunc', 'pairwise', 'grouper', - 'roundrobin', 'powerset', 'unique_everseen', 'unique_justseen', - 'iter_except', 'random_product', 'random_permutation', - 'random_combination', 'random_combination_with_replacement'] - - -_marker = object() - - -def chunked(iterable, n): - """Break an iterable into lists of a given length:: - - >>> list(chunked([1, 2, 3, 4, 5, 6, 7], 3)) - [[1, 2, 3], [4, 5, 6], [7]] - - If the length of ``iterable`` is not evenly divisible by ``n``, the last - returned list will be shorter. - - This is useful for splitting up a computation on a large number of keys - into batches, to be pickled and sent off to worker processes. One example - is operations on rows in MySQL, which does not implement server-side - cursors properly and would otherwise load the entire dataset into RAM on - the client. - - """ - # Doesn't seem to run into any number-of-args limits. - for group in (list(g) for g in izip_longest(*[iter(iterable)] * n, - fillvalue=_marker)): - if group[-1] is _marker: - # If this is the last group, shuck off the padding: - del group[group.index(_marker):] - yield group - - -def first(iterable, default=_marker): - """Return the first item of an iterable, ``default`` if there is none. - - >>> first(xrange(4)) - 0 - >>> first(xrange(0), 'some default') - 'some default' - - If ``default`` is not provided and there are no items in the iterable, - raise ``ValueError``. - - ``first()`` is useful when you have a generator of expensive-to-retrieve - values and want any arbitrary one. It is marginally shorter than - ``next(iter(...))`` but saves you an entire ``try``/``except`` when you - want to provide a fallback value. - - """ - try: - return next(iter(iterable)) - except StopIteration: - # I'm on the edge about raising ValueError instead of StopIteration. At - # the moment, ValueError wins, because the caller could conceivably - # want to do something different with flow control when I raise the - # exception, and it's weird to explicitly catch StopIteration. - if default is _marker: - raise ValueError('first() was called on an empty iterable, and no ' - 'default value was provided.') - return default - - -class peekable(object): - """Wrapper for an iterator to allow 1-item lookahead - - Call ``peek()`` on the result to get the value that will next pop out of - ``next()``, without advancing the iterator: - - >>> p = peekable(xrange(2)) - >>> p.peek() - 0 - >>> p.next() - 0 - >>> p.peek() - 1 - >>> p.next() - 1 - - Pass ``peek()`` a default value, and it will be returned in the case where - the iterator is exhausted: - - >>> p = peekable([]) - >>> p.peek('hi') - 'hi' - - If no default is provided, ``peek()`` raises ``StopIteration`` when there - are no items left. - - To test whether there are more items in the iterator, examine the - peekable's truth value. If it is truthy, there are more items. - - >>> assert peekable(xrange(1)) - >>> assert not peekable([]) - - """ - # Lowercase to blend in with itertools. The fact that it's a class is an - # implementation detail. - - def __init__(self, iterable): - self._it = iter(iterable) - - def __iter__(self): - return self - - def __nonzero__(self): - try: - self.peek() - except StopIteration: - return False - return True - - def peek(self, default=_marker): - """Return the item that will be next returned from ``next()``. - - Return ``default`` if there are no items left. If ``default`` is not - provided, raise ``StopIteration``. - - """ - if not hasattr(self, '_peek'): - try: - self._peek = self._it.next() - except StopIteration: - if default is _marker: - raise - return default - return self._peek - - def next(self): - ret = self.peek() - del self._peek - return ret - - -def collate(*iterables, **kwargs): - """Return an iterable sorted merge of the already-sorted items from each of - ``iterables``. - - >>> list(collate('ACDZ', 'AZ', 'JKL')) - ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] - - Works lazily, keeping only the next value from each iterable in memory. - - :arg key: A function that returns a comparison value for an item. Defaults - to the identity function. - :arg reverse: If ``reverse=True``, yield results in descending order - rather than ascending. ``iterables`` must also yield their elements in - descending order. - - If the elements of the passed-in iterables are out of order, you might get - unexpected results. - - """ - key = kwargs.pop('key', lambda a: a) - reverse = kwargs.pop('reverse', False) - - min_or_max = partial(max if reverse else min, key=lambda (a, b): a) - peekables = [peekable(it) for it in iterables] - peekables = [p for p in peekables if p] # Kill empties. - while peekables: - _, p = min_or_max((key(p.peek()), p) for p in peekables) - yield p.next() - peekables = [p for p in peekables if p] - - -def consumer(func): - """Decorator that automatically advances a PEP-342-style "reverse iterator" - to its first yield point so you don't have to call ``next()`` on it - manually. - - >>> @consumer - ... def tally(): - ... i = 0 - ... while True: - ... print 'Thing number %s is %s.' % (i, (yield)) - ... i += 1 - ... - >>> t = tally() - >>> t.send('red') - Thing number 0 is red. - >>> t.send('fish') - Thing number 1 is fish. - - Without the decorator, you would have to call ``t.next()`` before - ``t.send()`` could be used. - - """ - @wraps(func) - def wrapper(*args, **kwargs): - gen = func(*args, **kwargs) - gen.next() - return gen - return wrapper +from more_itertools.more import * +from more_itertools.recipes import * diff --git a/more_itertools/more.py b/more_itertools/more.py new file mode 100644 index 00000000..0dbaa295 --- /dev/null +++ b/more_itertools/more.py @@ -0,0 +1,197 @@ +from functools import partial, wraps +from itertools import izip_longest +from recipes import * + +__all__ = ['chunked', 'first', 'peekable', 'collate', 'consumer'] + + +_marker = object() + + +def chunked(iterable, n): + """Break an iterable into lists of a given length:: + + >>> list(chunked([1, 2, 3, 4, 5, 6, 7], 3)) + [[1, 2, 3], [4, 5, 6], [7]] + + If the length of ``iterable`` is not evenly divisible by ``n``, the last + returned list will be shorter. + + This is useful for splitting up a computation on a large number of keys + into batches, to be pickled and sent off to worker processes. One example + is operations on rows in MySQL, which does not implement server-side + cursors properly and would otherwise load the entire dataset into RAM on + the client. + + """ + # Doesn't seem to run into any number-of-args limits. + for group in (list(g) for g in izip_longest(*[iter(iterable)] * n, + fillvalue=_marker)): + if group[-1] is _marker: + # If this is the last group, shuck off the padding: + del group[group.index(_marker):] + yield group + + +def first(iterable, default=_marker): + """Return the first item of an iterable, ``default`` if there is none. + + >>> first(xrange(4)) + 0 + >>> first(xrange(0), 'some default') + 'some default' + + If ``default`` is not provided and there are no items in the iterable, + raise ``ValueError``. + + ``first()`` is useful when you have a generator of expensive-to-retrieve + values and want any arbitrary one. It is marginally shorter than + ``next(iter(...))`` but saves you an entire ``try``/``except`` when you + want to provide a fallback value. + + """ + try: + return next(iter(iterable)) + except StopIteration: + # I'm on the edge about raising ValueError instead of StopIteration. At + # the moment, ValueError wins, because the caller could conceivably + # want to do something different with flow control when I raise the + # exception, and it's weird to explicitly catch StopIteration. + if default is _marker: + raise ValueError('first() was called on an empty iterable, and no ' + 'default value was provided.') + return default + + +class peekable(object): + """Wrapper for an iterator to allow 1-item lookahead + + Call ``peek()`` on the result to get the value that will next pop out of + ``next()``, without advancing the iterator: + + >>> p = peekable(xrange(2)) + >>> p.peek() + 0 + >>> p.next() + 0 + >>> p.peek() + 1 + >>> p.next() + 1 + + Pass ``peek()`` a default value, and it will be returned in the case where + the iterator is exhausted: + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + If no default is provided, ``peek()`` raises ``StopIteration`` when there + are no items left. + + To test whether there are more items in the iterator, examine the + peekable's truth value. If it is truthy, there are more items. + + >>> assert peekable(xrange(1)) + >>> assert not peekable([]) + + """ + # Lowercase to blend in with itertools. The fact that it's a class is an + # implementation detail. + + def __init__(self, iterable): + self._it = iter(iterable) + + def __iter__(self): + return self + + def __nonzero__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not hasattr(self, '_peek'): + try: + self._peek = self._it.next() + except StopIteration: + if default is _marker: + raise + return default + return self._peek + + def next(self): + ret = self.peek() + del self._peek + return ret + + +def collate(*iterables, **kwargs): + """Return a sorted merge of the items from each of several already-sorted + ``iterables``. + + >>> list(collate('ACDZ', 'AZ', 'JKL')) + ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] + + Works lazily, keeping only the next value from each iterable in memory. Use + ``collate()`` to, for example, perform a n-way mergesort of items that + don't fit in memory. + + :arg key: A function that returns a comparison value for an item. Defaults + to the identity function. + :arg reverse: If ``reverse=True``, yield results in descending order + rather than ascending. ``iterables`` must also yield their elements in + descending order. + + If the elements of the passed-in iterables are out of order, you might get + unexpected results. + + """ + key = kwargs.pop('key', lambda a: a) + reverse = kwargs.pop('reverse', False) + + min_or_max = partial(max if reverse else min, key=lambda (a, b): a) + peekables = [peekable(it) for it in iterables] + peekables = [p for p in peekables if p] # Kill empties. + while peekables: + _, p = min_or_max((key(p.peek()), p) for p in peekables) + yield p.next() + peekables = [p for p in peekables if p] + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print 'Thing number %s is %s.' % (i, (yield)) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``t.next()`` before + ``t.send()`` could be used. + + """ + @wraps(func) + def wrapper(*args, **kwargs): + gen = func(*args, **kwargs) + gen.next() + return gen + return wrapper diff --git a/more_itertools/recipes.py b/more_itertools/recipes.py index 6358cce4..991b4101 100644 --- a/more_itertools/recipes.py +++ b/more_itertools/recipes.py @@ -1,16 +1,25 @@ -import operator +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. [1] http://docs.python.org/library/itertools.html#recipes + +""" from collections import deque -from itertools import chain, count, combinations, cycle, groupby, imap, ifilterfalse, islice, izip, izip_longest, repeat, starmap, tee +from itertools import chain, count, combinations, cycle, groupby, imap, ifilterfalse, islice, izip, izip_longest, repeat, starmap, tee # Wrapping breaks 2to3. +import operator from random import randrange, sample, choice -''' -Imported from the recipes section of the itertools documentation. -All functions taken from the recipes section of the itertools library docs[1]. -Some backwards compatable usability improvements have been made. +__all__ = ['take', 'tabulate', 'consume', 'nth', 'quantify', 'padnone', + 'ncycles', 'dotproduct', 'flatten', 'repeatfunc', 'pairwise', + 'grouper', 'roundrobin', 'powerset', 'unique_everseen', + 'unique_justseen', 'iter_except', 'random_product', + 'random_permutation', 'random_combination', + 'random_combination_with_replacement'] -[1] http://docs.python.org/library/itertools.html#recipes -''' def take(n, iterable): """Return first n items of the iterable as a list @@ -22,6 +31,7 @@ def take(n, iterable): Effectively a short replacement for ``next`` based iterator consumption when you want more than one item, but less than the whole iterator. + """ return list(islice(iterable, n)) diff --git a/more_itertools/tests/tests.py b/more_itertools/tests/test_more.py similarity index 100% rename from more_itertools/tests/tests.py rename to more_itertools/tests/test_more.py