changeset: 90899:f5521f5dec4a user: Raymond Hettinger date: Fri May 30 02:28:36 2014 -0700 files: Doc/glossary.rst Doc/library/heapq.rst Lib/heapq.py Lib/test/test_heapq.py Misc/NEWS description: Issue #13742: Add key and reverse parameters to heapq.merge() diff -r 89be1419472c -r f5521f5dec4a Doc/glossary.rst --- a/Doc/glossary.rst Thu May 29 23:42:47 2014 -0700 +++ b/Doc/glossary.rst Fri May 30 02:28:36 2014 -0700 @@ -444,12 +444,13 @@ A number of tools in Python accept key functions to control how elements are ordered or grouped. They include :func:`min`, :func:`max`, - :func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`, - :func:`heapq.nlargest`, and :func:`itertools.groupby`. + :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`, + :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and + :func:`itertools.groupby`. There are several ways to create a key function. For example. the :meth:`str.lower` method can serve as a key function for case insensitive - sorts. Alternatively, an ad-hoc key function can be built from a + sorts. Alternatively, a key function can be built from a :keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also, the :mod:`operator` module provides three key function constructors: :func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and diff -r 89be1419472c -r f5521f5dec4a Doc/library/heapq.rst --- a/Doc/library/heapq.rst Thu May 29 23:42:47 2014 -0700 +++ b/Doc/library/heapq.rst Fri May 30 02:28:36 2014 -0700 @@ -81,7 +81,7 @@ The module also offers three general purpose functions based on heaps. -.. function:: merge(*iterables) +.. function:: merge(*iterables, key=None, reverse=False) Merge multiple sorted inputs into a single sorted output (for example, merge timestamped entries from multiple log files). Returns an :term:`iterator` @@ -91,6 +91,18 @@ not pull the data into memory all at once, and assumes that each of the input streams is already sorted (smallest to largest). + Has two optional arguments which must be specified as keyword arguments. + + *key* specifies a :term:`key function` of one argument that is used to + extract a comparison key from each input element. The default value is + ``None`` (compare the elements directly). + + *reverse* is a boolean value. If set to ``True``, then the input elements + are merged as if each comparison were reversed. + + .. versionchanged:: 3.5 + Added the optional *key* and *reverse* parameters. + .. function:: nlargest(n, iterable, key=None) diff -r 89be1419472c -r f5521f5dec4a Lib/heapq.py --- a/Lib/heapq.py Thu May 29 23:42:47 2014 -0700 +++ b/Lib/heapq.py Fri May 30 02:28:36 2014 -0700 @@ -176,6 +176,16 @@ for i in reversed(range(n//2)): _siftup(x, i) +def _heappop_max(heap): + """Maxheap version of a heappop.""" + lastelt = heap.pop() # raises appropriate IndexError if heap is empty + if heap: + returnitem = heap[0] + heap[0] = lastelt + _siftup_max(heap, 0) + return returnitem + return lastelt + def _heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty @@ -311,7 +321,7 @@ except ImportError: pass -def merge(*iterables): +def merge(*iterables, key=None, reverse=False): '''Merge multiple sorted inputs into a single sorted output. Similar to sorted(itertools.chain(*iterables)) but returns a generator, @@ -321,31 +331,73 @@ >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + If *key* is not None, applies a key function to each element to determine + its sort order. + + >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len)) + ['dog', 'cat', 'fish', 'horse', 'kangaroo'] + ''' h = [] h_append = h.append + + if reverse: + _heapify = _heapify_max + _heappop = _heappop_max + _heapreplace = _heapreplace_max + direction = -1 + else: + _heapify = heapify + _heappop = heappop + _heapreplace = heapreplace + direction = 1 + + if key is None: + for order, it in enumerate(map(iter, iterables)): + try: + next = it.__next__ + h_append([next(), order * direction, next]) + except StopIteration: + pass + _heapify(h) + while len(h) > 1: + try: + while True: + value, order, next = s = h[0] + yield value + s[0] = next() # raises StopIteration when exhausted + _heapreplace(h, s) # restore heap condition + except StopIteration: + _heappop(h) # remove empty iterator + if h: + # fast case when only a single iterator remains + value, order, next = h[0] + yield value + yield from next.__self__ + return + for order, it in enumerate(map(iter, iterables)): try: next = it.__next__ - h_append([next(), order, next]) + value = next() + h_append([key(value), order * direction, value, next]) except StopIteration: pass - heapify(h) - - _heapreplace = heapreplace + _heapify(h) while len(h) > 1: try: while True: - value, order, next = s = h[0] + key_value, order, value, next = s = h[0] yield value - s[0] = next() # raises StopIteration when exhausted - _heapreplace(h, s) # restore heap condition + value = next() + s[0] = key(value) + s[2] = value + _heapreplace(h, s) except StopIteration: - heappop(h) # remove empty iterator + _heappop(h) if h: - # fast case when only a single iterator remains - value, order, next = h[0] + key_value, order, value, next = h[0] yield value yield from next.__self__ diff -r 89be1419472c -r f5521f5dec4a Lib/test/test_heapq.py --- a/Lib/test/test_heapq.py Thu May 29 23:42:47 2014 -0700 +++ b/Lib/test/test_heapq.py Fri May 30 02:28:36 2014 -0700 @@ -6,6 +6,7 @@ from test import support from unittest import TestCase, skipUnless +from operator import itemgetter py_heapq = support.import_fresh_module('heapq', blocked=['_heapq']) c_heapq = support.import_fresh_module('heapq', fresh=['_heapq']) @@ -152,11 +153,21 @@ def test_merge(self): inputs = [] - for i in range(random.randrange(5)): - row = sorted(random.randrange(1000) for j in range(random.randrange(10))) + for i in range(random.randrange(25)): + row = [] + for j in range(random.randrange(100)): + tup = random.choice('ABC'), random.randrange(-500, 500) + row.append(tup) inputs.append(row) - self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs))) - self.assertEqual(list(self.module.merge()), []) + + for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]: + for reverse in [False, True]: + seqs = [] + for seq in inputs: + seqs.append(sorted(seq, key=key, reverse=reverse)) + self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse), + list(self.module.merge(*seqs, key=key, reverse=reverse))) + self.assertEqual(list(self.module.merge()), []) def test_merge_does_not_suppress_index_error(self): # Issue 19018: Heapq.merge suppresses IndexError from user generator diff -r 89be1419472c -r f5521f5dec4a Misc/NEWS --- a/Misc/NEWS Thu May 29 23:42:47 2014 -0700 +++ b/Misc/NEWS Fri May 30 02:28:36 2014 -0700 @@ -94,6 +94,9 @@ error bubble up as this "bad data" appears in many real world zip files in the wild and is ignored by other zip tools. +- Issue #13742: Added "key" and "reverse" parameters to heapq.merge(). + (First draft of patch contributed by Simon Sapin.) + - Issue #21402: tkinter.ttk now works when default root window is not set. - Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by