Is there a Pythonic equivalent to Ruby's #each_cons?
In Ruby you can do this:
array = [1,2,3,4]
array.each_cons(2).to_a
=> [[1,2],[2,3],[3,4]]
I don't think there is one; I looked through the built-in module itertools, which is where I would expect it to be. You can simply create one, though:
def each_cons(xs, n):
    """Return every run of ``n`` consecutive items of the sequence ``xs``.

    The result is a list of slices of ``xs`` (so each window has the same
    type as ``xs``: list slices for a list, substrings for a string).
    ``xs`` must support ``len()`` and slicing; when it holds fewer than
    ``n`` items the result is an empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1 (a window needs at least one
            item; Ruby's ``each_cons`` rejects such sizes as well).
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    # Index of the last position at which a full window still fits.
    last_start = len(xs) - n
    return [xs[start:start + n] for start in range(last_start + 1)]
[…] 2, but after modifying it for an arbitrary cons, it looked like yours.
x is a collection, so xs is a better name (naming is very important, even in examples; I'd even say it's more important in examples :)).
For such things, itertools is the module you should be looking at:
from itertools import tee

try:  # Python 2: izip is the lazy counterpart of zip
    from itertools import izip
except ImportError:  # Python 3: izip was removed, the built-in zip is already lazy
    izip = zip


def pairwise(iterable):
    """Return an iterator over overlapping pairs of consecutive items.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    Accepts any iterable, including one-shot generators. Nothing is yielded
    when the input has fewer than two items.
    """
    a, b = tee(iterable)
    # Advance the second copy by one item so that zipping the two copies
    # pairs every element with its successor; the None default keeps an
    # empty input from raising StopIteration here.
    next(b, None)
    return izip(a, b)
Then:
>>> list(pairwise([1, 2, 3, 4]))
[(1, 2), (2, 3), (3, 4)]
For an even more general solution, consider this:
def split_subsequences(iterable, length=2, overlap=0):
    """Yield consecutive chunks of ``iterable`` as lists of ``length`` items.

    Each chunk starts with the last ``overlap`` items of the previous chunk.
    Once the input runs out, whatever is left is yielded as a final, shorter
    list. With ``overlap > 0`` that remainder includes the carried-over
    items, so ``[1, 2, 3, 4]`` with ``length=2, overlap=1`` ends with ``[4]``.

    Args:
        iterable: any iterable; it is consumed lazily.
        length: number of items per full chunk, at least 1.
        overlap: number of items shared by neighbouring chunks,
            ``0 <= overlap < length``.

    Raises:
        ValueError: if ``length`` or ``overlap`` is out of range. Because
            this is a generator, the error surfaces on the first iteration.
    """
    if length < 1:
        raise ValueError("length must be at least 1")
    # Without this check a step of zero (overlap == length) would yield the
    # same chunk forever, and a negative overlap would produce oversized chunks.
    if not 0 <= overlap < length:
        raise ValueError("overlap must satisfy 0 <= overlap < length")

    step = length - overlap
    it = iter(iterable)
    results = list(itertools.islice(it, length))
    while len(results) == length:
        yield results
        # Keep the overlapping tail (as a new list, so the chunk just
        # yielded is never mutated) and top it up with fresh items.
        results = results[step:]
        results.extend(itertools.islice(it, step))
    if results:
        yield results
This allows arbitrary lengths of subsequences and arbitrary overlapping. Usage:
>>> list(split_subsequences([1, 2, 3, 4], length=2))
[[1, 2], [3, 4]]
>>> list(split_subsequences([1, 2, 3, 4], length=2, overlap=1))
[[1, 2], [2, 3], [3, 4], [4]]
[…] each_cons when you have a sequence with insufficient length (each_cons returns nil). The implementation in snipsnipsnip's answer seems more appropriate in this regard.
list(split_subsequences([1, 2, 3, 4, 5, 6], length=3, overlap=1)) should return [[1,2,3],[2,3,4],[3,4,5],[4,5,6]] and not [[1, 2, 3], [3, 4, 5], [5, 6]].
My solution for lists (Python 2):
import itertools
def each_cons(xs, n):
    """Return an iterator over tuples of ``n`` consecutive items of ``xs``.

    ``xs`` must support slicing (list, tuple, str, ...); plain generators
    are not supported. Nothing is yielded when ``xs`` holds fewer than
    ``n`` items.
    """
    # itertools.izip exists only on Python 2; on Python 3 the built-in zip
    # is already lazy, so fall back to it instead of failing.
    lazy_zip = getattr(itertools, "izip", zip)
    # The i-th slice starts i items in; zipping the slices lines up each
    # item with its n - 1 successors and stops at the shortest slice.
    return lazy_zip(*(xs[i:] for i in range(n)))
Edit: In Python 3, itertools.izip no longer exists, so you use plain zip:
def each_cons(xs, n):
    """Return an iterator over tuples of ``n`` consecutive items of ``xs``.

    ``xs`` must support slicing (list, tuple, str, ...); plain generators
    are not supported. Nothing is yielded when ``xs`` holds fewer than
    ``n`` items.
    """
    # One slice per window position: the slice for ``offset`` starts
    # ``offset`` items in. zip stops at the shortest slice, which is exactly
    # where the last full window ends.
    shifted = (xs[offset:] for offset in range(n))
    return zip(*shifted)
[…] xs[i:].
[…] islice(xs, i, None) instead of xs[i:]. I preferred the latter for several reasons: a) The question was about lists. b) I use each_cons for lists most of the time. c) In case xs is a list, the sliced lists will share memory, so it may be more memory efficient than doing it lazily.
[…] #each_cons works for everything, so I thought I should point it out. I've posted a lazy solution for those who need one.
[…] xrange(). It still failed with a plain generator, though. This little piece of code is very beautiful, thanks again for sharing.
Python can surely do this. If you don't want to do it so eagerly, use itertools' islice and izip. Also, it's important to remember that normal slices will create a copy, so if memory usage is important you should also consider the itertools equivalents.
def each_cons(l, n=2):
    """Return overlapping windows of ``n`` consecutive items of sequence ``l``.

    With the default ``n=2`` this pairs every item with its successor.
    ``l`` must support slicing; each slice is a copy, so prefer an
    itertools-based variant when memory matters. Nothing is produced when
    ``l`` holds fewer than ``n`` items.

    The parameter keeps its original name ``l`` so existing keyword callers
    continue to work.
    """
    # Zipping the slices that start at offsets 0 .. n-1 lines up each item
    # with its n - 1 successors; zip stops at the shortest slice.
    return zip(*(l[offset:] for offset in range(n)))
UPDATE: Never mind my answer below; just use toolz.itertoolz.sliding_window() -- it will do the right thing.
For a truly lazy implementation that preserves the behavior of Ruby's each_cons when the sequence/generator has insufficient length:
import itertools
def each_cons(sequence, n):
    """Lazily yield tuples of ``n`` consecutive items from any iterable.

    Works with one-shot iterators and generators as well as sequences.
    Nothing is yielded when the input has fewer than ``n`` items, which
    mirrors Ruby's ``each_cons``.
    """
    # itertools.izip exists only on Python 2; on Python 3 the built-in zip
    # is already lazy, so fall back to it instead of failing.
    lazy_zip = getattr(itertools, "izip", zip)
    # tee() hands out n independent iterators over the same input; skipping
    # i items on the i-th one staggers them so that zipping yields windows.
    staggered = (itertools.islice(branch, skip, None)
                 for skip, branch in enumerate(itertools.tee(sequence, n)))
    return lazy_zip(*staggered)
Examples:
>>> print(list(each_cons(xrange(5), 2)))
[(0, 1), (1, 2), (2, 3), (3, 4)]
>>> print(list(each_cons(xrange(5), 5)))
[(0, 1, 2, 3, 4)]
>>> print(list(each_cons(xrange(5), 6)))
[]
>>> print(list(each_cons((a for a in xrange(5)), 2)))
[(0, 1), (1, 2), (2, 3), (3, 4)]
Note that the argument unpacking (*) used on the arguments for izip is applied to the tuple of size n returned by itertools.tee(sequence, n) (that is, the "window size"), and not to the sequence we want to iterate over.
from itertools import islice, tee
def each_cons(sequence, n):
    """Lazily yield tuples of ``n`` consecutive items from any iterable.

    Works with one-shot iterators and generators as well as sequences.
    Nothing is yielded when the input has fewer than ``n`` items.
    """
    # tee() hands out n independent iterators over the same input.
    branches = tee(sequence, n)
    # Skipping ``skip`` items on the skip-th branch staggers the branches,
    # so zipping them produces the sliding windows.
    staggered = (
        islice(branch, skip, None)
        for skip, branch in enumerate(branches)
    )
    return zip(*staggered)
$ ipython
...
In [2]: a_list = [1, 2, 3, 4, 5]
In [3]: list(each_cons(a_list, 2))
Out[3]: [(1, 2), (2, 3), (3, 4), (4, 5)]
In [4]: list(each_cons(a_list, 3))
Out[4]: [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
In [5]: list(each_cons(a_list, 5))
Out[5]: [(1, 2, 3, 4, 5)]
In [6]: list(each_cons(a_list, 6))
Out[6]: []
Here's an implementation using collections.deque. This supports arbitrary generators as well
from collections import deque
def each_cons(it, n):
    """Yield tuples of ``n`` consecutive items from any iterable.

    Works with arbitrary iterators and generators. When the input holds
    fewer than ``n`` items, a single tuple padded with ``None`` up to
    length ``n`` is yielded.

    Raises:
        ValueError: if ``n`` is smaller than 1. Because this is a generator,
            the error surfaces on the first iteration.
    """
    from itertools import islice

    if n < 1:
        raise ValueError("n must be a positive integer")

    # Convert to an iterator so the fill step and the main loop share state.
    it = iter(it)

    # Fill the first window; maxlen makes every later append drop the oldest
    # item automatically, so no explicit popleft is needed.
    window = deque(islice(it, n), maxlen=n)

    missing = n - len(window)
    if missing:
        # Input ran out before a full window existed: pad with None and stop.
        window.extend([None] * missing)
        yield tuple(window)
        return

    yield tuple(window)

    # Main loop: slide the window one item at a time.
    for val in it:
        window.append(val)
        yield tuple(window)
Usage:
list(each_cons([1,2,3,4], 2))
# => [(1, 2), (2, 3), (3, 4)]
# This supports generators
list(each_cons(range(5), 2))
# => [(0, 1), (1, 2), (2, 3), (3, 4)]
list(each_cons([1,2,3,4], 10))
# => [(1, 2, 3, 4, None, None, None, None, None, None)]
toolz.itertoolz.sliding_window().