Skip to content

Commit 372d705

Browse files
authored
bpo-33234 Improve list() pre-sizing for inputs with known lengths (GH-9846)
The list() constructor isn't taking full advantage of known input lengths or length hints. This commit makes the constructor pre-size and not over-allocate when the input size is known (the input collection implements __len__). One of the main advantages is that this provides a 12% memory saving, owing to the difference between over-allocating and allocating exactly the input size. For efficiency purposes and to avoid a performance regression for small generators and collections, the size of the input object is calculated using __len__ and not __length_hint__, as the latter is considerably slower.
1 parent 569d12f commit 372d705

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

‎Lib/test/test_list.py‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22
from test import list_tests
3+
from test.support import cpython_only
34
import pickle
45
import unittest
56

@@ -157,5 +158,13 @@ class L(list): pass
157158
with self.assertRaises(TypeError):
158159
(3,) + L([1,2])
159160

161+
@cpython_only
def test_preallocation(self):
    """list() built from an input of known length must not over-allocate.

    A list created by sequence repetition provides the reference size;
    lists constructed from a list and from a range of the same length
    must report the same sys.getsizeof() value.
    """
    reference = [0] * 10
    expected_size = sys.getsizeof(reference)

    built_from_list = list([0] * 10)
    self.assertEqual(expected_size, sys.getsizeof(built_from_list))

    built_from_range = list(range(10))
    self.assertEqual(expected_size, sys.getsizeof(built_from_range))
168+
160169
if __name__ == "__main__":
161170
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The list constructor will pre-size and not over-allocate when
2+
the input length is known.

‎Objects/listobject.c‎

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
7676
return 0;
7777
}
7878

79+
static int
80+
list_preallocate_exact(PyListObject *self, Py_ssize_t size)
81+
{
82+
assert(self->ob_item == NULL);
83+
84+
PyObject **items;
85+
size_t allocated;
86+
87+
allocated = (size_t)size;
88+
if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
89+
PyErr_NoMemory();
90+
return -1;
91+
}
92+
93+
if (size == 0) {
94+
allocated = 0;
95+
}
96+
items = (PyObject **)PyMem_New(PyObject*, allocated);
97+
if (items == NULL) {
98+
PyErr_NoMemory();
99+
return -1;
100+
}
101+
self->ob_item = items;
102+
self->allocated = allocated;
103+
return 0;
104+
}
105+
79106
/* Debug statistic to compare allocations with reuse through the free list */
80107
#undef SHOW_ALLOC_COUNT
81108
#ifdef SHOW_ALLOC_COUNT
@@ -2683,6 +2710,19 @@ list___init___impl(PyListObject *self, PyObject *iterable)
26832710
(void)_list_clear(self);
26842711
}
26852712
if (iterable != NULL) {
2713+
if (_PyObject_HasLen(iterable)) {
2714+
Py_ssize_t iter_len = PyObject_Size(iterable);
2715+
if (iter_len == -1) {
2716+
if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2717+
return -1;
2718+
}
2719+
PyErr_Clear();
2720+
}
2721+
if (iter_len > 0 && self->ob_item == NULL
2722+
&& list_preallocate_exact(self, iter_len)) {
2723+
return -1;
2724+
}
2725+
}
26862726
PyObject *rv = list_extend(self, iterable);
26872727
if (rv == NULL)
26882728
return -1;

0 commit comments

Comments
 (0)