Skip to content

Commit 2e3f570

Browse files
bpo-30416: Protect the optimizer during constant folding. (#4860)
The optimizer no longer spends much time doing complex calculations and no longer consumes much memory creating large constants that will be dropped later. This also fixes bpo-21074.
1 parent a5552f0 commit 2e3f570

File tree

4 files changed

+144
-27
lines changed

4 files changed

+144
-27
lines changed

‎Lib/test/test_memoryio.py‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,8 @@ def test_sizeof(self):
759759
check = self.check_sizeof
760760
self.assertEqual(object.__sizeof__(io.BytesIO()), basesize)
761761
check(io.BytesIO(), basesize )
762-
check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000))
762+
n = 1000 # use a variable to prevent constant folding
763+
check(io.BytesIO(b'a' * n), basesize + sys.getsizeof(b'a' * n))
763764

764765
# Various tests of copy-on-write behaviour for BytesIO.
765766

‎Lib/test/test_peepholer.py‎

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,15 @@ def test_folding_of_binops_on_constants(self):
175175
self.assertInBytecode(code, 'LOAD_CONST', 'b')
176176

177177
# Verify that large sequences do not result from folding
178-
code = compile('a="x"*1000', '', 'single')
178+
code = compile('a="x"*10000', '', 'single')
179+
self.assertInBytecode(code, 'LOAD_CONST', 10000)
180+
self.assertNotIn("x"*10000, code.co_consts)
181+
code = compile('a=1<<1000', '', 'single')
179182
self.assertInBytecode(code, 'LOAD_CONST', 1000)
183+
self.assertNotIn(1<<1000, code.co_consts)
184+
code = compile('a=2**1000', '', 'single')
185+
self.assertInBytecode(code, 'LOAD_CONST', 1000)
186+
self.assertNotIn(2**1000, code.co_consts)
180187

181188
def test_binary_subscr_on_unicode(self):
182189
# valid code get optimized
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The optimizer is now protected from spending much time doing complex
2+
calculations and consuming much memory for creating large constants in
3+
constant folding. Increased limits for constants that can be produced in
4+
constant folding.

‎Python/ast_opt.c‎

Lines changed: 130 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,132 @@ fold_unaryop(expr_ty node, PyArena *arena)
125125
return make_const(node, newval, arena);
126126
}
127127

128+
/* Check that a collection does not contain too many items (including
129+
subcollections). This protects from creating a constant that needs
130+
too much time for calculating a hash.
131+
"limit" is the maximal number of items.
132+
Returns a negative number if the total number of items exceeds the
133+
limit. Otherwise returns the limit minus the total number of items.
134+
*/
135+
136+
static Py_ssize_t
137+
check_complexity(PyObject *obj, Py_ssize_t limit)
138+
{
139+
if (PyTuple_Check(obj)) {
140+
Py_ssize_t i;
141+
limit -= PyTuple_GET_SIZE(obj);  /* charge the tuple's own items before descending */
142+
for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {  /* stops as soon as the limit goes negative */
143+
limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit);
144+
}
145+
return limit;
146+
}
147+
else if (PyFrozenSet_Check(obj)) {
148+
Py_ssize_t i = 0;
149+
PyObject *item;
150+
Py_hash_t hash;
151+
limit -= PySet_GET_SIZE(obj);  /* charge the frozenset's own items before descending */
152+
while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) {
153+
limit = check_complexity(item, limit);
154+
}
155+
}
156+
return limit;  /* frozenset result, or the unchanged limit for any other object */
157+
}
158+
159+
/* Limits applied by the safe_*() helpers in this file before they compute
   a result:
   MAX_INT_SIZE        - int results (safe_multiply, safe_power, safe_lshift)
   MAX_COLLECTION_SIZE - repeated tuple/frozenset (safe_multiply)
   MAX_STR_SIZE        - repeated str/bytes (safe_multiply)
   MAX_TOTAL_ITEMS     - check_complexity() budget, divided by the repeat
                         count (safe_multiply) */
#define MAX_INT_SIZE 128 /* bits */
160+
#define MAX_COLLECTION_SIZE 256 /* items */
161+
#define MAX_STR_SIZE 4096 /* characters */
162+
#define MAX_TOTAL_ITEMS 1024 /* including nested collections */
163+
164+
/* Multiply v by w, refusing operations whose result could be too large.
   Returns NULL without calling PyNumber_Multiply() when:
   - both operands are ints with nonzero Py_SIZE and the sum of their
     bit lengths exceeds MAX_INT_SIZE (or _PyLong_NumBits() returns
     (size_t)-1);
   - v is an int, w is a non-empty tuple or frozenset, and the repeat
     count is negative, exceeds MAX_COLLECTION_SIZE / size, or makes
     check_complexity(w, MAX_TOTAL_ITEMS / count) go negative;
   - v is an int, w is a non-empty str or bytes, and the repeat count is
     negative or exceeds MAX_STR_SIZE / size.
   If instead w is the int and v is the tuple/frozenset/str/bytes, the
   call is repeated with the arguments swapped.
   In every other case returns the result of PyNumber_Multiply(v, w).
*/
static PyObject *
165+
safe_multiply(PyObject *v, PyObject *w)
166+
{
167+
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
168+
size_t vbits = _PyLong_NumBits(v);
169+
size_t wbits = _PyLong_NumBits(w);
170+
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
171+
return NULL;
172+
}
173+
if (vbits + wbits > MAX_INT_SIZE) {
174+
return NULL;
175+
}
176+
}
177+
else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) {
178+
Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) :
179+
PySet_GET_SIZE(w);
180+
if (size) {  /* an empty collection is never rejected here */
181+
long n = PyLong_AsLong(v);  /* NOTE(review): presumably -1 on conversion failure, which the n < 0 test below also rejects - confirm */
182+
if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
183+
return NULL;
184+
}
185+
if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {  /* n is tested first so the division never sees zero */
186+
return NULL;
187+
}
188+
}
189+
}
190+
else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
191+
Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
192+
PyBytes_GET_SIZE(w);
193+
if (size) {  /* an empty str/bytes is never rejected here */
194+
long n = PyLong_AsLong(v);
195+
if (n < 0 || n > MAX_STR_SIZE / size) {
196+
return NULL;
197+
}
198+
}
199+
}
200+
else if (PyLong_Check(w) &&
201+
(PyTuple_Check(v) || PyFrozenSet_Check(v) ||
202+
PyUnicode_Check(v) || PyBytes_Check(v)))
203+
{
204+
return safe_multiply(w, v);  /* reuse the int-on-the-left branches above */
205+
}
206+
207+
return PyNumber_Multiply(v, w);
208+
}
209+
210+
/* Compute v ** w, refusing int powers whose result could be too large.
   The check applies only when both operands are ints with
   Py_SIZE(v) != 0 and Py_SIZE(w) > 0 (presumably: v nonzero and w
   positive - confirm against the int object layout). In that case
   returns NULL without calling PyNumber_Power() if the bit length of v
   exceeds MAX_INT_SIZE divided by the value of w, or if
   _PyLong_NumBits() or PyLong_AsSize_t() returns (size_t)-1.
   Otherwise returns the result of PyNumber_Power(v, w, Py_None).
*/
static PyObject *
211+
safe_power(PyObject *v, PyObject *w)
212+
{
213+
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w) > 0) {
214+
size_t vbits = _PyLong_NumBits(v);
215+
size_t wbits = PyLong_AsSize_t(w);  /* despite the name, this is the exponent's value, not a bit count */
216+
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
217+
return NULL;
218+
}
219+
if (vbits > MAX_INT_SIZE / wbits) {
220+
return NULL;
221+
}
222+
}
223+
224+
return PyNumber_Power(v, w, Py_None);
225+
}
226+
227+
/* Compute v << w, refusing int shifts whose result could be too large.
   The check applies only when both operands are ints with nonzero
   Py_SIZE. In that case returns NULL without calling PyNumber_Lshift()
   if the shift count exceeds MAX_INT_SIZE, if the bit length of v
   exceeds MAX_INT_SIZE minus the shift count, or if _PyLong_NumBits()
   or PyLong_AsSize_t() returns (size_t)-1.
   Otherwise returns the result of PyNumber_Lshift(v, w).
*/
static PyObject *
228+
safe_lshift(PyObject *v, PyObject *w)
229+
{
230+
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
231+
size_t vbits = _PyLong_NumBits(v);
232+
size_t wbits = PyLong_AsSize_t(w);  /* despite the name, this is the shift count itself */
233+
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
234+
return NULL;
235+
}
236+
if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) {  /* first test keeps the unsigned subtraction from wrapping */
237+
return NULL;
238+
}
239+
}
240+
241+
return PyNumber_Lshift(v, w);
242+
}
243+
244+
/* Compute v % w, except when v is a str or bytes object: then returns
   NULL without calling PyNumber_Remainder().
   NOTE(review): presumably this keeps %-formatting from being evaluated
   at this point - confirm the intent.
   For any other v returns the result of PyNumber_Remainder(v, w).
*/
static PyObject *
245+
safe_mod(PyObject *v, PyObject *w)
246+
{
247+
if (PyUnicode_Check(v) || PyBytes_Check(v)) {
248+
return NULL;
249+
}
250+
251+
return PyNumber_Remainder(v, w);
252+
}
253+
128254
static int
129255
fold_binop(expr_ty node, PyArena *arena)
130256
{
@@ -147,7 +273,7 @@ fold_binop(expr_ty node, PyArena *arena)
147273
newval = PyNumber_Subtract(lv, rv);
148274
break;
149275
case Mult:
150-
newval = PyNumber_Multiply(lv, rv);
276+
newval = safe_multiply(lv, rv);
151277
break;
152278
case Div:
153279
newval = PyNumber_TrueDivide(lv, rv);
@@ -156,13 +282,13 @@ fold_binop(expr_ty node, PyArena *arena)
156282
newval = PyNumber_FloorDivide(lv, rv);
157283
break;
158284
case Mod:
159-
newval = PyNumber_Remainder(lv, rv);
285+
newval = safe_mod(lv, rv);
160286
break;
161287
case Pow:
162-
newval = PyNumber_Power(lv, rv, Py_None);
288+
newval = safe_power(lv, rv);
163289
break;
164290
case LShift:
165-
newval = PyNumber_Lshift(lv, rv);
291+
newval = safe_lshift(lv, rv);
166292
break;
167293
case RShift:
168294
newval = PyNumber_Rshift(lv, rv);
@@ -180,27 +306,6 @@ fold_binop(expr_ty node, PyArena *arena)
180306
return 1;
181307
}
182308

183-
if (newval == NULL) {
184-
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
185-
return 0;
186-
}
187-
PyErr_Clear();
188-
return 1;
189-
}
190-
191-
/* Avoid creating large constants. */
192-
Py_ssize_t size = PyObject_Size(newval);
193-
if (size == -1) {
194-
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
195-
Py_DECREF(newval);
196-
return 0;
197-
}
198-
PyErr_Clear();
199-
}
200-
else if (size > 20) {
201-
Py_DECREF(newval);
202-
return 1;
203-
}
204309
return make_const(node, newval, arena);
205310
}
206311

0 commit comments

Comments
 (0)