changeset: 100625:e86cd4a872b8 parent: 100622:23a7481eafd4 parent: 100623:1c44cea2ea8f user: Serhiy Storchaka date: Sun Mar 20 23:47:48 2016 +0200 files: Lib/tokenize.py Misc/NEWS Parser/tokenizer.c description: Issue #26581: Use the first coding cookie on a line, not the last one. diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/idlelib/IOBinding.py --- a/Lib/idlelib/IOBinding.py Sun Mar 20 23:12:00 2016 +0200 +++ b/Lib/idlelib/IOBinding.py Sun Mar 20 23:47:48 2016 +0200 @@ -62,7 +62,7 @@ encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check! ### 'encoding' is used below in encode(), check! -coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) def coding_spec(data): diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/lib2to3/pgen2/tokenize.py --- a/Lib/lib2to3/pgen2/tokenize.py Sun Mar 20 23:12:00 2016 +0200 +++ b/Lib/lib2to3/pgen2/tokenize.py Sun Mar 20 23:47:48 2016 +0200 @@ -236,7 +236,7 @@ startline = False toks_append(tokval) -cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) def _get_normal_name(orig_enc): diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/test/test_importlib/source/test_source_encoding.py --- a/Lib/test/test_importlib/source/test_source_encoding.py Sun Mar 20 23:12:00 2016 +0200 +++ b/Lib/test/test_importlib/source/test_source_encoding.py Sun Mar 20 23:47:48 2016 +0200 @@ -14,7 +14,7 @@ import warnings -CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) class EncodingTest: diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/test/test_source_encoding.py --- a/Lib/test/test_source_encoding.py Sun Mar 20 23:12:00 2016 +0200 +++ 
b/Lib/test/test_source_encoding.py Sun Mar 20 23:47:48 2016 +0200 @@ -178,7 +178,7 @@ def test_double_coding_same_line(self): src = (b'#coding:iso8859-15 coding:latin1\n' b'print(ascii("\xc3\xa4"))\n') - self.check_script_output(src, br"'\xc3\xa4'") + self.check_script_output(src, br"'\xc3\u20ac'") def test_first_non_utf8_coding_line(self): src = (b'#coding:iso-8859-15 \xa4\n' diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/tokenize.py --- a/Lib/tokenize.py Sun Mar 20 23:12:00 2016 +0200 +++ b/Lib/tokenize.py Sun Mar 20 23:47:48 2016 +0200 @@ -34,7 +34,7 @@ import sys from token import * -cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token diff -r 23a7481eafd4 -r e86cd4a872b8 Misc/NEWS --- a/Misc/NEWS Sun Mar 20 23:12:00 2016 +0200 +++ b/Misc/NEWS Sun Mar 20 23:47:48 2016 +0200 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #26581: If coding cookie is specified multiple times on a line in + Python source code file, only the first one is taken into account. + - Issue #26563: Debug hooks on Python memory allocators now raise a fatal error if functions of the :c:func:`PyMem_Malloc` family are called without holding the GIL. 
diff -r 23a7481eafd4 -r e86cd4a872b8 Parser/tokenizer.c --- a/Parser/tokenizer.c Sun Mar 20 23:12:00 2016 +0200 +++ b/Parser/tokenizer.c Sun Mar 20 23:47:48 2016 +0200 @@ -275,6 +275,7 @@ return 0; } *spec = r; + break; } } } diff -r 23a7481eafd4 -r e86cd4a872b8 Tools/scripts/findnocoding.py --- a/Tools/scripts/findnocoding.py Sun Mar 20 23:12:00 2016 +0200 +++ b/Tools/scripts/findnocoding.py Sun Mar 20 23:47:48 2016 +0200 @@ -32,7 +32,7 @@ "no sophisticated Python source file search will be done.", file=sys.stderr) -decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)') def get_declaration(line):