changeset:   100625:e86cd4a872b8
parent:      100622:23a7481eafd4
parent:      100623:1c44cea2ea8f
user:        Serhiy Storchaka <storchaka@gmail.com>
date:        Sun Mar 20 23:47:48 2016 +0200
files:       Lib/tokenize.py Misc/NEWS Parser/tokenizer.c
description:
Issue #26581: Use the first coding cookie on a line, not the last one.


diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/idlelib/IOBinding.py
--- a/Lib/idlelib/IOBinding.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Lib/idlelib/IOBinding.py	Sun Mar 20 23:47:48 2016 +0200
@@ -62,7 +62,7 @@
 encoding = locale_encoding  ### KBK 07Sep07  This is used all over IDLE, check!
                             ### 'encoding' is used below in encode(), check!
 
-coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
 def coding_spec(data):
diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/lib2to3/pgen2/tokenize.py
--- a/Lib/lib2to3/pgen2/tokenize.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Lib/lib2to3/pgen2/tokenize.py	Sun Mar 20 23:47:48 2016 +0200
@@ -236,7 +236,7 @@
                 startline = False
             toks_append(tokval)
 
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
 def _get_normal_name(orig_enc):
diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/test/test_importlib/source/test_source_encoding.py
--- a/Lib/test/test_importlib/source/test_source_encoding.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Lib/test/test_importlib/source/test_source_encoding.py	Sun Mar 20 23:47:48 2016 +0200
@@ -14,7 +14,7 @@
 import warnings
 
 
-CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 
 
 class EncodingTest:
diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/test/test_source_encoding.py
--- a/Lib/test/test_source_encoding.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Lib/test/test_source_encoding.py	Sun Mar 20 23:47:48 2016 +0200
@@ -178,7 +178,7 @@
     def test_double_coding_same_line(self):
         src = (b'#coding:iso8859-15 coding:latin1\n'
                b'print(ascii("\xc3\xa4"))\n')
-        self.check_script_output(src, br"'\xc3\xa4'")
+        self.check_script_output(src, br"'\xc3\u20ac'")
 
     def test_first_non_utf8_coding_line(self):
         src = (b'#coding:iso-8859-15 \xa4\n'
diff -r 23a7481eafd4 -r e86cd4a872b8 Lib/tokenize.py
--- a/Lib/tokenize.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Lib/tokenize.py	Sun Mar 20 23:47:48 2016 +0200
@@ -34,7 +34,7 @@
 import sys
 from token import *
 
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
 import token
diff -r 23a7481eafd4 -r e86cd4a872b8 Misc/NEWS
--- a/Misc/NEWS	Sun Mar 20 23:12:00 2016 +0200
+++ b/Misc/NEWS	Sun Mar 20 23:47:48 2016 +0200
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #26581: If a coding cookie is specified multiple times on a line in a
+  Python source code file, only the first one is taken into account.
+
 - Issue #26563: Debug hooks on Python memory allocators now raise a fatal
   error if functions of the :c:func:`PyMem_Malloc` family are called without
   holding the GIL.
diff -r 23a7481eafd4 -r e86cd4a872b8 Parser/tokenizer.c
--- a/Parser/tokenizer.c	Sun Mar 20 23:12:00 2016 +0200
+++ b/Parser/tokenizer.c	Sun Mar 20 23:47:48 2016 +0200
@@ -275,6 +275,7 @@
                         return 0;
                 }
                 *spec = r;
+                break;
             }
         }
     }
diff -r 23a7481eafd4 -r e86cd4a872b8 Tools/scripts/findnocoding.py
--- a/Tools/scripts/findnocoding.py	Sun Mar 20 23:12:00 2016 +0200
+++ b/Tools/scripts/findnocoding.py	Sun Mar 20 23:47:48 2016 +0200
@@ -32,7 +32,7 @@
                          "no sophisticated Python source file search will be done.", file=sys.stderr)
 
 
-decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
 blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
 
 def get_declaration(line):