changeset: 96711:330e28b28334 branch: 3.4 parent: 96698:f0053d05ed6d parent: 96710:4856ae883041 user: Jason R. Coombs date: Sun Jun 28 10:23:11 2015 -0400 files: Misc/NEWS description: Issue #20387: Merge patch and test diff -r f0053d05ed6d -r 330e28b28334 Lib/test/test_tokenize.py --- a/Lib/test/test_tokenize.py Sun Jun 28 17:06:07 2015 +0300 +++ b/Lib/test/test_tokenize.py Sun Jun 28 10:23:11 2015 -0400 @@ -5,6 +5,8 @@ code, print out a table with tokens. The ENDMARKER is omitted for brevity. + >>> import glob + >>> dump_tokens("1 + 1") ENCODING 'utf-8' (0, 0) (0, 0) NUMBER '1' (1, 0) (1, 1) @@ -647,7 +649,7 @@ open as tokenize_open, Untokenizer) from io import BytesIO from unittest import TestCase, mock -import os, sys, glob +import os import token def dump_tokens(s): @@ -1227,6 +1229,22 @@ self.assertEqual(untokenize(iter(tokens)), b'Hello ') +class TestRoundtrip(TestCase): + def roundtrip(self, code): + if isinstance(code, str): + code = code.encode('utf-8') + return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') + + def test_indentation_semantics_retained(self): + """ + Ensure that although whitespace might be mutated in a roundtrip, + the semantic meaning of the indentation remains consistent. + """ + code = "if False:\n\tx=3\n\tx=3\n" + codelines = self.roundtrip(code).split('\n') + self.assertEqual(codelines[1], codelines[2]) + + __test__ = {"doctests" : doctests, 'decistmt': decistmt} def test_main(): @@ -1237,6 +1255,7 @@ support.run_unittest(TestDetectEncoding) support.run_unittest(TestTokenize) support.run_unittest(UntokenizeTest) + support.run_unittest(TestRoundtrip) if __name__ == "__main__": test_main() diff -r f0053d05ed6d -r 330e28b28334 Lib/tokenize.py --- a/Lib/tokenize.py Sun Jun 28 17:06:07 2015 +0300 +++ b/Lib/tokenize.py Sun Jun 28 10:23:11 2015 -0400 @@ -244,6 +244,8 @@ def untokenize(self, iterable): it = iter(iterable) + indents = [] + startline = False for t in it: if len(t) == 2: self.compat(t, it) @@ -254,6 +256,21 @@ continue if tok_type == ENDMARKER: break + if tok_type == INDENT: + indents.append(token) + continue + elif tok_type == DEDENT: + indents.pop() + self.prev_row, self.prev_col = end + continue + elif tok_type in (NEWLINE, NL): + startline = True + elif startline and indents: + indent = indents[-1] + if start[1] >= len(indent): + self.tokens.append(indent) + self.prev_col = len(indent) + startline = False self.add_whitespace(start) self.tokens.append(token) self.prev_row, self.prev_col = end diff -r f0053d05ed6d -r 330e28b28334 Misc/NEWS --- a/Misc/NEWS Sun Jun 28 17:06:07 2015 +0300 +++ b/Misc/NEWS Sun Jun 28 10:23:11 2015 -0400 @@ -60,6 +60,9 @@ Library ------- +- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize + for tab-indented blocks. + - Issue #24336: The contextmanager decorator now works with functions with keyword arguments called "func" and "self". Patch by Martin Panter.