changeset: 101167:8ab8f5259f09 parent: 101165:cb5645b36713 user: Serhiy Storchaka date: Wed Apr 27 23:13:46 2016 +0300 files: Doc/library/fileinput.rst Doc/whatsnew/3.6.rst Lib/fileinput.py Lib/test/test_fileinput.py Misc/ACKS Misc/NEWS description: Issue #25788: fileinput.hook_encoded() now supports an "errors" argument for passing to open. Original patch by Joseph Hackman. diff -r cb5645b36713 -r 8ab8f5259f09 Doc/library/fileinput.rst --- a/Doc/library/fileinput.rst Wed Apr 27 23:06:41 2016 +0300 +++ b/Doc/library/fileinput.rst Wed Apr 27 23:13:46 2016 +0300 @@ -193,10 +193,14 @@ Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` -.. function:: hook_encoded(encoding) +.. function:: hook_encoded(encoding, errors=None) Returns a hook which opens each file with :func:`open`, using the given - *encoding* to read the file. + *encoding* and *errors* to read the file. Usage example: ``fi = - fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))`` + fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8", + "surrogateescape"))`` + + .. versionchanged:: 3.6 + Added the optional *errors* parameter. diff -r cb5645b36713 -r 8ab8f5259f09 Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Wed Apr 27 23:06:41 2016 +0300 +++ b/Doc/whatsnew/3.6.rst Wed Apr 27 23:13:46 2016 +0300 @@ -358,6 +358,13 @@ (Contributed by Aviv Palivoda in :issue:`26243`.) +fileinput +--------- + +:func:`~fileinput.hook_encoded` now supports the *errors* argument. +(Contributed by Joseph Hackman in :issue:`25788`.) + + Optimizations ============= diff -r cb5645b36713 -r 8ab8f5259f09 Lib/fileinput.py --- a/Lib/fileinput.py Wed Apr 27 23:06:41 2016 +0300 +++ b/Lib/fileinput.py Wed Apr 27 23:13:46 2016 +0300 @@ -400,9 +400,9 @@ return open(filename, mode) -def hook_encoded(encoding): +def hook_encoded(encoding, errors=None): def openhook(filename, mode): - return open(filename, mode, encoding=encoding) + return open(filename, mode, encoding=encoding, errors=errors) return openhook diff -r cb5645b36713 -r 8ab8f5259f09 Lib/test/test_fileinput.py --- a/Lib/test/test_fileinput.py Wed Apr 27 23:06:41 2016 +0300 +++ b/Lib/test/test_fileinput.py Wed Apr 27 23:13:46 2016 +0300 @@ -945,7 +945,8 @@ def test(self): encoding = object() - result = fileinput.hook_encoded(encoding) + errors = object() + result = fileinput.hook_encoded(encoding, errors=errors) fake_open = InvocationRecorder() original_open = builtins.open @@ -963,8 +964,26 @@ self.assertIs(args[0], filename) self.assertIs(args[1], mode) self.assertIs(kwargs.pop('encoding'), encoding) + self.assertIs(kwargs.pop('errors'), errors) self.assertFalse(kwargs) + def test_errors(self): + with open(TESTFN, 'wb') as f: + f.write(b'\x80abc') + self.addCleanup(safe_unlink, TESTFN) + + def check(errors, expected_lines): + with FileInput(files=TESTFN, mode='r', + openhook=hook_encoded('utf-8', errors=errors)) as fi: + lines = list(fi) + self.assertEqual(lines, expected_lines) + + check('ignore', ['abc']) + with self.assertRaises(UnicodeDecodeError): + check('strict', ['abc']) + check('replace', ['\ufffdabc']) + check('backslashreplace', ['\\x80abc']) + def test_modes(self): with open(TESTFN, 'wb') as f: # UTF-7 is a convenient, seldom used encoding diff -r cb5645b36713 -r 8ab8f5259f09 Misc/ACKS --- a/Misc/ACKS Wed Apr 27 23:06:41 2016 +0300 +++ b/Misc/ACKS Wed Apr 27 23:13:46 2016 +0300 @@ -538,6 +538,7 @@ Lars Gustäbel Thomas Güttler Jonas H. +Joseph Hackman Barry Haddow Philipp Hagemeister Paul ten Hagen diff -r cb5645b36713 -r 8ab8f5259f09 Misc/NEWS --- a/Misc/NEWS Wed Apr 27 23:06:41 2016 +0300 +++ b/Misc/NEWS Wed Apr 27 23:13:46 2016 +0300 @@ -256,6 +256,9 @@ Library ------- +- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument + for passing to open. Original patch by Joseph Hackman. + - Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by Xiang Zhang.