changeset: 94993:e058423d3ca4 user: Berker Peksag date: Sun Mar 15 01:18:47 2015 +0200 files: Doc/library/difflib.rst Doc/whatsnew/3.5.rst Lib/difflib.py Lib/test/test_difflib.py Lib/test/test_difflib_expect.html Misc/NEWS description: Issue #2052: Add charset parameter to HtmlDiff.make_file(). diff -r a03cc14db96a -r e058423d3ca4 Doc/library/difflib.rst --- a/Doc/library/difflib.rst Sat Mar 14 21:34:25 2015 +0200 +++ b/Doc/library/difflib.rst Sun Mar 15 01:18:47 2015 +0200 @@ -104,7 +104,8 @@ The following methods are public: - .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5) + .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, \ + numlines=5, *, charset='utf-8') Compares *fromlines* and *tolines* (lists of strings) and returns a string which is a complete HTML file containing a table showing line by line differences with @@ -123,6 +124,10 @@ the next difference highlight at the top of the browser without any leading context). + .. versionchanged:: 3.5 + *charset* keyword-only argument was added. The default charset of + HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``. + .. method:: make_table(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5) Compares *fromlines* and *tolines* (lists of strings) and returns a string which diff -r a03cc14db96a -r e058423d3ca4 Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Sat Mar 14 21:34:25 2015 +0200 +++ b/Doc/whatsnew/3.5.rst Sun Mar 15 01:18:47 2015 +0200 @@ -225,6 +225,14 @@ don't provide any options to redirect it. (Contributed by Berker Peksag in :issue:`22389`.) +difflib +------- + +* The charset of the HTML document generated by :meth:`difflib.HtmlDiff.make_file` + can now be customized by using *charset* keyword-only parameter. The default + charset of HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``. + (Contributed by Berker Peksag in :issue:`2052`.) 
+ distutils --------- diff -r a03cc14db96a -r e058423d3ca4 Lib/difflib.py --- a/Lib/difflib.py Sat Mar 14 21:34:25 2015 +0200 +++ b/Lib/difflib.py Sun Mar 15 01:18:47 2015 +0200 @@ -1598,7 +1598,7 @@ + content="text/html; charset=%(charset)s" /> @@ -1685,8 +1685,8 @@ self._linejunk = linejunk self._charjunk = charjunk - def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False, - numlines=5): + def make_file(self, fromlines, tolines, fromdesc='', todesc='', + context=False, numlines=5, *, charset='utf-8'): """Returns HTML file of side by side comparison with change highlights Arguments: @@ -1701,13 +1701,16 @@ When context is False, controls the number of lines to place the "next" link anchors before the next change (so click of "next" link jumps to just before the change). + charset -- charset of the HTML document """ - return self._file_template % dict( - styles = self._styles, - legend = self._legend, - table = self.make_table(fromlines,tolines,fromdesc,todesc, - context=context,numlines=numlines)) + return (self._file_template % dict( + styles=self._styles, + legend=self._legend, + table=self.make_table(fromlines, tolines, fromdesc, todesc, + context=context, numlines=numlines), + charset=charset + )).encode(charset, 'xmlcharrefreplace').decode(charset) def _tab_newline_replace(self,fromlines,tolines): """Returns from/to line lists with tabs expanded and newlines removed. diff -r a03cc14db96a -r e058423d3ca4 Lib/test/test_difflib.py --- a/Lib/test/test_difflib.py Sat Mar 14 21:34:25 2015 +0200 +++ b/Lib/test/test_difflib.py Sun Mar 15 01:18:47 2015 +0200 @@ -107,6 +107,20 @@ 5. Flat is better than nested. """ +patch914575_nonascii_from1 = """ + 1. Beautiful is beTTer than ugly. + 2. Explicit is better than ımplıcıt. + 3. Simple is better than complex. + 4. Complex is better than complicated. +""" + +patch914575_nonascii_to1 = """ + 1. Beautiful is better than ügly. + 3. Sımple is better than complex. + 4. Complicated is better than cömplex. + 5. 
Flat is better than nested. +""" + patch914575_from2 = """ \t\tLine 1: preceeded by from:[tt] to:[ssss] \t\tLine 2: preceeded by from:[sstt] to:[sssst] @@ -223,6 +237,27 @@ new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)] difflib.SequenceMatcher(None, old, new).get_opcodes() + def test_make_file_default_charset(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_from1.splitlines(), + patch914575_to1.splitlines()) + self.assertIn('content="text/html; charset=utf-8"', output) + + def test_make_file_iso88591_charset(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_from1.splitlines(), + patch914575_to1.splitlines(), + charset='iso-8859-1') + self.assertIn('content="text/html; charset=iso-8859-1"', output) + + def test_make_file_usascii_charset_with_nonascii_input(self): + html_diff = difflib.HtmlDiff() + output = html_diff.make_file(patch914575_nonascii_from1.splitlines(), + patch914575_nonascii_to1.splitlines(), + charset='us-ascii') + self.assertIn('content="text/html; charset=us-ascii"', output) + self.assertIn('&#305;mpl&#305;c&#305;t', output) + class TestOutputFormat(unittest.TestCase): def test_tab_delimiter(self): diff -r a03cc14db96a -r e058423d3ca4 Lib/test/test_difflib_expect.html --- a/Lib/test/test_difflib_expect.html Sat Mar 14 21:34:25 2015 +0200 +++ b/Lib/test/test_difflib_expect.html Sun Mar 15 01:18:47 2015 +0200 @@ -6,7 +6,7 @@ + content="text/html; charset=utf-8" />