changeset: 95402:3ac1b21fbb42 branch: 2.7 parent: 95398:84d49ad9109b user: Serhiy Storchaka date: Thu Apr 02 23:05:23 2015 +0300 files: Lib/test/test_sax.py description: Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString(). diff -r 84d49ad9109b -r 3ac1b21fbb42 Lib/test/test_sax.py --- a/Lib/test/test_sax.py Thu Apr 02 20:55:46 2015 +0300 +++ b/Lib/test/test_sax.py Thu Apr 02 23:05:23 2015 +0300 @@ -15,10 +15,11 @@ from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from cStringIO import StringIO import io +import gc import os.path import shutil import test.test_support as support -from test.test_support import findfile, run_unittest +from test.test_support import findfile, run_unittest, TESTFN import unittest TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") @@ -90,6 +91,111 @@ self.assertEqual(attrs["attr"], "val") self.assertEqual(attrs.getQNameByName("attr"), "attr") + +def xml_unicode(doc, encoding=None): + if encoding is None: + return doc + return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) + +def xml_bytes(doc, encoding, decl_encoding=Ellipsis): + if decl_encoding is Ellipsis: + decl_encoding = encoding + return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') + +def make_xml_file(doc, encoding, decl_encoding=Ellipsis): + if decl_encoding is Ellipsis: + decl_encoding = encoding + with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: + f.write(xml_unicode(doc, decl_encoding)) + + +class ParseTest(unittest.TestCase): + data = support.u(r'<money value="$\xa3\u20ac\U0001017b">' + r'$\xa3\u20ac\U0001017b</money>') + + def tearDown(self): + support.unlink(TESTFN) + + def check_parse(self, f): + from xml.sax import parse + result = StringIO() + parse(f, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) + + def test_parse_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 
'utf-16be') + for encoding in encodings: + self.check_parse(io.BytesIO(xml_bytes(self.data, encoding))) + make_xml_file(self.data, encoding) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None))) + make_xml_file(self.data, encoding, None) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept UTF-8 with BOM + self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) + make_xml_file(self.data, 'utf-8-sig', 'utf-8') + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) + make_xml_file(self.data, 'utf-8-sig', None) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept data with declared encoding + self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1'))) + make_xml_file(self.data, 'iso-8859-1') + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) + make_xml_file(self.data, 'iso-8859-1', None) + with self.assertRaises(SAXException): + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + with self.assertRaises(SAXException): + self.check_parse(f) + + def test_parse_InputSource(self): + # accept data without declared but with explicitly specified encoding + make_xml_file(self.data, 'iso-8859-1', None) + with io.open(TESTFN, 'rb') as f: + input = InputSource() + input.setByteStream(f) + input.setEncoding('iso-8859-1') + self.check_parse(input) + + def check_parseString(self, s): + from xml.sax import parseString + result = StringIO() + parseString(s, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) + + def 
test_parseString_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parseString(xml_bytes(self.data, encoding)) + self.check_parseString(xml_bytes(self.data, encoding, None)) + # accept UTF-8 with BOM + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) + # accept data with declared encoding + self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) + + class MakeParserTest(unittest.TestCase): def test_make_parser2(self): # Creating parsers several times in a row should succeed. @@ -949,6 +1055,7 @@ def test_main(): run_unittest(MakeParserTest, + ParseTest, SaxutilsTest, PrepareInputSourceTest, StringXmlgenTest,