changeset: 96846:59cbdc9eb3d9
branch: 3.5
parent: 96839:038b4f61d9b7
parent: 96845:c7f4f61697b7
user: Lars Gustäbel
date: Mon Jul 06 09:29:41 2015 +0200
files: Lib/tarfile.py Lib/test/test_tarfile.py Misc/NEWS
description:
Merge with 3.4: Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.


diff -r 038b4f61d9b7 -r 59cbdc9eb3d9 Lib/tarfile.py
--- a/Lib/tarfile.py	Sun Jul 05 11:45:31 2015 -0700
+++ b/Lib/tarfile.py	Mon Jul 06 09:29:41 2015 +0200
@@ -225,7 +225,7 @@
     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     return unsigned_chksum, signed_chksum
 
-def copyfileobj(src, dst, length=None):
+def copyfileobj(src, dst, length=None, exception=OSError):
     """Copy length bytes from fileobj src to fileobj dst.
        If length is None, copy the entire content.
     """
@@ -240,13 +240,13 @@
     for b in range(blocks):
         buf = src.read(BUFSIZE)
         if len(buf) < BUFSIZE:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
 
     if remainder != 0:
         buf = src.read(remainder)
         if len(buf) < remainder:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
     return
 
@@ -690,7 +690,10 @@
             length = min(size, stop - self.position)
             if data:
                 self.fileobj.seek(offset + (self.position - start))
-                buf += self.fileobj.read(length)
+                b = self.fileobj.read(length)
+                if len(b) != length:
+                    raise ReadError("unexpected end of data")
+                buf += b
             else:
                 buf += NUL * length
             size -= length
@@ -2150,9 +2153,9 @@
             if tarinfo.sparse is not None:
                 for offset, size in tarinfo.sparse:
                     target.seek(offset)
-                    copyfileobj(source, target, size)
+                    copyfileobj(source, target, size, ReadError)
             else:
-                copyfileobj(source, target, tarinfo.size)
+                copyfileobj(source, target, tarinfo.size, ReadError)
             target.seek(tarinfo.size)
             target.truncate()
 
@@ -2267,8 +2270,13 @@
             self.firstmember = None
             return m
 
+        # Advance the file pointer.
+        if self.offset != self.fileobj.tell():
+            self.fileobj.seek(self.offset - 1)
+            if not self.fileobj.read(1):
+                raise ReadError("unexpected end of data")
+
         # Read the next block.
-        self.fileobj.seek(self.offset)
         tarinfo = None
         while True:
             try:
diff -r 038b4f61d9b7 -r 59cbdc9eb3d9 Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py	Sun Jul 05 11:45:31 2015 -0700
+++ b/Lib/test/test_tarfile.py	Mon Jul 06 09:29:41 2015 +0200
@@ -364,6 +364,29 @@
         finally:
             tar.close()
 
+    def test_premature_end_of_archive(self):
+        for size in (512, 600, 1024, 1200):
+            with tarfile.open(tmpname, "w:") as tar:
+                t = tarfile.TarInfo("foo")
+                t.size = 1024
+                tar.addfile(t, io.BytesIO(b"a" * 1024))
+
+            with open(tmpname, "r+b") as fobj:
+                fobj.truncate(size)
+
+            with tarfile.open(tmpname) as tar:
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    for t in tar:
+                        pass
+
+            with tarfile.open(tmpname) as tar:
+                t = tar.next()
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extract(t, TEMPDIR)
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extractfile(t).read()
 
 class MiscReadTestBase(CommonReadTest):
     def requires_name_attribute(self):
diff -r 038b4f61d9b7 -r 59cbdc9eb3d9 Misc/NEWS
--- a/Misc/NEWS	Sun Jul 05 11:45:31 2015 -0700
+++ b/Misc/NEWS	Mon Jul 06 09:29:41 2015 +0200
@@ -17,6 +17,9 @@
 Library
 -------
 
+- Issue #24259: tarfile now raises a ReadError if an archive is truncated
+  inside a data segment.
+
 What's New in Python 3.5.0 beta 3?
 ==================================
 