changeset: 96845:c7f4f61697b7 branch: 3.4 parent: 96838:0deca75537ec user: Lars Gustäbel date: Mon Jul 06 09:27:24 2015 +0200 files: Lib/tarfile.py Lib/test/test_tarfile.py Misc/NEWS description: Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment. diff -r 0deca75537ec -r c7f4f61697b7 Lib/tarfile.py --- a/Lib/tarfile.py Sun Jul 05 11:45:01 2015 -0700 +++ b/Lib/tarfile.py Mon Jul 06 09:27:24 2015 +0200 @@ -225,7 +225,7 @@ signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) return unsigned_chksum, signed_chksum -def copyfileobj(src, dst, length=None): +def copyfileobj(src, dst, length=None, exception=OSError): """Copy length bytes from fileobj src to fileobj dst. If length is None, copy the entire content. """ @@ -240,13 +240,13 @@ for b in range(blocks): buf = src.read(BUFSIZE) if len(buf) < BUFSIZE: - raise OSError("end of file reached") + raise exception("unexpected end of data") dst.write(buf) if remainder != 0: buf = src.read(remainder) if len(buf) < remainder: - raise OSError("end of file reached") + raise exception("unexpected end of data") dst.write(buf) return @@ -690,7 +690,10 @@ length = min(size, stop - self.position) if data: self.fileobj.seek(offset + (self.position - start)) - buf += self.fileobj.read(length) + b = self.fileobj.read(length) + if len(b) != length: + raise ReadError("unexpected end of data") + buf += b else: buf += NUL * length size -= length @@ -2132,9 +2135,9 @@ if tarinfo.sparse is not None: for offset, size in tarinfo.sparse: target.seek(offset) - copyfileobj(source, target, size) + copyfileobj(source, target, size, ReadError) else: - copyfileobj(source, target, tarinfo.size) + copyfileobj(source, target, tarinfo.size, ReadError) target.seek(tarinfo.size) target.truncate() @@ -2244,8 +2247,13 @@ self.firstmember = None return m + # Advance the file pointer. + if self.offset != self.fileobj.tell(): + self.fileobj.seek(self.offset - 1) + if not self.fileobj.read(1): + raise ReadError("unexpected end of data") + # Read the next block. - self.fileobj.seek(self.offset) tarinfo = None while True: try: diff -r 0deca75537ec -r c7f4f61697b7 Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py Sun Jul 05 11:45:01 2015 -0700 +++ b/Lib/test/test_tarfile.py Mon Jul 06 09:27:24 2015 +0200 @@ -349,6 +349,29 @@ finally: tar.close() + def test_premature_end_of_archive(self): + for size in (512, 600, 1024, 1200): + with tarfile.open(tmpname, "w:") as tar: + t = tarfile.TarInfo("foo") + t.size = 1024 + tar.addfile(t, io.BytesIO(b"a" * 1024)) + + with open(tmpname, "r+b") as fobj: + fobj.truncate(size) + + with tarfile.open(tmpname) as tar: + with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): + for t in tar: + pass + + with tarfile.open(tmpname) as tar: + t = tar.next() + + with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): + tar.extract(t, TEMPDIR) + + with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): + tar.extractfile(t).read() class MiscReadTestBase(CommonReadTest): def requires_name_attribute(self): diff -r 0deca75537ec -r c7f4f61697b7 Misc/NEWS --- a/Misc/NEWS Sun Jul 05 11:45:01 2015 -0700 +++ b/Misc/NEWS Mon Jul 06 09:27:24 2015 +0200 @@ -66,6 +66,9 @@ Library ------- +- Issue #24259: tarfile now raises a ReadError if an archive is truncated + inside a data segment. + - Issue #24552: Fix use after free in an error case of the _pickle module. - Issue #24514: tarfile now tolerates number fields consisting of only