bpo-42103: Improve validation of Plist files. (GH-22882)

* Prevent some possible DoS attacks via invalid Plist files that declare an extremely large number of objects or extremely large collection sizes.
* Raise InvalidFileException for bytes and string sizes that are too large, instead of returning garbage.
* Raise InvalidFileException instead of ValueError for a specific invalid datetime (NaN).
* Raise InvalidFileException instead of TypeError for non-hashable dict keys.
* Add more tests for invalid Plist files.
2025-09-26 10:19:53 +00:00 · 2020-11-02 23:01:40 +02:00 · 2020-11-02 23:01:40 +02:00 · 34637a0ce2
commit 34637a0ce2
parent 6fdfcec5b1
4 changed files with 363 additions and 57 deletions
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@ -477,7 +477,7 @@ class _BinaryPlistParser:
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
-                UnicodeDecodeError):
+                ValueError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
@ -493,7 +493,7 @@ class _BinaryPlistParser:
    def _read_ints(self, n, size):
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
-            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
+            return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
        else:
            if not size or len(data) != size * n:
                raise InvalidFileException()
@ -553,14 +553,22 @@ class _BinaryPlistParser:
        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            result = self._fp.read(s)
+            if len(result) != s:
+                raise InvalidFileException()

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
-            result =  self._fp.read(s).decode('ascii')
+            data = self._fp.read(s)
+            if len(data) != s:
+                raise InvalidFileException()
+            result = data.decode('ascii')

        elif tokenH == 0x60:  # unicode string
-            s = self._get_size(tokenL)
-            result = self._fp.read(s * 2).decode('utf-16be')
+            s = self._get_size(tokenL) * 2
+            data = self._fp.read(s)
+            if len(data) != s:
+                raise InvalidFileException()
+            result = data.decode('utf-16be')

        elif tokenH == 0x80:  # UID
            # used by Key-Archiver plist files
@ -585,9 +593,11 @@ class _BinaryPlistParser:
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            self._objects[ref] = result
-            for k, o in zip(key_refs, obj_refs):
-                result[self._read_object(k)] = self._read_object(o)
-
+            try:
+                for k, o in zip(key_refs, obj_refs):
+                    result[self._read_object(k)] = self._read_object(o)
+            except TypeError:
+                raise InvalidFileException()
        else:
            raise InvalidFileException()