@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590590
591591_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
592592
593+ _undefined = object ()
594+
593595class _BinaryPlistParser :
594596 """
595597 Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620622 ) = struct .unpack ('>6xBBQQQ' , trailer )
621623 self ._fp .seek (offset_table_offset )
622624 self ._object_offsets = self ._read_ints (num_objects , offset_size )
623- return self ._read_object (self ._object_offsets [top_object ])
625+ self ._objects = [_undefined ] * num_objects
626+ return self ._read_object (top_object )
624627
625628 except (OSError , IndexError , struct .error , OverflowError ,
626629 UnicodeDecodeError ):
@@ -649,71 +652,78 @@ def _read_ints(self, n, size):
649652 def _read_refs (self , n ):
650653 return self ._read_ints (n , self ._ref_size )
651654
652- def _read_object (self , offset ):
655+ def _read_object (self , ref ):
653656 """
654- read the object at offset .
657+ read the object by reference .
655658
656659 May recursively read sub-objects (content of an array/dict/set)
657660 """
661+ result = self ._objects [ref ]
662+ if result is not _undefined :
663+ return result
664+
665+ offset = self ._object_offsets [ref ]
658666 self ._fp .seek (offset )
659667 token = self ._fp .read (1 )[0 ]
660668 tokenH , tokenL = token & 0xF0 , token & 0x0F
661669
662670 if token == 0x00 :
663- return None
671+ result = None
664672
665673 elif token == 0x08 :
666- return False
674+ result = False
667675
668676 elif token == 0x09 :
669- return True
677+ result = True
670678
671679 # The referenced source code also mentions URL (0x0c, 0x0d) and
672680 # UUID (0x0e), but neither can be generated using the Cocoa libraries.
673681
674682 elif token == 0x0f :
675- return b''
683+ result = b''
676684
677685 elif tokenH == 0x10 : # int
678- return int .from_bytes (self ._fp .read (1 << tokenL ),
679- 'big' , signed = tokenL >= 3 )
686+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
687+ 'big' , signed = tokenL >= 3 )
680688
681689 elif token == 0x22 : # real
682- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
690+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
683691
684692 elif token == 0x23 : # real
685- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
693+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
686694
687695 elif token == 0x33 : # date
688696 f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
689697 # timestamp 0 of binary plists corresponds to 1/1/2001
690698 # (year of Mac OS X 10.0), instead of 1/1/1970.
691- return datetime .datetime (2001 , 1 , 1 ) + datetime .timedelta (seconds = f )
699+ result = (datetime .datetime (2001 , 1 , 1 ) +
700+ datetime .timedelta (seconds = f ))
692701
693702 elif tokenH == 0x40 : # data
694703 s = self ._get_size (tokenL )
695704 if self ._use_builtin_types :
696- return self ._fp .read (s )
705+ result = self ._fp .read (s )
697706 else :
698- return Data (self ._fp .read (s ))
707+ result = Data (self ._fp .read (s ))
699708
700709 elif tokenH == 0x50 : # ascii string
701710 s = self ._get_size (tokenL )
702711 result = self ._fp .read (s ).decode ('ascii' )
703- return result
712+ result = result
704713
705714 elif tokenH == 0x60 : # unicode string
706715 s = self ._get_size (tokenL )
707- return self ._fp .read (s * 2 ).decode ('utf-16be' )
716+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
708717
709718 # tokenH == 0x80 is documented as 'UID' and appears to be used for
710719 # keyed-archiving, not in plists.
711720
712721 elif tokenH == 0xA0 : # array
713722 s = self ._get_size (tokenL )
714723 obj_refs = self ._read_refs (s )
715- return [self ._read_object (self ._object_offsets [x ])
716- for x in obj_refs ]
724+ result = []
725+ self ._objects [ref ] = result
726+ result .extend (self ._read_object (x ) for x in obj_refs )
717727
718728 # tokenH == 0xB0 is documented as 'ordset', but is not actually
719729 # implemented in the Apple reference code.
@@ -726,12 +736,15 @@ def _read_object(self, offset):
726736 key_refs = self ._read_refs (s )
727737 obj_refs = self ._read_refs (s )
728738 result = self ._dict_type ()
739+ self ._objects [ref ] = result
729740 for k , o in zip (key_refs , obj_refs ):
730- result [self ._read_object (self ._object_offsets [k ])
731- ] = self ._read_object (self ._object_offsets [o ])
732- return result
741+ result [self ._read_object (k )] = self ._read_object (o )
733742
734- raise InvalidFileException ()
743+ else :
744+ raise InvalidFileException ()
745+
746+ self ._objects [ref ] = result
747+ return result
735748
736749def _count_to_size (count ):
737750 if count < 1 << 8 :
@@ -746,6 +759,8 @@ def _count_to_size(count):
746759 else :
747760 return 8
748761
762+ _scalars = (str , int , float , datetime .datetime , bytes )
763+
749764class _BinaryPlistWriter (object ):
750765 def __init__ (self , fp , sort_keys , skipkeys ):
751766 self ._fp = fp
@@ -801,24 +816,25 @@ def _flatten(self, value):
801816 # First check if the object is in the object table, not used for
802817 # containers to ensure that two subcontainers with the same contents
803818 # will be serialized as distinct values.
804- if isinstance (value , (
805- str , int , float , datetime .datetime , bytes , bytearray )):
819+ if isinstance (value , _scalars ):
806820 if (type (value ), value ) in self ._objtable :
807821 return
808822
809823 elif isinstance (value , Data ):
810824 if (type (value .data ), value .data ) in self ._objtable :
811825 return
812826
827+ elif id (value ) in self ._objidtable :
828+ return
829+
813830 # Add to objectreference map
814831 refnum = len (self ._objlist )
815832 self ._objlist .append (value )
816- try :
817- if isinstance (value , Data ):
818- self ._objtable [(type (value .data ), value .data )] = refnum
819- else :
820- self ._objtable [(type (value ), value )] = refnum
821- except TypeError :
833+ if isinstance (value , _scalars ):
834+ self ._objtable [(type (value ), value )] = refnum
835+ elif isinstance (value , Data ):
836+ self ._objtable [(type (value .data ), value .data )] = refnum
837+ else :
822838 self ._objidtable [id (value )] = refnum
823839
824840 # And finally recurse into containers
@@ -845,12 +861,11 @@ def _flatten(self, value):
845861 self ._flatten (o )
846862
def _getrefnum(self, value):
    """Return the reference number recorded for *value*.

    The lookup uses the same three-way split as the registration code in
    ``_flatten``:

    * instances of the ``_scalars`` types are keyed by ``(type, value)``
      in ``self._objtable``;
    * ``Data`` wrappers are keyed by the type and value of their ``data``
      attribute, also in ``self._objtable``;
    * everything else (e.g. containers) is keyed by ``id(value)`` in
      ``self._objidtable``.

    A value that was never registered makes the table lookup fail; the
    resulting exception is propagated unchanged.
    """
    if isinstance(value, _scalars):
        return self._objtable[(type(value), value)]
    if isinstance(value, Data):
        return self._objtable[(type(value.data), value.data)]
    # Not a scalar and not a Data wrapper: tracked by identity, so equal
    # but distinct objects keep separate reference numbers.
    return self._objidtable[id(value)]
855870
856871 def _write_size (self , token , size ):
0 commit comments