@@ -525,6 +525,8 @@ def __init__(self, message="Invalid file"):
525525
# Maps an integer width in bytes to a one-character format code. The codes
# match struct's unsigned integer types (B=1, H=2, L=4, Q=8 bytes in standard
# size mode); the code that consumes this table is outside this excerpt.
526526_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
527527
# Sentinel for a slot in the parser's object cache that has not been read yet.
# A dedicated object is used because None is itself a valid parsed value.
528+ _undefined = object ()
529+
528530class _BinaryPlistParser :
529531 """
530532 Read or write a binary plist file, following the description of the binary
@@ -555,7 +557,8 @@ def parse(self, fp):
555557 ) = struct .unpack ('>6xBBQQQ' , trailer )
556558 self ._fp .seek (offset_table_offset )
557559 self ._object_offsets = self ._read_ints (num_objects , offset_size )
558- return self ._read_object (self ._object_offsets [top_object ])
560+ self ._objects = [_undefined ] * num_objects
561+ return self ._read_object (top_object )
559562
560563 except (OSError , IndexError , struct .error , OverflowError ,
561564 UnicodeDecodeError ):
@@ -584,71 +587,78 @@ def _read_ints(self, n, size):
def _read_refs(self, n):
    """Return ``n`` reference integers read via ``_read_ints``.

    ``self._ref_size`` is passed as the size of each integer.
    """
    ref_size = self._ref_size
    return self._read_ints(n, ref_size)
586589
587- def _read_object (self , offset ):
590+ def _read_object (self , ref ):
588591 """
589- read the object at offset .
592+ read the object by reference .
590593
591594 May recursively read sub-objects (content of an array/dict/set)
592595 """
596+ result = self ._objects [ref ]
597+ if result is not _undefined :
598+ return result
599+
600+ offset = self ._object_offsets [ref ]
593601 self ._fp .seek (offset )
594602 token = self ._fp .read (1 )[0 ]
595603 tokenH , tokenL = token & 0xF0 , token & 0x0F
596604
597605 if token == 0x00 :
598- return None
606+ result = None
599607
600608 elif token == 0x08 :
601- return False
609+ result = False
602610
603611 elif token == 0x09 :
604- return True
612+ result = True
605613
606614 # The referenced source code also mentions URL (0x0c, 0x0d) and
607615 # UUID (0x0e), but neither can be generated using the Cocoa libraries.
608616
609617 elif token == 0x0f :
610- return b''
618+ result = b''
611619
612620 elif tokenH == 0x10 : # int
613- return int .from_bytes (self ._fp .read (1 << tokenL ),
614- 'big' , signed = tokenL >= 3 )
621+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
622+ 'big' , signed = tokenL >= 3 )
615623
616624 elif token == 0x22 : # real
617- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
625+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
618626
619627 elif token == 0x23 : # real
620- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
628+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
621629
622630 elif token == 0x33 : # date
623631 f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
624632 # timestamp 0 of binary plists corresponds to 1/1/2001
625633 # (year of Mac OS X 10.0), instead of 1/1/1970.
626- return datetime .datetime (2001 , 1 , 1 ) + datetime .timedelta (seconds = f )
634+ result = (datetime .datetime (2001 , 1 , 1 ) +
635+ datetime .timedelta (seconds = f ))
627636
628637 elif tokenH == 0x40 : # data
629638 s = self ._get_size (tokenL )
630639 if self ._use_builtin_types :
631- return self ._fp .read (s )
640+ result = self ._fp .read (s )
632641 else :
633- return Data (self ._fp .read (s ))
642+ result = Data (self ._fp .read (s ))
634643
635644 elif tokenH == 0x50 : # ascii string
636645 s = self ._get_size (tokenL )
637646 result = self ._fp .read (s ).decode ('ascii' )
638- return result
647+ result = result
639648
640649 elif tokenH == 0x60 : # unicode string
641650 s = self ._get_size (tokenL )
642- return self ._fp .read (s * 2 ).decode ('utf-16be' )
651+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
643652
644653 # tokenH == 0x80 is documented as 'UID' and appears to be used for
645654 # keyed-archiving, not in plists.
646655
647656 elif tokenH == 0xA0 : # array
648657 s = self ._get_size (tokenL )
649658 obj_refs = self ._read_refs (s )
650- return [self ._read_object (self ._object_offsets [x ])
651- for x in obj_refs ]
659+ result = []
660+ self ._objects [ref ] = result
661+ result .extend (self ._read_object (x ) for x in obj_refs )
652662
653663 # tokenH == 0xB0 is documented as 'ordset', but is not actually
654664 # implemented in the Apple reference code.
@@ -661,12 +671,15 @@ def _read_object(self, offset):
661671 key_refs = self ._read_refs (s )
662672 obj_refs = self ._read_refs (s )
663673 result = self ._dict_type ()
674+ self ._objects [ref ] = result
664675 for k , o in zip (key_refs , obj_refs ):
665- result [self ._read_object (self ._object_offsets [k ])
666- ] = self ._read_object (self ._object_offsets [o ])
667- return result
676+ result [self ._read_object (k )] = self ._read_object (o )
668677
669- raise InvalidFileException ()
678+ else :
679+ raise InvalidFileException ()
680+
681+ self ._objects [ref ] = result
682+ return result
670683
671684def _count_to_size (count ):
672685 if count < 1 << 8 :
@@ -681,6 +694,8 @@ def _count_to_size(count):
681694 else :
682695 return 8
683696
# Value types the writer deduplicates by (type, value) in its object table;
# any other non-Data object is tracked by id() instead.
697+ _scalars = (str , int , float , datetime .datetime , bytes )
698+
684699class _BinaryPlistWriter (object ):
685700 def __init__ (self , fp , sort_keys , skipkeys ):
686701 self ._fp = fp
@@ -736,24 +751,25 @@ def _flatten(self, value):
736751 # First check if the object is in the object table, not used for
737752 # containers to ensure that two subcontainers with the same contents
738753 # will be serialized as distinct values.
739- if isinstance (value , (
740- str , int , float , datetime .datetime , bytes , bytearray )):
754+ if isinstance (value , _scalars ):
741755 if (type (value ), value ) in self ._objtable :
742756 return
743757
744758 elif isinstance (value , Data ):
745759 if (type (value .data ), value .data ) in self ._objtable :
746760 return
747761
762+ elif id (value ) in self ._objidtable :
763+ return
764+
748765 # Add to objectreference map
749766 refnum = len (self ._objlist )
750767 self ._objlist .append (value )
751- try :
752- if isinstance (value , Data ):
753- self ._objtable [(type (value .data ), value .data )] = refnum
754- else :
755- self ._objtable [(type (value ), value )] = refnum
756- except TypeError :
768+ if isinstance (value , _scalars ):
769+ self ._objtable [(type (value ), value )] = refnum
770+ elif isinstance (value , Data ):
771+ self ._objtable [(type (value .data ), value .data )] = refnum
772+ else :
757773 self ._objidtable [id (value )] = refnum
758774
759775 # And finally recurse into containers
@@ -780,12 +796,11 @@ def _flatten(self, value):
780796 self ._flatten (o )
781797
def _getrefnum(self, value):
    """Return the reference number previously recorded for ``value``.

    Instances of the ``_scalars`` types are looked up in ``self._objtable``
    by ``(type, value)``.  ``Data`` wrappers are looked up in the same table
    by the type and value of their ``.data`` payload.  Anything else is
    looked up in ``self._objidtable`` by ``id(value)``.

    A value that was never recorded propagates the lookup error raised by
    the table.
    """
    if isinstance(value, _scalars):
        key = (type(value), value)
    elif isinstance(value, Data):
        payload = value.data
        key = (type(payload), payload)
    else:
        # Containers and other non-scalar objects are tracked by identity.
        return self._objidtable[id(value)]
    return self._objtable[key]
790805
791806 def _write_size (self , token , size ):
0 commit comments