@@ -41,19 +41,19 @@ echo "2: " . mb_decode_numericentity($str2, $convmap, "UTF-8") . "\n";
4141echo "3: " . mb_decode_numericentity ($ str3 , $ convmap , "UTF-8 " ) . "\n" ;
4242
4343// Numeric entities which are truncated at end of string
44- // We do NOT decode such entities; they can be terminated by any non-digit character, but not by the end of the string
45- echo "4: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
46- echo "5: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
47- echo "6: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
48- echo "7: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
49- echo "8: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
50- echo "9: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
51- echo "10: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
52- echo "11: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
44+ echo "4: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Entity is too big
45+ echo "5: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Entity is too big
46+ echo "6: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
47+ echo "7: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
48+ echo "8: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
49+ echo "9: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
50+ echo "10: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
51+ echo "11: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
5352// Try with hex, not just decimal entities
54- echo "11b: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
55- echo "11c: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
56- echo "11d: " . mb_decode_numericentity ('𐀀 ' , $ convmap ), "\n" ;
53+ echo "11b: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
54+ echo "11c: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
55+ echo "11d: " . bin2hex (mb_decode_numericentity ('𐀀 ' , $ convmap )), "\n" ; // OK
56+ echo "11e: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
5757
5858// Large decimal entity, converting from non-ASCII input encoding
5959echo "12: " . bin2hex (mb_decode_numericentity (mb_convert_encoding ('� ' , 'UCS-4 ' , 'ASCII ' ), [0 , 0x7FFFFFFF , 0 , 0x7FFFFFFF ], 'UCS-4 ' )), "\n" ;
@@ -100,6 +100,8 @@ test("Successive &", "&A,", "&A,", [0, 0xFFFF, 0, 0xFFFF], 'ASCII');
100100test ("Successive &# " , "&#2 " , " " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
101101test ("Successive &#x " , "&#x2 " , " " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
102102
103+ test ("&#x only " , "&#x; " , "&#x; " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
104+
103105// The starting & of an entity can terminate a preceding entity
104106test ("Successive A " , "AA " , "AA " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
105107test ("Successive hex entities " , "22 " , "22 " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
@@ -131,6 +133,36 @@ test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&
131133// Previously, signed arithmetic was used on convmap entries
132134test ("Regression test (convmap entries are now treated as unsigned) " , ", " , "?, " , [0x22FFFF11 , 0xBF111189 , 0x67726511 , 0x1161E719 ], "ASCII " );
133135
136+ // Try with '&', '&#', or '&#x' at the end of a buffer of wchars, with more input
137+ // still left to process in the next buffer
138+ // (mb_decode_numericentity splits its input into 'chunks' and processes it one
139+ // chunk at a time)
140+ $ convmap = [0 , 0xFFFF , 0 , 0xFFFF ];
141+ for ($ i = 0 ; $ i < 256 ; $ i ++) {
142+ $ padding = str_repeat ("a " , $ i );
143+ // First try invalid decimal/hex entities
144+ if (mb_decode_numericentity ($ padding . "&#ZZZ " , $ convmap , 'UTF-8 ' ) !== $ padding . "&#ZZZ " )
145+ die ("&#ZZZ is broken when it spans two buffers! " );
146+ if (mb_decode_numericentity ($ padding . "&#xZZZ " , $ convmap , 'UTF-8 ' ) !== $ padding . "&#xZZZ " )
147+ die ("&#xZZZ is broken when it spans two buffers! " );
148+ // Now try valid decimal/hex entities
149+ if (mb_decode_numericentity ($ padding . "&#65; " , $ convmap , 'UTF-8 ' ) !== $ padding . "A " )
150+ die ("&#65; is broken when it spans two buffers! " );
151+ if (mb_decode_numericentity ($ padding . "&#x41; " , $ convmap , 'UTF-8 ' ) !== $ padding . "A " )
152+ die ("&#x41; is broken when it spans two buffers! " );
153+ }
154+
155+ // Try huge entities, big enough to fill an entire buffer
156+ for ($ i = 12 ; $ i < 256 ; $ i ++) {
157+ $ str = "&# " . str_repeat ("0 " , $ i ) . "65 " ;
158+ if (mb_decode_numericentity ($ str , $ convmap , 'UTF-8 ' ) !== $ str )
159+ die ("Decimal entity with huge number of digits broken " );
160+
161+ $ str = "&#x " . str_repeat ("0 " , $ i ) . "41 " ;
162+ if (mb_decode_numericentity ($ str , $ convmap , 'UTF-8 ' ) !== $ str )
163+ die ("Hexadecimal entity with huge number of digits broken " );
164+ }
165+
134166?>
135167--EXPECT--
1361681: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
@@ -142,11 +174,12 @@ test("Regression test (convmap entries are now treated as unsigned)", ",", "?
1421747: �
1431758: �
1441769: �
145- 10: �
146- 11: �
147- 11b: �
148- 11c: �
149- 11d: 𐀀
177+ 10: 00
178+ 11: 00
179+ 11b: 00
180+ 11c: 00
181+ 11d: f0908080
182+ 11e: �
15018312: 00bc614e
15118413: föo
15218514: mb_decode_numericentity(): Argument #2 ($map) must have a multiple of 4 elements
@@ -164,6 +197,7 @@ Single &: string(1) "&" => string(1) "&" (Good)
164197Successive &: string(6) "&A," => string(3) "&A," (Good)
165198Successive &#: string(8) "&#2" => string(3) "" (Good)
166199Successive &#x: string(9) "&#x2" => string(4) "" (Good)
200+ &#x only: string(4) "&#x;" => string(4) "&#x;" (Good)
167201Successive A: string(9) "AA" => string(2) "AA" (Good)
168202Successive hex entities: string(11) "22" => string(2) "22" (Good)
169203Starting entity immediately after decimal entity which is too long: string(18) "�A" => string(14) "�A" (Good)
0 commit comments