@@ -11,7 +11,21 @@ ini_set('include_path','.');
1111include_once ('common.inc ' );
1212
1313// EUC-JP
14- $ euc_jp = '0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。 ' ;
14+ $ euc_jp = mb_convert_encoding ('0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。 ' , 'EUC-JP ' , 'UTF-8 ' );
15+ // SJIS
16+ $ sjis = mb_convert_encoding ('日本語テキストです。01234５６７８９。 ' , 'SJIS ' , 'UTF-8 ' );
17+ // ISO-2022-JP
18+ $ iso2022jp = "\x1B\$B \x21\x21!r \x1B(BABC " ;
19+ // GB-18030
20+ $ gb18030 = mb_convert_encoding ('密码用户名密码名称名称 ' , 'GB18030 ' , 'UTF-8 ' );
21+ // HZ
22+ $ hz = "The next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye. " ;
23+ // UTF-8
24+ $ utf8 = "Greek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь " ;
25+ // UTF-32
26+ $ utf32 = mb_convert_encoding ($ utf8 , 'UTF-32 ' , 'UTF-8 ' );
27+ // UTF-7
28+ $ utf7 = mb_convert_encoding ($ utf8 , 'UTF-7 ' , 'UTF-8 ' );
1529
1630print "1: " . bin2hex (mb_substr ($ euc_jp , 10 , 10 ,'EUC-JP ' )) . "\n" ;
1731print "2: " . bin2hex (mb_substr ($ euc_jp , 0 , 100 ,'EUC-JP ' )) . "\n" ;
@@ -20,12 +34,148 @@ $str = mb_substr($euc_jp, 100, 10,'EUC-JP');
2034// Note: a start offset past the end of the string yields an empty string
2135($ str === "" ) ? print "3 OK \n" : print "NG: " .bin2hex ($ str )."\n" ;
2236
23- $ str = mb_substr ($ euc_jp , -100 , 10 ,'EUC-JP ' );
24- ($ str !== "" ) ? print "4 OK: " .bin2hex ($ str )."\n" : print "NG: " .bin2hex ($ str )."\n" ;
37+ $ str = mb_substr ($ euc_jp , -100 , 10 , 'EUC-JP ' );
38+ print ($ str !== "" ) ? "4 OK: " . bin2hex ($ str ) . "\n" : "BAD: " . bin2hex ($ str ) . "\n" ;
39+
40+ echo "SJIS: \n" ;
41+ print "1: " . bin2hex (mb_substr ($ sjis , 0 , 3 , 'SJIS ' )) . "\n" ;
42+ print "2: " . bin2hex (mb_substr ($ sjis , -1 , null , 'SJIS ' )) . "\n" ;
43+ print "3: " . bin2hex (mb_substr ($ sjis , -5 , 3 , 'SJIS ' )) . "\n" ;
44+ print "4: " . bin2hex (mb_substr ($ sjis , 1 , null , 'SJIS ' )) . "\n" ;
45+ print "5: " . bin2hex (mb_substr ($ sjis , 10 , 0 , 'SJIS ' )) . "\n" ;
46+ echo "-- Testing illegal SJIS byte 0x80 -- \n" ;
47+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'SJIS ' )) . "\n" ;
48+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'SJIS ' )) . "\n" ;
49+
50+ echo "SJIS-2004: \n" ;
51+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'SJIS-2004 ' )) . "\n" ;
52+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'SJIS-2004 ' )) . "\n" ;
53+
54+ echo "MacJapanese: \n" ;
55+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'MacJapanese ' )) . "\n" ;
56+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'MacJapanese ' )) . "\n" ;
57+
58+ echo "SJIS-Mobile#DOCOMO: \n" ;
59+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'SJIS-Mobile#DOCOMO ' )) . "\n" ;
60+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'SJIS-Mobile#DOCOMO ' )) . "\n" ;
61+
62+ echo "SJIS-Mobile#KDDI: \n" ;
63+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'SJIS-Mobile#KDDI ' )) . "\n" ;
64+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'SJIS-Mobile#KDDI ' )) . "\n" ;
65+
66+ echo "SJIS-Mobile#SoftBank: \n" ;
67+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 3 , 2 , 'SJIS-Mobile#SoftBank ' )) . "\n" ;
68+ print bin2hex (mb_substr ("\x80abc \x80\xA1" , 0 , 3 , 'SJIS-Mobile#SoftBank ' )) . "\n" ;
69+
70+ echo "ISO-2022-JP: \n" ;
71+ print "1: " . bin2hex (mb_substr ($ iso2022jp , 0 , 3 , 'ISO-2022-JP ' )) . "\n" ;
72+ print "2: " . bin2hex (mb_substr ($ iso2022jp , -1 , null , 'ISO-2022-JP ' )) . "\n" ;
73+ print "3: " . bin2hex (mb_substr ($ iso2022jp , -6 , 3 , 'ISO-2022-JP ' )) . "\n" ;
74+ print "4: " . bin2hex (mb_substr ($ iso2022jp , -3 , 2 , 'ISO-2022-JP ' )) . "\n" ;
75+ print "5: " . bin2hex (mb_substr ($ iso2022jp , 1 , null , 'ISO-2022-JP ' )) . "\n" ;
76+ print "6: " . bin2hex (mb_substr ($ iso2022jp , 10 , 0 , 'ISO-2022-JP ' )) . "\n" ;
77+ print "7: " . bin2hex (mb_substr ($ iso2022jp , 100 , 10 , 'ISO-2022-JP ' )) . "\n" ;
78+
79+ echo "GB-18030: \n" ;
80+ print "1: " . bin2hex (mb_substr ($ gb18030 , 0 , 3 , 'GB-18030 ' )) . "\n" ;
81+ print "2: " . bin2hex (mb_substr ($ gb18030 , -1 , null , 'GB-18030 ' )) . "\n" ;
82+ print "3: " . bin2hex (mb_substr ($ gb18030 , -5 , 3 , 'GB-18030 ' )) . "\n" ;
83+ print "4: " . bin2hex (mb_substr ($ gb18030 , 1 , null , 'GB-18030 ' )) . "\n" ;
84+ print "5: " . bin2hex (mb_substr ($ gb18030 , 10 , 0 , 'GB-18030 ' )) . "\n" ;
85+
86+ echo "HZ: \n" ;
87+ print "1: " . mb_substr ($ hz , 0 , 3 , 'HZ ' ) . "\n" ;
88+ print "2: " . mb_substr ($ hz , -1 , null , 'HZ ' ) . "\n" ;
89+ print "3: " . mb_substr ($ hz , -5 , 3 , 'HZ ' ) . "\n" ;
90+ print "4: " . mb_substr ($ hz , 1 , null , 'HZ ' ) . "\n" ;
91+ print "5: " . mb_substr ($ hz , 10 , 0 , 'HZ ' ) . "\n" ;
92+
93+ echo "UTF-8: \n" ;
94+ print "1: " . mb_substr ($ utf8 , 0 , 3 , 'UTF-8 ' ) . "\n" ;
95+ print "2: " . mb_substr ($ utf8 , -1 , null , 'UTF-8 ' ) . "\n" ;
96+ print "3: " . mb_substr ($ utf8 , -5 , 3 , 'UTF-8 ' ) . "\n" ;
97+ print "4: " . mb_substr ($ utf8 , 1 , null , 'UTF-8 ' ) . "\n" ;
98+ print "5: " . mb_substr ($ utf8 , 10 , 0 , 'UTF-8 ' ) . "\n" ;
99+
100+ echo "UTF-32: \n" ;
101+ print "1: " . mb_convert_encoding (mb_substr ($ utf32 , 0 , 3 , 'UTF-32 ' ), 'UTF-8 ' , 'UTF-32 ' ) . "\n" ;
102+ print "2: " . mb_convert_encoding (mb_substr ($ utf32 , -1 , null , 'UTF-32 ' ), 'UTF-8 ' , 'UTF-32 ' ) . "\n" ;
103+ print "3: " . mb_convert_encoding (mb_substr ($ utf32 , -5 , 3 , 'UTF-32 ' ), 'UTF-8 ' , 'UTF-32 ' ) . "\n" ;
104+ print "4: " . mb_convert_encoding (mb_substr ($ utf32 , 1 , null , 'UTF-32 ' ), 'UTF-8 ' , 'UTF-32 ' ) . "\n" ;
105+ print "5: " . mb_convert_encoding (mb_substr ($ utf32 , 10 , 0 , 'UTF-32 ' ), 'UTF-8 ' , 'UTF-32 ' ) . "\n" ;
106+
107+ echo "UTF-7: \n" ;
108+ print "1: " . mb_convert_encoding (mb_substr ($ utf7 , 0 , 3 , 'UTF-7 ' ), 'UTF-8 ' , 'UTF-7 ' ) . "\n" ;
109+ print "2: " . mb_convert_encoding (mb_substr ($ utf7 , -1 , null , 'UTF-7 ' ), 'UTF-8 ' , 'UTF-7 ' ) . "\n" ;
110+ print "3: " . mb_convert_encoding (mb_substr ($ utf7 , -5 , 3 , 'UTF-7 ' ), 'UTF-8 ' , 'UTF-7 ' ) . "\n" ;
111+ print "4: " . mb_convert_encoding (mb_substr ($ utf7 , 1 , null , 'UTF-7 ' ), 'UTF-8 ' , 'UTF-7 ' ) . "\n" ;
112+ print "5: " . mb_convert_encoding (mb_substr ($ utf7 , 10 , 0 , 'UTF-7 ' ), 'UTF-8 ' , 'UTF-7 ' ) . "\n" ;
25113
26114?>
27115--EXPECT--
281161: c6fccbdcb8eca4c7a4b9a1a34555432d
291172: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3
301183 OK
311194 OK: 30313233a4b3a4cecab8bbfacef3a4cf
120+ SJIS:
121+ 1: 93fa967b8cea
122+ 2: 8142
123+ 3: 825582568257
124+ 4: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142
125+ 5:
126+ -- Testing illegal SJIS byte 0x80 --
127+ 6380
128+ 806162
129+ SJIS-2004:
130+ 6380
131+ 806162
132+ MacJapanese:
133+ 6380
134+ 806162
135+ SJIS-Mobile#DOCOMO:
136+ 6380
137+ 806162
138+ SJIS-Mobile#KDDI:
139+ 6380
140+ 806162
141+ SJIS-Mobile#SoftBank:
142+ 6380
143+ 806162
144+ ISO-2022-JP:
145+ 1: 1b2442212121721b284241
146+ 2: 43
147+ 3: 1b2442212121721b284241
148+ 4: 4142
149+ 5: 1b244221721b2842414243
150+ 6:
151+ 7:
152+ GB-18030:
153+ 1: c3dcc2ebd3c3
154+ 2: b3c6
155+ 3: c2ebc3fbb3c6
156+ 4: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6
157+ 5:
158+ HZ:
159+ 1: The
160+ 2: .
161+ 3: ~{!#~}By
162+ 4: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.
163+ 5:
164+ UTF-8:
165+ 1: Gre
166+ 2: ь
167+ 3: йте
168+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
169+ 5:
170+ UTF-32:
171+ 1: Gre
172+ 2: ь
173+ 3: йте
174+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
175+ 5:
176+ UTF-7:
177+ 1: Gre
178+ 2: ь
179+ 3: йте
180+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
181+ 5:
0 commit comments