@@ -410,7 +410,7 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
410410}
411411
412412static const unsigned short sjis_decode_tbl1 [] = {
413- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , -6204 , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 8836 , 9024 , 9212 , 9400 , 9588 , 9776 , 9964 , 10152 , 10340 , 10528 , 10716 , 10904 , 11092
413+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0xFFFF , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 0xFFFF , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF
414414};
415415
416416static const unsigned short sjis_decode_tbl2 [] = {
@@ -422,34 +422,52 @@ static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
422422 unsigned char * p = * in , * e = p + * in_len ;
423423 uint32_t * out = buf , * limit = buf + bufsize ;
424424
425+ e -- ; /* Stop the main loop 1 byte short of the end of the input */
426+
425427 while (p < e && out < limit ) {
426428 unsigned char c = * p ++ ;
427429
428430 if (c <= 0x7F ) {
429431 * out ++ = c ;
430432 } else if (c >= 0xA1 && c <= 0xDF ) { /* Kana */
431433 * out ++ = 0xFEC0 + c ;
432- } else if (c > 0x80 && c <= 0xEF && c != 0xA0 && p < e ) {
434+ } else {
435+ /* No need to check p < e before reading c2: the loop condition guarantees p <= e once c has been read, and because of the e-- above, e itself still points at a readable input byte */
433436 unsigned char c2 = * p ++ ;
434437 /* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F
435438 * But the values in the above conversion tables have been chosen such that
436438 * illegal values of c2 will always result in w >= jisx0208_ucs_table_size,
437- * so we don't need to do a separate bounds check on c2 */
440+ * so we don't need to do a separate bounds check on c2
441+ * Likewise, the values in the conversion tables are such that illegal values
442+ * for c (0x80, 0xA0, or anything above 0xEF) will always result in w >= jisx0208_ucs_table_size */
438443 uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
439444 if (w < jisx0208_ucs_table_size ) {
440445 w = jisx0208_ucs_table [w ];
441446 if (!w )
442447 w = MBFL_BAD_INPUT ;
443448 * out ++ = w ;
444449 } else {
450+ if (c == 0x80 || c == 0xA0 || c > 0xEF ) {
451+ p -- ;
452+ }
445453 * out ++ = MBFL_BAD_INPUT ;
446454 }
455+ }
456+ }
457+
458+ /* Finish up last byte of input string if there is one */
459+ if (p == e && out < limit ) {
460+ unsigned char c = * p ++ ;
461+ if (c <= 0x7F ) {
462+ * out ++ = c ;
463+ } else if (c >= 0xA1 && c <= 0xDF ) {
464+ * out ++ = 0xFEC0 + c ;
447465 } else {
448466 * out ++ = MBFL_BAD_INPUT ;
449467 }
450468 }
451469
452- * in_len = e - p ;
470+ * in_len = e - p + 1 ;
453471 * in = p ;
454472 return out - buf ;
455473}
@@ -1057,11 +1075,17 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
10571075 while (p < e && out < limit ) {
10581076 unsigned char c = * p ++ ;
10591077
1060- if (c < 0x80 && c != 0x5C ) {
1061- * out ++ = c ;
1078+ if (c <= 0x80 || c == 0xA0 ) {
1079+ if (c == 0x5C ) {
1080+ * out ++ = 0xA5 ;
1081+ } else if (c == 0x80 ) {
1082+ * out ++ = 0x5C ;
1083+ } else {
1084+ * out ++ = c ;
1085+ }
10621086 } else if (c >= 0xA1 && c <= 0xDF ) {
10631087 * out ++ = 0xFEC0 + c ;
1064- } else if (c > 0x80 && c <= 0xED && c != 0xA0 ) {
1088+ } else if (c <= 0xED ) {
10651089 if (p == e ) {
10661090 * out ++ = MBFL_BAD_INPUT ;
10671091 break ;
@@ -1162,12 +1186,6 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
11621186 } else {
11631187 * out ++ = MBFL_BAD_INPUT ;
11641188 }
1165- } else if (c == 0x5C ) {
1166- * out ++ = 0xA5 ;
1167- } else if (c == 0x80 ) {
1168- * out ++ = 0x5C ;
1169- } else if (c == 0xA0 ) {
1170- * out ++ = 0xA0 ;
11711189 } else if (c == 0xFD ) {
11721190 * out ++ = 0xA9 ;
11731191 } else if (c == 0xFE ) {
@@ -2095,6 +2113,10 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
20952113 return 0 ;
20962114}
20972115
2116+ static const unsigned short sjis_mobile_decode_tbl1 [] = {
2117+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0xFFFF , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 0xFFFF , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 8836 , 9024 , 9212 , 9400 , 9588 , 9776 , 9964 , 10152 , 10340 , 10528 , 10716 , 10904 , 11092 , 0xFFFF , 0xFFFF , 0xFFFF
2118+ };
2119+
20982120static size_t mb_sjis_docomo_to_wchar (unsigned char * * in , size_t * in_len , uint32_t * buf , size_t bufsize , unsigned int * state )
20992121{
21002122 unsigned char * p = * in , * e = p + * in_len ;
@@ -2110,14 +2132,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
21102132 } else if (c >= 0xA1 && c <= 0xDF ) {
21112133 /* Kana */
21122134 * out ++ = 0xFEC0 + c ;
2113- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2135+ } else {
21142136 /* Kanji */
21152137 if (p == e ) {
21162138 * out ++ = MBFL_BAD_INPUT ;
21172139 break ;
21182140 }
21192141 unsigned char c2 = * p ++ ;
2120- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2142+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
21212143
21222144 if (w <= 137 ) {
21232145 if (w == 31 ) {
@@ -2161,13 +2183,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
21612183 } else if (w >= (94 * 94 ) && w < (114 * 94 )) {
21622184 w = w - (94 * 94 ) + 0xE000 ;
21632185 } else {
2186+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2187+ p -- ;
2188+ }
21642189 * out ++ = MBFL_BAD_INPUT ;
21652190 continue ;
21662191 }
21672192
21682193 * out ++ = w ? w : MBFL_BAD_INPUT ;
2169- } else {
2170- * out ++ = MBFL_BAD_INPUT ;
21712194 }
21722195 }
21732196
@@ -2337,14 +2360,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23372360 } else if (c >= 0xA1 && c <= 0xDF ) {
23382361 /* Kana */
23392362 * out ++ = 0xFEC0 + c ;
2340- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2363+ } else {
23412364 /* Kanji */
23422365 if (p == e ) {
23432366 * out ++ = MBFL_BAD_INPUT ;
23442367 break ;
23452368 }
23462369 unsigned char c2 = * p ++ ;
2347- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2370+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
23482371
23492372 if (w <= 137 ) {
23502373 if (w == 31 ) {
@@ -2375,7 +2398,7 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23752398 int snd = 0 ;
23762399 w = mbfilter_sjis_emoji_kddi2unicode (w , & snd );
23772400 if (!w ) {
2378- w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2401+ w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
23792402 if (w >= (94 * 94 ) && w < (114 * 94 )) {
23802403 w = w - (94 * 94 ) + 0xE000 ;
23812404 }
@@ -2393,13 +2416,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23932416 } else if (w >= (94 * 94 ) && w < (114 * 94 )) {
23942417 w = w - (94 * 94 ) + 0xE000 ;
23952418 } else {
2419+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2420+ p -- ;
2421+ }
23962422 * out ++ = MBFL_BAD_INPUT ;
23972423 continue ;
23982424 }
23992425
24002426 * out ++ = w ? w : MBFL_BAD_INPUT ;
2401- } else {
2402- * out ++ = MBFL_BAD_INPUT ;
24032427 }
24042428 }
24052429
@@ -2645,14 +2669,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
26452669 } else if (c >= 0xA1 && c <= 0xDF ) {
26462670 /* Kana */
26472671 * out ++ = 0xFEC0 + c ;
2648- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2672+ } else {
26492673 /* Kanji */
26502674 if (p == e ) {
26512675 * out ++ = MBFL_BAD_INPUT ;
26522676 break ;
26532677 }
26542678 unsigned char c2 = * p ++ ;
2655- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2679+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
26562680
26572681 if (w <= 137 ) {
26582682 if (w == 31 ) {
@@ -2683,7 +2707,7 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
26832707 int snd = 0 ;
26842708 w = mbfilter_sjis_emoji_sb2unicode (w , & snd );
26852709 if (!w ) {
2686- w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2710+ w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
26872711 if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max ) {
26882712 w = cp932ext3_ucs_table [w - cp932ext3_ucs_table_min ];
26892713 } else if (w >= (94 * 94 ) && w < (114 * 94 )) {
@@ -2703,13 +2727,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
27032727 } else if (w >= (94 * 94 ) && w < (114 * 94 )) {
27042728 w = w - (94 * 94 ) + 0xE000 ;
27052729 } else {
2730+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2731+ p -- ;
2732+ }
27062733 * out ++ = MBFL_BAD_INPUT ;
27072734 continue ;
27082735 }
27092736
27102737 * out ++ = w ? w : MBFL_BAD_INPUT ;
2711- } else {
2712- * out ++ = MBFL_BAD_INPUT ;
27132738 }
27142739 }
27152740
0 commit comments