@@ -101,3 +101,63 @@ fn test_string_to_ansi_not_null_terminated() {
// Three mathematical-bold letters (each outside the Basic Multilingual Plane)
// must be reported as three visible characters.
fn test_utf8_str_visible_length() {
	visible := utf8_str_visible_length('𝐀𝐁𝐂')
	assert visible == 3
}
104+
// test_utf8_to_utf32_cases feeds impl_utf8_to_utf32 one well-formed UTF-8
// sequence at a time and checks the returned code point. The inputs cover
// 1-, 2-, 3- and 4-byte encodings, including the first and last code point
// of the 2-, 3- and 4-byte ranges.
fn test_utf8_to_utf32_cases() {
	// 1 byte: ASCII.
	test_case1 := 'A'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case1.data), test_case1.len) == rune(`A`)

	// 2 bytes: Latin-1 supplement.
	test_case2 := 'é'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case2.data), test_case2.len) == rune(`é`)

	// 3 bytes: currency symbol.
	test_case3 := '€'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case3.data), test_case3.len) == rune(`€`)

	// 4 bytes: Gothic letter, U+10348.
	test_case4 := '𐍈'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(0x10348)
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(`𐍈`)

	// 3 bytes: CJK ideograph, U+4E2D.
	test_case5 := '中'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(0x4E2D)
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(`中`)

	// emoji, 4-byte UTF-8
	test_case6 := '😀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == rune(0x1F600)
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == rune(`😀`)

	// 2 bytes: Cyrillic.
	test_case7 := 'Ж'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case7.data), test_case7.len) == rune(`Ж`)

	// 2 bytes: Arabic.
	test_case8 := 'م'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case8.data), test_case8.len) == rune(`م`)

	// U+07FF: last code point that fits in 2 bytes.
	test_case9 := '߿'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(0x07FF)
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(`߿`)

	// U+0800: first code point that needs 3 bytes.
	test_case10 := 'ࠀ'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(0x0800)
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(`ࠀ`)

	// U+FFFF: last code point that fits in 3 bytes. It is a Unicode
	// noncharacter that editors and transport layers tend to strip, so the
	// input is spelled as raw bytes (EF BF BF) instead of a literal character,
	// and the expected value is given numerically only.
	test_case11 := [u8(0xEF), 0xBF, 0xBF]
	assert impl_utf8_to_utf32(&u8(test_case11.data), test_case11.len) == rune(0xFFFF)

	// U+10000: first code point that needs 4 bytes.
	test_case12 := '𐀀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(0x10000)
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(`𐀀`)

	// U+10FFFF: highest Unicode code point, also a noncharacter; spelled as
	// raw bytes (F4 8F BF BF) for the same reason as U+FFFF.
	test_case13 := [u8(0xF4), 0x8F, 0xBF, 0xBF]
	assert impl_utf8_to_utf32(&u8(test_case13.data), test_case13.len) == rune(0x10FFFF)
}
154+
// An input longer than 4 bytes is treated as invalid, and the decoder is
// expected to return 0 for it.
fn test_utf8_to_utf32_invalid_length() {
	// A valid 4-byte sequence followed by one extra byte: 5 bytes in total.
	too_long := [u8(0xF0), 0x9F, 0x98, 0x80, 0x00]
	decoded := impl_utf8_to_utf32(&u8(too_long.data), too_long.len)
	assert decoded == 0
}
160+
// A zero-length input is expected to decode to 0.
fn test_utf8_to_utf32_empty() {
	no_bytes := []u8{}
	decoded := impl_utf8_to_utf32(&u8(no_bytes.data), 0)
	assert decoded == 0
}
0 commit comments