@@ -577,14 +577,28 @@ d := b + x // d is of type `f64` - automatic promotion of `x`'s value
577577
578578### Strings
579579
580- ``` v nofmt
580+ In V, strings are encoded in UTF-8, and are immutable (read-only) by default:
581+
582+ ``` v
583+ s := 'hello 🌎' // the `world` emoji takes 4 bytes, and string length is reported in bytes
584+ assert s.len == 10
585+
586+ arr := s.bytes() // convert `string` to `[]u8`
587+ assert arr.len == 10
588+
589+ s2 := arr.bytestr() // convert `[]u8` to `string`
590+ assert s2 == s
591+
581592name := 'Bob'
582- assert name.len == 3 // will print 3
583- assert name[0] == u8(66) // indexing gives a byte, u8(66) == `B`
584- assert name[1..3] == 'ob' // slicing gives a string 'ob'
593+ assert name.len == 3
594+ // indexing gives a byte, u8(66) == `B`
595+ assert name[0] == u8(66)
596+ // slicing gives a string 'ob'
597+ assert name[1..3] == 'ob'
585598
586599// escape codes
587- windows_newline := '\r\n' // escape special characters like in C
600+ // escape special characters like in C
601+ windows_newline := '\r\n'
588602assert windows_newline.len == 2
589603
590604// arbitrary bytes can be directly specified using `\x##` notation where `#` is
@@ -601,23 +615,11 @@ assert aardvark_str2 == 'aardvark'
601615// and will be converted internally to its UTF-8 representation
602616star_str := '\u2605' // ★
603617assert star_str == '★'
604- assert star_str == '\xe2\x98\x85' // UTF-8 can be specified this way too.
605- ```
606-
607- In V, strings are read-only, and Unicode characters are encoded in UTF-8:
608-
609- ``` v
610- s := 'hello 🌎' // emoji takes 4 bytes
611- assert s.len == 10
612-
613- arr := s.bytes() // convert `string` to `[]u8`
614- assert arr.len == 10
615-
616- s2 := arr.bytestr() // convert `[]u8` to `string`
617- assert s2 == s
618+ // UTF-8 can be specified this way too, as individual bytes.
619+ assert star_str == '\xe2\x98\x85'
618620```
619621
620- String values are immutable. You cannot mutate elements :
622+ Since strings are immutable, you cannot directly change characters in a string:
621623
622624``` v failcompile
623625mut s := 'hello 🌎'
@@ -643,17 +645,20 @@ _are_ any non-ASCII characters.
643645
644646``` v
645647mut s := 'hello 🌎'
648+ // there are 10 bytes in the string (as shown earlier), but only 7 runes, since the `world` emoji
649+ // only counts as one `rune` (one Unicode character)
650+ assert s.runes().len == 7
646651println(s.runes()[6])
647652```
648653
649- If you want the code point from a specific ` string ` index or other more advanced
650- utf8 processing and conversions, refer to the
651- [ vlib/encoding. utf8] ( https://modules.vlang.io/encoding.utf8.html ) module.
654+ If you want the code point from a specific `string` index or other more advanced UTF-8 processing
655+ and conversions, refer to the
656+ [vlib/encoding/utf8](https://modules.vlang.io/encoding.utf8.html) module.
652657
653658Both single and double quotes can be used to denote strings. For consistency, ` vfmt ` converts double
654659quotes to single quotes unless the string contains a single quote character.
655660
656- For raw strings, prepend ` r ` . Escape handling is not done for raw strings :
661+ Prepend `r` for raw strings. Escapes are not handled, so you will get exactly what you type:
657662
658663``` v
659664s := r'hello\nworld' // the `\n` will be preserved as two characters
@@ -7797,7 +7802,7 @@ Ordinary zero terminated C strings can be converted to V strings with
77977802> If you need to make a copy of the C string (some libc APIs like `getenv` pretty much require that,
77987803> since they return pointers to internal libc memory), you can use `cstring_to_vstring(cstring)`.
77997804
7800- On Windows, C APIs often return so called `wide` strings (utf16 encoding).
7805+ On Windows, C APIs often return so-called `wide` strings (UTF-16 encoding).
78017806These can be converted to V strings with `string_from_wide(&u16(cwidestring))`.
78027807
78037808V has these types for easier interoperability with C:
0 commit comments