Skip to content

Commit fced8de

Browse files
committed
v.scanner: eliminate str_helper_tokens changes outside string interpolation (fix speed loss after 106da40)
1 parent 64bade1 commit fced8de

2 files changed

Lines changed: 65 additions & 34 deletions

File tree

‎vlib/v/scanner/scanner.v‎

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ pub mut:
5555
is_nested_string bool // '${'abc':-12s}'
5656
is_inter_start bool // for hacky string interpolation TODO simplify
5757
is_inter_end bool
58-
str_helper_tokens []u8 // ', ", 0 (string interpolation with lcbr), { (block)
58+
str_helper_tokens []u8 = []u8{cap: 16} // ', ", 0 (string interpolation with lcbr), { (block)
5959
line_comment string
6060
last_lt int = -1 // position of latest <
6161
is_print_line_on_error bool
@@ -78,11 +78,11 @@ pub mut:
7878
should_abort bool // when too many errors/warnings/notices are accumulated, should_abort becomes true, and the scanner should stop
7979

8080
// the following are used only inside ident_string, but are here to avoid allocating new arrays for the most common case of strings without escapes
81-
all_pos []int
82-
u16_escapes_pos []int // pos list of \uXXXX
83-
u32_escapes_pos []int // pos list of \UXXXXXXXX
84-
h_escapes_pos []int // pos list of \xXX
85-
str_segments []string
81+
all_pos []int = []int{cap: 30}
82+
u16_escapes_pos []int = []int{cap: 10} // pos list of \uXXXX
83+
u32_escapes_pos []int = []int{cap: 10} // pos list of \UXXXXXXXX
84+
h_escapes_pos []int = []int{cap: 10} // pos list of \xXX
85+
str_segments []string = []string{cap: 10}
8686
}
8787

8888
/*
@@ -154,7 +154,13 @@ const internally_generated_v_code = 'internally_generated_v_code'
154154

155155
// new scanner from string.
156156
pub fn new_scanner(text string, comments_mode CommentsMode, pref_ &pref.Preferences) &Scanner {
157-
mut s := &Scanner{
157+
mut s := new_plain_scanner(text, comments_mode, pref_)
158+
s.scan_all_tokens_in_buffer()
159+
return s
160+
}
161+
162+
fn new_plain_scanner(text string, comments_mode CommentsMode, pref_ &pref.Preferences) &Scanner {
163+
return &Scanner{
158164
pref: pref_
159165
text: text
160166
all_tokens: []token.Token{cap: text.len / 3}
@@ -166,8 +172,6 @@ pub fn new_scanner(text string, comments_mode CommentsMode, pref_ &pref.Preferen
166172
file_path: internally_generated_v_code
167173
file_base: internally_generated_v_code
168174
}
169-
s.scan_all_tokens_in_buffer()
170-
return s
171175
}
172176

173177
@[unsafe]
@@ -832,11 +836,14 @@ pub fn (mut s Scanner) text_scan() token.Token {
832836
return s.new_token(.rsbr, '', 1)
833837
}
834838
`{` {
835-
// Skip { in `${` in strings
836-
if 255 != s.str_quote() {
837-
s.str_helper_tokens << 0
838-
} else {
839-
s.str_helper_tokens << c
839+
// Keep interpolation helper state only while scanning string interpolation.
840+
if s.str_helper_tokens.len > 0 {
841+
// Skip { in `${` in strings
842+
if 255 != s.str_quote() {
843+
s.str_helper_tokens << 0
844+
} else {
845+
s.str_helper_tokens << c
846+
}
840847
}
841848
if s.is_inside_string && s.text[s.pos - 1] == `$` {
842849
continue
@@ -855,24 +862,23 @@ pub fn (mut s Scanner) text_scan() token.Token {
855862
// s = `hello ${name} !`
856863
if s.str_helper_tokens.len > 0 {
857864
s.str_helper_tokens.delete_last()
858-
}
859-
quote := s.str_quote()
860-
if 255 != quote {
861-
if s.pos < s.text.len - 1 {
862-
s.pos++
863-
} else {
864-
s.error('unfinished string literal')
865-
}
866-
if s.text[s.pos] == quote {
867-
s.is_inside_string = false
868-
s.str_helper_tokens.delete_last()
869-
return s.new_token(.string, '', 1)
865+
quote := s.str_quote()
866+
if 255 != quote {
867+
if s.pos < s.text.len - 1 {
868+
s.pos++
869+
} else {
870+
s.error('unfinished string literal')
871+
}
872+
if s.text[s.pos] == quote {
873+
s.is_inside_string = false
874+
s.str_helper_tokens.delete_last()
875+
return s.new_token(.string, '', 1)
876+
}
877+
ident_string := s.ident_string()
878+
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
870879
}
871-
ident_string := s.ident_string()
872-
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
873-
} else {
874-
return s.new_token(.rcbr, '', 1)
875880
}
881+
return s.new_token(.rcbr, '', 1)
876882
}
877883
`&` {
878884
if nextc == `&` {

‎vlib/v/scanner/scanner_test.v‎

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ import v.token
55
import v.pref
66

77
fn scan_kinds(text string) []token.Kind {
8-
mut scanner := new_scanner(text, .skip_comments, &pref.Preferences{})
8+
mut scanner := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
99
mut token_kinds := []token.Kind{}
1010
for {
11-
tok := scanner.scan()
11+
tok := scanner.text_scan()
1212
if tok.kind == .eof {
1313
break
1414
}
@@ -18,10 +18,10 @@ fn scan_kinds(text string) []token.Kind {
1818
}
1919

2020
fn scan_tokens(text string) []token.Token {
21-
mut scanner := new_scanner(text, .parse_comments, &pref.Preferences{})
21+
mut scanner := new_plain_scanner(text, .parse_comments, &pref.Preferences{})
2222
mut tokens := []token.Token{}
2323
for {
24-
tok := scanner.scan()
24+
tok := scanner.text_scan()
2525
if tok.kind == .eof {
2626
break
2727
}
@@ -313,6 +313,31 @@ fn test_escape_string() {
313313
// result = scan_tokens(r'`hello`') // should always result in an error
314314
}
315315

316+
fn assert_str_interpolation_works(mlen int, text string) {
317+
mut max_len := 0
318+
mut scanner := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
319+
for {
320+
tok := scanner.text_scan()
321+
if scanner.str_helper_tokens.len > max_len {
322+
max_len = scanner.str_helper_tokens.len
323+
}
324+
if tok.kind == .eof {
325+
break
326+
}
327+
}
328+
assert max_len == mlen
329+
assert scanner.errors.len == 0
330+
assert scanner.str_helper_tokens.len == 0
331+
}
332+
333+
fn test_string_interpolation_with_nested_string_does_not_grow_str_helper_tokens_too_much() {
334+
sinterpolation := " s := 'x \${if true { '{' } else { '}' }} y' "
335+
assert_str_interpolation_works(3, sinterpolation)
336+
assert_str_interpolation_works(3, sinterpolation + sinterpolation + sinterpolation)
337+
assert_str_interpolation_works(3, '{'.repeat(100) + sinterpolation + '}'.repeat(100))
338+
assert_str_interpolation_works(0, '{'.repeat(100) + '}'.repeat(100))
339+
}
340+
316341
fn test_comment_string() {
317342
mut result := scan_tokens('// single line comment will get an \\x01 prepended')
318343
assert result[0].kind == .comment

0 commit comments

Comments
 (0)