Skip to content

Commit 1fcb701

Browse files
committed
v2: lots of ssa builder and arm64 fixes
1 parent ad2a629 commit 1fcb701

16 files changed

Lines changed: 1292 additions & 174 deletions

File tree

‎vlib/v2/gen/arm64/arm64.v‎

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,19 @@ pub fn (mut g Gen) gen() {
100100
g.macho.data_data << bytes
101101
}
102102
else {
103-
// Non-scalar constants are emitted as zero-initialized storage for now.
104-
for _ in 0 .. size {
105-
g.macho.data_data << 0
103+
// For struct constants (e.g., sum types), emit initial_value as first 8 bytes
104+
// (the tag for sum types), then zeros for the rest.
105+
if gvar.initial_value != 0 && size >= 8 {
106+
mut bytes := []u8{len: 8}
107+
binary.little_endian_put_u64(mut bytes, u64(gvar.initial_value))
108+
g.macho.data_data << bytes
109+
for _ in 0 .. size - 8 {
110+
g.macho.data_data << 0
111+
}
112+
} else {
113+
for _ in 0 .. size {
114+
g.macho.data_data << 0
115+
}
106116
}
107117
}
108118
}
@@ -980,6 +990,29 @@ fn (mut g Gen) gen_instr(val_id int) {
980990
g.emit_add_fp_imm(8, data_off)
981991
g.store_reg_to_val(8, val_id)
982992
}
993+
.heap_alloc {
994+
// Heap-allocate memory for a struct type.
995+
// Result type is ptr(T), compute sizeof(T) and call calloc(1, size).
996+
mut alloc_size := 8
997+
ha_val := g.mod.values[val_id]
998+
if ha_val.typ > 0 && ha_val.typ < g.mod.type_store.types.len {
999+
ptr_typ := g.mod.type_store.types[ha_val.typ]
1000+
if ptr_typ.kind == .ptr_t && ptr_typ.elem_type > 0 {
1001+
alloc_size = g.type_size(ptr_typ.elem_type)
1002+
if alloc_size <= 0 {
1003+
alloc_size = 8
1004+
}
1005+
}
1006+
}
1007+
// calloc(1, size) → x0 = 1, x1 = size
1008+
g.emit_mov_imm(0, 1)
1009+
g.emit_mov_imm(1, u64(alloc_size))
1010+
sym_idx := g.macho.add_undefined('_calloc')
1011+
g.macho.add_reloc(g.macho.text_data.len, sym_idx, arm64_reloc_branch26, true)
1012+
g.emit(asm_bl_reloc())
1013+
// calloc returns heap pointer in x0
1014+
g.store_reg_to_val(0, val_id)
1015+
}
9831016
.get_element_ptr {
9841017
// GEP: Base + scaled index (or struct field offset for aggregate pointers)
9851018
base_reg := g.get_operand_reg(instr.operands[0], 8)
@@ -1448,6 +1481,12 @@ fn (mut g Gen) gen_instr(val_id int) {
14481481
// Small struct (≤ 16 bytes) - return in registers x0, x1
14491482
actual_struct_typ := if is_indirect_struct_return { fn_ret_typ } else { ret_typ }
14501483

1484+
// Ensure string literals are materialized on the stack
1485+
// before we try to load their fields into return registers.
1486+
if ret_val.kind == .string_literal {
1487+
g.load_val_to_reg(9, ret_val_id)
1488+
}
1489+
14511490
if is_indirect_struct_return {
14521491
// Return value is a pointer to struct - load each field via the pointer
14531492
g.load_val_to_reg(8, ret_val_id)
@@ -1636,6 +1675,24 @@ fn (mut g Gen) gen_instr(val_id int) {
16361675
dest_id := instr.operands[0]
16371676
src_id := instr.operands[1]
16381677
mut handled_aggregate_copy := false
1678+
// Diagnostic: check if assign involves multi-word dest
1679+
if dest_id > 0 && dest_id < g.mod.values.len {
1680+
diag_dt := g.mod.values[dest_id].typ
1681+
if diag_dt > 0 && diag_dt < g.mod.type_store.types.len {
1682+
diag_dsz := g.type_size(diag_dt)
1683+
if diag_dsz > 8 {
1684+
src_kind_str := if src_id > 0 && src_id < g.mod.values.len {
1685+
'kind=${g.mod.values[src_id].kind} typ_kind=${g.mod.type_store.types[g.mod.values[src_id].typ].kind} name="${g.mod.values[src_id].name}"'
1686+
} else {
1687+
'invalid_src'
1688+
}
1689+
dest_kind_str := 'kind=${g.mod.values[dest_id].kind} typ_kind=${g.mod.type_store.types[diag_dt].kind} name="${g.mod.values[dest_id].name}"'
1690+
has_src_stack := src_id in g.stack_map
1691+
has_dest_stack := dest_id in g.stack_map
1692+
eprintln('DIAG ASSIGN multi: dest[${dest_id}]={${dest_kind_str} sz=${diag_dsz} stk=${has_dest_stack}} src[${src_id}]={${src_kind_str} stk=${has_src_stack}} fn=${g.cur_func_name}')
1693+
}
1694+
}
1695+
}
16391696
if dest_id > 0 && dest_id < g.mod.values.len {
16401697
dest_typ_id := g.mod.values[dest_id].typ
16411698
if dest_typ_id > 0 && dest_typ_id < g.mod.type_store.types.len {
@@ -1685,10 +1742,25 @@ fn (mut g Gen) gen_instr(val_id int) {
16851742
}
16861743
}
16871744
if can_copy {
1688-
for i in 0 .. num_chunks {
1745+
// Determine how many chunks the source actually has
1746+
mut src_chunks := num_chunks
1747+
if src_id > 0 && src_id < g.mod.values.len {
1748+
src_sz := g.type_size(g.mod.values[src_id].typ)
1749+
if src_sz > 0 && src_sz < dest_size {
1750+
src_chunks = (src_sz + 7) / 8
1751+
}
1752+
}
1753+
for i in 0 .. src_chunks {
16891754
g.emit(asm_ldr_imm(Reg(10), Reg(src_ptr_reg), u32(i)))
16901755
g.emit_str_reg_offset(10, 29, dest_off + i * 8)
16911756
}
1757+
// Zero-fill remaining chunks if source is smaller
1758+
if src_chunks < num_chunks {
1759+
g.emit_mov_reg(10, 31) // xzr
1760+
for i in src_chunks .. num_chunks {
1761+
g.emit_str_reg_offset(10, 29, dest_off + i * 8)
1762+
}
1763+
}
16921764
handled_aggregate_copy = true
16931765
}
16941766
}
@@ -1698,6 +1770,38 @@ fn (mut g Gen) gen_instr(val_id int) {
16981770
if handled_aggregate_copy {
16991771
return
17001772
}
1773+
// For multi-word struct destinations with constant sources (undef/0),
1774+
// zero-fill all chunks instead of storing a single register.
1775+
if dest_id > 0 && dest_id < g.mod.values.len {
1776+
d_typ_id := g.mod.values[dest_id].typ
1777+
if d_typ_id > 0 && d_typ_id < g.mod.type_store.types.len {
1778+
d_sz := g.type_size(d_typ_id)
1779+
if d_sz > 8 {
1780+
is_const_src := src_id > 0 && src_id < g.mod.values.len
1781+
&& g.mod.values[src_id].kind == .constant
1782+
if is_const_src {
1783+
if d_off := g.stack_map[dest_id] {
1784+
num_chunks := (d_sz + 7) / 8
1785+
g.emit_mov_reg(10, 31) // xzr
1786+
for ci in 0 .. num_chunks {
1787+
g.emit_str_reg_offset(10, 29, d_off + ci * 8)
1788+
}
1789+
return
1790+
}
1791+
}
1792+
}
1793+
}
1794+
}
1795+
// Check if this single-reg fallback is for a multi-word dest
1796+
if dest_id > 0 && dest_id < g.mod.values.len {
1797+
fb_dt := g.mod.values[dest_id].typ
1798+
if fb_dt > 0 && fb_dt < g.mod.type_store.types.len {
1799+
fb_dsz := g.type_size(fb_dt)
1800+
if fb_dsz > 8 {
1801+
eprintln('WARN ASSIGN single-reg fallback for multi-word dest! dest_sz=${fb_dsz} fn=${g.cur_func_name}')
1802+
}
1803+
}
1804+
}
17011805
g.load_val_to_reg(8, src_id)
17021806
g.store_reg_to_val(8, dest_id)
17031807
}
@@ -1758,6 +1862,8 @@ fn (mut g Gen) gen_instr(val_id int) {
17581862
}
17591863
}
17601864
}
1865+
} else {
1866+
// typ out of range — use default field_byte_off and field_elem_size
17611867
}
17621868

17631869
// If the tuple source is a string_literal (e.g. after mem2reg
@@ -1877,6 +1983,19 @@ fn (mut g Gen) gen_instr(val_id int) {
18771983
struct_size := g.type_size(instr.typ)
18781984
num_chunks := if struct_size > 0 { (struct_size + 7) / 8 } else { 1 }
18791985

1986+
// Diagnostic: check sum type struct_init for zero _data
1987+
if struct_typ.field_names.len == 2 && struct_typ.field_names[0] == '_tag'
1988+
&& struct_typ.field_names[1] == '_data' && instr.operands.len >= 2 {
1989+
tag_id := instr.operands[0]
1990+
data_id := instr.operands[1]
1991+
tag_val := g.mod.values[tag_id]
1992+
data_val := g.mod.values[data_id]
1993+
if tag_val.kind == .constant && tag_val.name != '0' && data_val.kind == .constant
1994+
&& data_val.name == '0' {
1995+
eprintln('DIAG: struct_init sum type with _tag=${tag_val.name} but _data=0 (const zero)! fn=${g.cur_func_name} val_id=${val_id} data_id=${data_id}')
1996+
}
1997+
}
1998+
18801999
// Zero-initialize the entire struct first
18812000
g.emit_mov_reg(9, 31) // xzr
18822001
for i in 0 .. num_chunks {
@@ -1934,7 +2053,9 @@ fn (mut g Gen) gen_instr(val_id int) {
19342053
g.emit_str_reg_offset(10, 29, result_offset + field_off + w * 8)
19352054
}
19362055
} else {
1937-
// Fallback: store first word
2056+
// Fallback: store first word only
2057+
in_reg := field_id in g.reg_map
2058+
eprintln('WARN: struct_init multi-word field fallback: field_id=${field_id} field_size=${field_size} field_chunks=${field_chunks} in_reg=${in_reg} val_kind=${field_val.kind} val_name="${field_val.name}" fn=${g.cur_func_name}')
19382059
g.load_val_to_reg(8, field_id)
19392060
g.emit_str_reg_offset(8, 29, result_offset + field_off)
19402061
}
@@ -3073,7 +3194,7 @@ fn (mut g Gen) allocate_registers(func mir.Function) {
30733194
}
30743195

30753196
instr := g.mod.instrs[val.index]
3076-
if instr.op in [.call, .call_indirect, .call_sret] {
3197+
if instr.op in [.call, .call_indirect, .call_sret, .heap_alloc] {
30773198
call_indices << instr_idx
30783199
}
30793200

‎vlib/v2/gen/arm64/linker.v‎

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,28 @@ const force_external_syms = ['_malloc', '_free', '_calloc', '_realloc', '_exit',
5858
'_memmove', '_memset', '_memcmp', '___stdoutp', '___stderrp', '_puts', '_printf', '_write',
5959
'_read', '_open', '_close', '_fwrite', '_fflush', '_fopen', '_fclose', '_putchar', '_sprintf',
6060
'_snprintf', '_fprintf', '_sscanf', '_mmap', '_munmap', '_getcwd', '_access', '_readlink',
61-
'_getenv', '_strlen']
61+
'_getenv', '_strlen',
62+
// Filesystem/directory operations '_opendir', '_readdir', '_closedir', '_mkdir', '_rmdir',
63+
'_unlink', '_rename', '_remove', '_stat', '_lstat', '_fstat', '_chmod', '_chdir', '_realpath',
64+
'_symlink', '_link',
65+
// Process/system '_getpid', '_getuid', '_geteuid', '_fork', '_execve', '_execvp', '_waitpid',
66+
'_kill', '_system', '_posix_spawn', '_signal', '_atexit',
67+
// I/O '_fgets', '_fputs', '_fread', '_fseek', '_ftell', '_rewind', '_fileno', '_popen',
68+
'_pclose', '_dup', '_dup2', '_pipe', '_isatty', '_freopen', '_dprintf', '_getc',
69+
// String/memory '_strdup', '_strcmp', '_strncmp', '_strchr', '_strrchr', '_strerror',
70+
'_strncasecmp', '_strcasecmp', '_atoi', '_atof', '_qsort',
71+
// Time '_time', '_localtime_r', '_gmtime_r', '_mktime', '_gettimeofday',
72+
'_clock_gettime_nsec_np', '_mach_absolute_time', '_mach_timebase_info', '_nanosleep', '_sleep',
73+
'_usleep', '_strftime',
74+
// Other '_rand', '_srand', '_isdigit', '_isspace', '_tolower', '_toupper', '_setenv',
75+
'_unsetenv', '_sysconf', '_uname', '_gethostname', '_pthread_mutex_init', '_pthread_mutex_lock',
76+
'_pthread_mutex_unlock', '_pthread_mutex_destroy', '_pthread_self', '_arc4random_buf',
77+
'_proc_pidpath', '_backtrace', '_backtrace_symbols_fd',
78+
// macOS specific '_dispatch_semaphore_create', '_dispatch_semaphore_signal',
79+
'_dispatch_semaphore_wait', '_dispatch_time', '_dispatch_release', '_setvbuf', '_setbuf',
80+
'_memchr', '_getlogin_r', '_getppid', '_getgid', '_getegid', '_ftruncate', '_mkstemp', '_statvfs',
81+
'_chown', '_sigaction', '_sigemptyset', '_sigaddset', '_sigprocmask', '_select', '_kqueue',
82+
'_abs']
6283

6384
pub struct Linker {
6485
macho &MachOObject
@@ -109,20 +130,20 @@ pub fn (mut l Linker) link(output_path string, entry_name string) {
109130
mut t := time.now()
110131
mut t_total := time.now()
111132

112-
// First pass: collect all defined symbols (except force_external_syms)
133+
// First pass: collect all defined symbols (except external ones)
113134
mut defined_syms := map[string]bool{}
114135
for sym in l.macho.symbols {
115136
// N_SECT (0x0E) means symbol is defined in a section
116137
if (sym.type_ & 0x0E) == 0x0E {
117-
// Don't track force_external symbols as defined - they should come from libc
138+
// Don't track external symbols as defined - they should come from libc
118139
if sym.name !in force_external_syms {
119140
defined_syms[sym.name] = true
120141
}
121142
}
122143
}
123144

124145
// Second pass: collect truly external symbols.
125-
// Only force_external_syms (libc functions) should go through GOT/stubs.
146+
// force_external_syms should go through GOT/stubs.
126147
// All other undefined symbols are internal V functions or V-embedded C functions
127148
// (like wyhash) that resolve to local stubs.
128149
for sym in l.macho.symbols {
@@ -382,8 +403,10 @@ pub fn (mut l Linker) link(output_path string, entry_name string) {
382403
}
383404

384405
// codesign_output re-signs the linked executable at `output_path` with the
// system `codesign` tool, using an ad-hoc identity (`-s -`) and replacing any
// existing signature (`-f`).
//
// The linker's built-in ad-hoc signature works for small binaries, but large
// (30MB+) executables signed that way cause dyld to hang, so the output is
// re-signed here.
// NOTE(review): an earlier revision deliberately skipped this step because
// `codesign -s - -f` can rewrite the binary layout and break stub→GOT
// references — confirm that concern no longer applies.
//
// Signing is best-effort: a failure is reported on stderr and does not abort
// the link.
fn (l Linker) codesign_output(output_path string) {
	// Quote the path so that spaces or shell metacharacters in it cannot split
	// or alter the command line passed to the shell.
	res := os.execute('codesign -s - -f ${os.quoted_path(output_path)}')
	if res.exit_code != 0 {
		eprintln('warning: codesign failed for ${output_path} (exit code ${res.exit_code}): ${res.output.trim_space()}')
	}
}
388411

389412
fn (mut l Linker) write_header(ncmds int, cmdsize int) {
@@ -882,11 +905,11 @@ fn (mut l Linker) write_text_with_relocations() {
882905
// Map symbol names to their defined addresses (for resolving undefined references)
883906
mut sym_name_to_addr := map[string]u64{}
884907

885-
// First pass: collect all defined symbol addresses (except force_external_syms)
908+
// First pass: collect all defined symbol addresses (except external syms)
886909
for i, sym in l.macho.symbols {
887910
// N_SECT (0x0E) means symbol is defined in a section
888911
if (sym.type_ & 0x0E) == 0x0E {
889-
// Skip force_external symbols - they should always resolve to libc
912+
// Skip external symbols - they should always resolve to libc
890913
is_external := sym.name in force_external_syms
891914
if sym.sect == 1 {
892915
// Text section symbol (code)
@@ -936,7 +959,7 @@ fn (mut l Linker) write_text_with_relocations() {
936959
sym_name := l.macho.symbols[r.sym_idx].name
937960
mut sym_addr := sym_addrs[r.sym_idx]
938961
if sym_name in force_external_syms {
939-
// Use stub address for force_external symbols
962+
// Use stub address for external symbols
940963
if sym_name in l.sym_to_got {
941964
got_idx := l.sym_to_got[sym_name]
942965
sym_addr = stubs_vmaddr + u64(got_idx * 12)

‎vlib/v2/gen/c/c.v‎

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ fn (g &Gen) is_stub_function(func ssa.Function) bool {
262262
if instr.op == .ret {
263263
continue
264264
}
265-
if instr.op == .alloca || instr.op == .load {
266-
// alloca+load for struct return stubs
265+
if instr.op in [.alloca, .heap_alloc, .load] {
266+
// alloca/heap_alloc+load for struct return stubs
267267
continue
268268
}
269269
real_instrs++
@@ -352,6 +352,15 @@ fn (mut g Gen) gen_function(func ssa.Function) {
352352
declared_vars[val.id] = true
353353
}
354354
}
355+
.heap_alloc {
356+
// Heap-allocate: declare as pointer and call calloc
357+
elem_type := g.mod.type_store.types[instr.typ].elem_type
358+
if elem_type != 0 {
359+
g.write_indent()
360+
g.sb.writeln('${g.type_name(elem_type)}* ${val.name} = (${g.type_name(elem_type)}*)calloc(1, sizeof(${g.type_name(elem_type)}));')
361+
declared_vars[val.id] = true
362+
}
363+
}
355364
.store {
356365
if instr.operands.len >= 2 {
357366
src := instr.operands[0]
@@ -360,8 +369,13 @@ fn (mut g Gen) gen_function(func ssa.Function) {
360369
src_val := g.mod.values[src]
361370
g.write_indent()
362371
if dst_val.kind == .global || (dst_val.kind == .instruction
363-
&& g.mod.instrs[dst_val.index].op == .alloca) {
364-
g.sb.write_string('${sanitize_c_ident(dst_val.name)} = ')
372+
&& g.mod.instrs[dst_val.index].op in [.alloca, .heap_alloc]) {
373+
if dst_val.kind == .instruction
374+
&& g.mod.instrs[dst_val.index].op == .heap_alloc {
375+
g.sb.write_string('*${sanitize_c_ident(dst_val.name)} = ')
376+
} else {
377+
g.sb.write_string('${sanitize_c_ident(dst_val.name)} = ')
378+
}
365379
// If src is an alloca and dst's elem type is a pointer,
366380
// we need & (e.g., storing &Point{} alloca into a Point* variable)
367381
if src_val.kind == .instruction

‎vlib/v2/gen/cleanc/expr.v‎

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,30 @@ fn (mut g Gen) gen_type_cast_expr(type_name string, expr ast.Expr) {
12901290
break
12911291
}
12921292
}
1293+
// If direct matching failed, try qualifying inner_type with the sum type's module prefix
1294+
// (e.g. 'Array_Attribute' → 'Array_ast__Attribute' when sum type is 'ast__Stmt')
1295+
if tag < 0 && type_name.contains('__') {
1296+
mod_prefix := type_name.all_before_last('__') + '__'
1297+
// Qualify the type: Array_X → Array_mod__X, or just X → mod__X
1298+
qualified := if inner_type.starts_with('Array_') && !inner_type[6..].contains('__') {
1299+
'Array_${mod_prefix}${inner_type[6..]}'
1300+
} else if inner_type.starts_with('Map_') && !inner_type[4..].contains('__') {
1301+
'Map_${mod_prefix}${inner_type[4..]}'
1302+
} else if !inner_type.contains('__') {
1303+
'${mod_prefix}${inner_type}'
1304+
} else {
1305+
''
1306+
}
1307+
if qualified != '' {
1308+
for i, v in variants {
1309+
if v == qualified {
1310+
tag = i
1311+
field_name = v
1312+
break
1313+
}
1314+
}
1315+
}
1316+
}
12931317
// If direct matching failed, check if inner_type is a known sum type
12941318
// that appears as a variant of the target sum type (e.g. ast__Type -> ast__Expr._Type)
12951319
if tag < 0 && inner_type in g.sum_type_variants {

‎vlib/v2/gen/cleanc/struct.v‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,13 +363,13 @@ fn (mut g Gen) infer_sum_variant_from_expr(type_name string, variants []string,
363363
v_short := if v.contains('__') { v.all_after_last('__') } else { v }
364364
if v_short == expr_variant_short || v == expr_variant || v == expr_variant_c {
365365
inner_type := if type_name.contains('__') {
366-
'${type_name.all_before_last('__')}__${v_short}'
366+
'${type_name.all_before_last('__')}__${v}'
367367
} else {
368-
v_short
368+
v
369369
}
370370
return SumVariantMatch{
371371
tag: i
372-
field_name: v_short
372+
field_name: v
373373
is_primitive: false
374374
inner_type: inner_type
375375
}

0 commit comments

Comments
 (0)