bench: a better V GC bench

medvednikov · medvednikov · commit e2b7ad7df2f7 · 2026-04-02T14:13:42.000+03:00
diff --git a/bench/bench_gc.v b/bench/bench_gc.v
@@ -1,10 +1,22 @@
-// bench_gc.v - Benchmark comparing VGC vs Boehm GC
-// Run with: v -gc boehm -o bench_boehm bench/bench_gc.v && ./bench_boehm
-//           v -gc vgc   -o bench_vgc   bench/bench_gc.v && ./bench_vgc
+// bench_gc.v - Benchmark comparing Boehm GC vs V GC (vgc)
+//
+// Usage (runner mode - compiles and compares both):
+//   v run bench/bench_gc.v
+//
+// Usage (individual, to test a specific GC mode):
+//   v -gc boehm -prod -o /tmp/bench_boehm bench/bench_gc.v && /tmp/bench_boehm
+//   v -gc vgc   -prod -o /tmp/bench_vgc   bench/bench_gc.v && /tmp/bench_vgc
+module main
+
+import os
 import time
 
 const n_allocs = 1_000_000
 const n_iters = 5
+const tree_depth = 18
+const tree_rounds = 10
+const array_rounds = 100
+const array_size = 100_000
 
 struct Node {
 mut:
@@ -13,16 +25,27 @@ mut:
 	value int
 }
 
+fn gc_mode_name() string {
+	return $if gcboehm ? {
+		'boehm'
+	} $else $if vgc ? {
+		'vgc'
+	} $else {
+		'none'
+	}
+}
+
+// --- Workloads ---
+
 // Allocate many small objects (typical GC workload)
-fn bench_small_allocs() {
+fn bench_small_allocs() (i64, int) {
 	sw := time.new_stopwatch()
 	mut sum := 0
 	for _ in 0 .. n_allocs {
 		s := 'hello world ${sum}'
 		sum += s.len
 	}
-	elapsed := sw.elapsed()
-	eprintln('  small allocs (${n_allocs}x string): ${elapsed.milliseconds()} ms  (sum=${sum})')
+	return sw.elapsed().milliseconds(), sum
 }
 
 // Build a binary tree (tests pointer-heavy allocation)
@@ -46,51 +69,247 @@ fn tree_sum(n &Node) int {
 	return n.value + tree_sum(n.left) + tree_sum(n.right)
 }
 
-fn bench_tree() {
+fn bench_tree() (i64, int) {
 	sw := time.new_stopwatch()
 	mut total := 0
-	for _ in 0 .. 10 {
-		t := make_tree(18)
+	for _ in 0 .. tree_rounds {
+		t := make_tree(tree_depth)
 		total += tree_sum(t)
 	}
-	elapsed := sw.elapsed()
-	eprintln('  tree build+walk (depth=18, 10x): ${elapsed.milliseconds()} ms  (sum=${total})')
+	return sw.elapsed().milliseconds(), total
 }
 
 // Allocate and discard arrays (tests realloc / growing)
-fn bench_arrays() {
+fn bench_arrays() (i64, int) {
 	sw := time.new_stopwatch()
 	mut total := 0
-	for _ in 0 .. 100 {
+	for _ in 0 .. array_rounds {
 		mut arr := []int{cap: 16}
-		for j in 0 .. 100_000 {
+		for j in 0 .. array_size {
 			arr << j
 		}
 		total += arr.len
 	}
-	elapsed := sw.elapsed()
-	eprintln('  array grow (100x 100k pushes): ${elapsed.milliseconds()} ms  (total=${total})')
+	return sw.elapsed().milliseconds(), total
 }
 
-fn main() {
-	gc_mode := $if gcboehm ? {
-		'boehm'
-	} $else $if vgc ? {
-		'vgc'
-	} $else {
-		'none'
+// Allocate many maps
+fn bench_maps() (i64, int) {
+	sw := time.new_stopwatch()
+	mut total := 0
+	for round in 0 .. 20 {
+		mut m := map[string]int{}
+		for j in 0 .. 10_000 {
+			m['key_${round}_${j}'] = j
+		}
+		total += m.len
 	}
-	eprintln('=== GC Benchmark (mode: ${gc_mode}) ===')
+	return sw.elapsed().milliseconds(), total
+}
 
-	for iter in 0 .. n_iters {
-		eprintln('--- iteration ${iter + 1}/${n_iters} ---')
-		bench_small_allocs()
-		bench_tree()
-		bench_arrays()
+// Mixed workload: interleave allocations and collections
+fn bench_mixed() (i64, int) {
+	sw := time.new_stopwatch()
+	mut total := 0
+	for _ in 0 .. 50 {
+		// strings
+		mut strs := []string{cap: 1000}
+		for j in 0 .. 1000 {
+			strs << 'item_${j}_${'x'.repeat(j % 50)}'
+		}
+		total += strs.len
+		// tree
+		t := make_tree(12)
+		total += tree_sum(t)
+		// array
+		mut arr := []int{cap: 10_000}
+		for j in 0 .. 10_000 {
+			arr << j * 2
+		}
+		total += arr.len
 	}
+	return sw.elapsed().milliseconds(), total
+}
+
+// --- Workload runner (compiled with a specific GC) ---
 
+fn run_workload() {
+	mode := gc_mode_name()
+	for iter in 0 .. n_iters {
+		ms_alloc, sum_alloc := bench_small_allocs()
+		ms_tree, sum_tree := bench_tree()
+		ms_array, sum_array := bench_arrays()
+		ms_map, sum_map := bench_maps()
+		ms_mixed, sum_mixed := bench_mixed()
+
+		// Machine-readable output: iter,test,ms,checksum
+		println('${iter},allocs,${ms_alloc},${sum_alloc}')
+		println('${iter},tree,${ms_tree},${sum_tree}')
+		println('${iter},arrays,${ms_array},${sum_array}')
+		println('${iter},maps,${ms_map},${sum_map}')
+		println('${iter},mixed,${ms_mixed},${sum_mixed}')
+	}
 	usage := gc_heap_usage()
-	eprintln('--- heap usage ---')
-	eprintln('  heap_size:  ${usage.heap_size / 1024} KB')
-	eprintln('  free_bytes: ${usage.free_bytes / 1024} KB')
+	println('heap,${usage.heap_size},${usage.free_bytes}')
+	println('gc_mode,${mode}')
+}
+
+// --- Runner mode: compile with both GCs and compare ---
+
+struct BenchData {
+mut:
+	times [5][]i64 // indexed by test (allocs=0, tree=1, arrays=2, maps=3, mixed=4)
+	heap  i64
+	free  i64
+	mode  string
+}
+
+fn test_index(name string) int {
+	return match name {
+		'allocs' { 0 }
+		'tree' { 1 }
+		'arrays' { 2 }
+		'maps' { 3 }
+		'mixed' { 4 }
+		else { -1 }
+	}
+}
+
+fn test_name(idx int) string {
+	return ['small allocs (${n_allocs}x string)',
+		'tree build+walk (depth=${tree_depth}, ${tree_rounds}x)',
+		'array grow (${array_rounds}x ${array_size} pushes)', 'map insert (20x 10k entries)',
+		'mixed workload (50 rounds)'][idx]
+}
+
+fn median(mut vals []i64) i64 {
+	if vals.len == 0 {
+		return 0
+	}
+	vals.sort(a < b)
+	return vals[vals.len / 2]
+}
+
+fn parse_output(output string) BenchData {
+	mut d := BenchData{}
+	for line in output.split_into_lines() {
+		parts := line.split(',')
+		if parts.len < 2 {
+			continue
+		}
+		if parts[0] == 'heap' && parts.len >= 3 {
+			d.heap = parts[1].i64()
+			d.free = parts[2].i64()
+		} else if parts[0] == 'gc_mode' {
+			d.mode = parts[1]
+		} else if parts.len >= 4 {
+			ti := test_index(parts[1])
+			if ti >= 0 {
+				d.times[ti] << parts[2].i64()
+			}
+		}
+	}
+	return d
+}
+
+fn rpad(s string, width int) string {
+	if s.len >= width {
+		return s
+	}
+	return s + ' '.repeat(width - s.len)
+}
+
+fn lpad(s string, width int) string {
+	if s.len >= width {
+		return s
+	}
+	return ' '.repeat(width - s.len) + s
+}
+
+fn run_comparison() {
+	v_exe := os.getenv_opt('VEXE') or { @VEXE }
+	src := os.join_path(@VMODROOT, 'bench', 'bench_gc.v')
+	tmp_boehm := os.join_path(os.temp_dir(), 'bench_gc_boehm')
+	tmp_vgc := os.join_path(os.temp_dir(), 'bench_gc_vgc')
+
+	println('=== GC Benchmark: Boehm vs VGC ===')
+	println('')
+
+	// Compile both
+	for gc_mode in ['boehm', 'vgc'] {
+		out := if gc_mode == 'boehm' { tmp_boehm } else { tmp_vgc }
+		print('compiling with -gc ${gc_mode}...')
+		flush_stdout()
+		r := os.execute('${v_exe} -gc ${gc_mode} -prod -o ${out} ${src}')
+		if r.exit_code != 0 {
+			eprintln(' FAILED')
+			eprintln(r.output)
+			exit(1)
+		}
+		println(' ok')
+	}
+
+	println('')
+
+	// Run both
+	mut data := [2]BenchData{}
+	for i, bin in [tmp_boehm, tmp_vgc] {
+		gc_name := if i == 0 { 'boehm' } else { 'vgc' }
+		print('running ${gc_name}...')
+		flush_stdout()
+		r := os.execute('${bin} --run-workload')
+		if r.exit_code != 0 {
+			eprintln(' FAILED')
+			eprintln(r.output)
+			exit(1)
+		}
+		data[i] = parse_output(r.output)
+		println(' done')
+	}
+
+	// Print comparison table
+	println('')
+	println('  ${rpad('test', 44)} ${lpad('boehm', 9)} ${lpad('vgc', 9)} ${lpad('ratio',
+		9)}')
+	println('  ${'—'.repeat(44)} ${'—'.repeat(9)} ${'—'.repeat(9)} ${'—'.repeat(9)}')
+
+	for ti in 0 .. 5 {
+		mut boehm_vals := data[0].times[ti].clone()
+		mut vgc_vals := data[1].times[ti].clone()
+		mb := median(mut boehm_vals)
+		mv := median(mut vgc_vals)
+		ratio := if mb > 0 { f64(mv) / f64(mb) } else { 0.0 }
+		winner := if ratio < 0.95 {
+			' <-- vgc'
+		} else if ratio > 1.05 {
+			' <-- boehm'
+		} else {
+			''
+		}
+		label := '${ratio:.2f}x${winner}'
+		println('  ${rpad(test_name(ti), 44)} ${lpad('${mb} ms', 9)} ${lpad('${mv} ms',
+			9)} ${lpad(label, 9)}')
+	}
+
+	// Heap usage
+	println('')
+	println('  heap usage:')
+	println('    boehm: ${data[0].heap / 1024} KB allocated, ${data[0].free / 1024} KB free')
+	println('    vgc:   ${data[1].heap / 1024} KB allocated, ${data[1].free / 1024} KB free')
+	println('')
+	println('  (ratio < 1.0 = vgc faster, > 1.0 = boehm faster)')
+
+	// Cleanup
+	os.rm(tmp_boehm) or {}
+	os.rm(tmp_vgc) or {}
+}
+
+fn main() {
+	if '--run-workload' in os.args {
+		// We were launched by the runner - execute the workload
+		run_workload()
+	} else {
+		// Act as runner: compile with both GCs and compare
+		run_comparison()
+	}
 }