Skip to content

Commit ea5c815

Browse files
authored
Merge pull request #8735 from sylvestre/sort-perf-2
sort: add benchmark
2 parents 5efafd6 + 5b0d173 commit ea5c815

File tree

9 files changed

+614
-59
lines changed

9 files changed

+614
-59
lines changed

‎.vscode/cSpell.json‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
".devcontainer/**",
3333
"util/gnu-patches/**",
3434
"docs/src/release-notes/**",
35+
"src/uu/*/benches/*.rs",
36+
"src/uucore/src/lib/features/benchmark.rs",
3537
],
3638

3739
"enableGlobDot": true,

‎Cargo.lock‎

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎src/uu/numfmt/benches/numfmt_bench.rs‎

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,56 +4,45 @@
44
// file that was distributed with this source code.
55

66
use divan::{Bencher, black_box};
7-
use tempfile::TempDir;
87
use uu_numfmt::uumain;
9-
use uucore::benchmark::{create_test_file, run_util_function};
10-
11-
/// Generate numeric data for benchmarking
12-
fn generate_numbers(count: usize) -> String {
13-
(1..=count)
14-
.map(|n| n.to_string())
15-
.collect::<Vec<_>>()
16-
.join("\n")
17-
}
18-
19-
/// Setup benchmark environment with test data
20-
fn setup_benchmark(data: String) -> (TempDir, String) {
21-
let temp_dir = tempfile::tempdir().unwrap();
22-
let file_path = create_test_file(data.as_bytes(), temp_dir.path());
23-
let file_path_str = file_path.to_str().unwrap().to_string();
24-
(temp_dir, file_path_str)
25-
}
8+
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
269

2710
/// Benchmark SI formatting with different number counts
2811
#[divan::bench(args = [1_000_000])]
2912
fn numfmt_to_si(bencher: Bencher, count: usize) {
30-
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
13+
let data = text_data::generate_numbers(count);
14+
let file_path = setup_test_file(data.as_bytes());
15+
let file_path_str = file_path.to_str().unwrap();
3116

3217
bencher.bench(|| {
33-
black_box(run_util_function(uumain, &["--to=si", &file_path_str]));
18+
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
3419
});
3520
}
3621

3722
/// Benchmark SI formatting with precision format
3823
#[divan::bench(args = [1_000_000])]
3924
fn numfmt_to_si_precision(bencher: Bencher, count: usize) {
40-
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
25+
let data = text_data::generate_numbers(count);
26+
let file_path = setup_test_file(data.as_bytes());
27+
let file_path_str = file_path.to_str().unwrap();
4128

4229
bencher.bench(|| {
4330
black_box(run_util_function(
4431
uumain,
45-
&["--to=si", "--format=%.6f", &file_path_str],
32+
&["--to=si", "--format=%.6f", file_path_str],
4633
));
4734
});
4835
}
4936

5037
/// Benchmark IEC (binary) formatting
5138
#[divan::bench(args = [1_000_000])]
5239
fn numfmt_to_iec(bencher: Bencher, count: usize) {
53-
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
40+
let data = text_data::generate_numbers(count);
41+
let file_path = setup_test_file(data.as_bytes());
42+
let file_path_str = file_path.to_str().unwrap();
5443

5544
bencher.bench(|| {
56-
black_box(run_util_function(uumain, &["--to=iec", &file_path_str]));
45+
black_box(run_util_function(uumain, &["--to=iec", file_path_str]));
5746
});
5847
}
5948

@@ -65,10 +54,11 @@ fn numfmt_from_si(bencher: Bencher, count: usize) {
6554
.map(|n| format!("{:.1}K", n as f64 / 1000.0))
6655
.collect::<Vec<_>>()
6756
.join("\n");
68-
let (_temp_dir, file_path_str) = setup_benchmark(data);
57+
let file_path = setup_test_file(data.as_bytes());
58+
let file_path_str = file_path.to_str().unwrap();
6959

7060
bencher.bench(|| {
71-
black_box(run_util_function(uumain, &["--from=si", &file_path_str]));
61+
black_box(run_util_function(uumain, &["--from=si", file_path_str]));
7262
});
7363
}
7464

@@ -80,37 +70,42 @@ fn numfmt_large_numbers_si(bencher: Bencher, count: usize) {
8070
.map(|n| (n * 1_000_000).to_string())
8171
.collect::<Vec<_>>()
8272
.join("\n");
83-
let (_temp_dir, file_path_str) = setup_benchmark(data);
73+
let file_path = setup_test_file(data.as_bytes());
74+
let file_path_str = file_path.to_str().unwrap();
8475

8576
bencher.bench(|| {
86-
black_box(run_util_function(uumain, &["--to=si", &file_path_str]));
77+
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
8778
});
8879
}
8980

9081
/// Benchmark different padding widths
9182
#[divan::bench(args = [(1_000_000, 5), (1_000_000, 50)])]
9283
fn numfmt_padding(bencher: Bencher, (count, padding): (usize, usize)) {
93-
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
84+
let data = text_data::generate_numbers(count);
85+
let file_path = setup_test_file(data.as_bytes());
86+
let file_path_str = file_path.to_str().unwrap();
9487
let padding_arg = format!("--padding={padding}");
9588

9689
bencher.bench(|| {
9790
black_box(run_util_function(
9891
uumain,
99-
&["--to=si", &padding_arg, &file_path_str],
92+
&["--to=si", &padding_arg, file_path_str],
10093
));
10194
});
10295
}
10396

10497
/// Benchmark round modes with SI formatting
10598
#[divan::bench(args = [("up", 100_000), ("down", 1_000_000), ("towards-zero", 1_000_000)])]
10699
fn numfmt_round_modes(bencher: Bencher, (round_mode, count): (&str, usize)) {
107-
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
100+
let data = text_data::generate_numbers(count);
101+
let file_path = setup_test_file(data.as_bytes());
102+
let file_path_str = file_path.to_str().unwrap();
108103
let round_arg = format!("--round={round_mode}");
109104

110105
bencher.bench(|| {
111106
black_box(run_util_function(
112107
uumain,
113-
&["--to=si", &round_arg, &file_path_str],
108+
&["--to=si", &round_arg, file_path_str],
114109
));
115110
});
116111
}

‎src/uu/sort/Cargo.toml‎

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@ fluent = { workspace = true }
4040
[target.'cfg(target_os = "linux")'.dependencies]
4141
nix = { workspace = true }
4242

43+
[dev-dependencies]
44+
divan = { workspace = true }
45+
tempfile = { workspace = true }
46+
uucore = { workspace = true, features = [
47+
"benchmark",
48+
"fs",
49+
"parser",
50+
"version-cmp",
51+
"i18n-collator",
52+
] }
53+
4354
[[bin]]
4455
name = "sort"
4556
path = "src/main.rs"
57+
58+
[[bench]]
59+
name = "sort_bench"
60+
harness = false

‎src/uu/sort/benches/sort_bench.rs‎

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use uu_sort::uumain;
8+
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
9+
10+
/// Benchmark sorting ASCII-only data
11+
#[divan::bench(args = [100_000, 500_000])]
12+
fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
13+
let data = text_data::generate_ascii_data(num_lines);
14+
let file_path = setup_test_file(&data);
15+
16+
bencher.bench(|| {
17+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
18+
});
19+
}
20+
21+
/// Benchmark sorting accented/non-ASCII data
22+
#[divan::bench(args = [100_000, 500_000])]
23+
fn sort_accented_data(bencher: Bencher, num_lines: usize) {
24+
let data = text_data::generate_accented_data(num_lines);
25+
let file_path = setup_test_file(&data);
26+
27+
bencher.bench(|| {
28+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
29+
});
30+
}
31+
32+
/// Benchmark sorting mixed ASCII/non-ASCII data
33+
#[divan::bench(args = [100_000, 500_000])]
34+
fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
35+
let data = text_data::generate_mixed_data(num_lines);
36+
let file_path = setup_test_file(&data);
37+
38+
bencher.bench(|| {
39+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
40+
});
41+
}
42+
43+
/// Benchmark case-sensitive sorting with mixed case data
44+
#[divan::bench(args = [100_000, 500_000])]
45+
fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
46+
let data = text_data::generate_case_sensitive_data(num_lines);
47+
let file_path = setup_test_file(&data);
48+
49+
bencher.bench(|| {
50+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
51+
});
52+
}
53+
54+
/// Benchmark case-insensitive sorting (fold case)
55+
#[divan::bench(args = [100_000, 500_000])]
56+
fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
57+
let data = text_data::generate_case_sensitive_data(num_lines);
58+
let file_path = setup_test_file(&data);
59+
60+
bencher.bench(|| {
61+
black_box(run_util_function(
62+
uumain,
63+
&["-f", file_path.to_str().unwrap()],
64+
));
65+
});
66+
}
67+
68+
/// Benchmark dictionary order sorting (only blanks and alphanumeric)
69+
#[divan::bench(args = [100_000, 500_000])]
70+
fn sort_dictionary_order(bencher: Bencher, num_lines: usize) {
71+
let data = text_data::generate_mixed_data(num_lines);
72+
let file_path = setup_test_file(&data);
73+
74+
bencher.bench(|| {
75+
black_box(run_util_function(
76+
uumain,
77+
&["-d", file_path.to_str().unwrap()],
78+
));
79+
});
80+
}
81+
82+
/// Benchmark numeric sorting with mixed data
83+
#[divan::bench(args = [100_000, 500_000])]
84+
fn sort_numeric(bencher: Bencher, num_lines: usize) {
85+
let mut data = Vec::new();
86+
87+
// Generate numeric data with some text prefixes
88+
for i in 0..num_lines {
89+
let value = (i * 13) % 10000; // Pseudo-random numeric values
90+
data.extend_from_slice(format!("value_{value}\n").as_bytes());
91+
}
92+
93+
let file_path = setup_test_file(&data);
94+
95+
bencher.bench(|| {
96+
black_box(run_util_function(
97+
uumain,
98+
&["-n", file_path.to_str().unwrap()],
99+
));
100+
});
101+
}
102+
103+
/// Benchmark reverse sorting with locale-aware data
104+
#[divan::bench(args = [100_000, 500_000])]
105+
fn sort_reverse_locale(bencher: Bencher, num_lines: usize) {
106+
let data = text_data::generate_accented_data(num_lines);
107+
let file_path = setup_test_file(&data);
108+
109+
bencher.bench(|| {
110+
black_box(run_util_function(
111+
uumain,
112+
&["-r", file_path.to_str().unwrap()],
113+
));
114+
});
115+
}
116+
117+
/// Benchmark sorting with specific key field
118+
#[divan::bench(args = [100_000, 500_000])]
119+
fn sort_key_field(bencher: Bencher, num_lines: usize) {
120+
let mut data = Vec::new();
121+
122+
// Generate data with multiple fields
123+
let words = ["café", "naïve", "apple", "über", "banana"];
124+
for i in 0..num_lines {
125+
let word = words[i % words.len()];
126+
let num1 = i % 100;
127+
let num2 = (i * 7) % 100;
128+
data.extend_from_slice(format!("{num1}\t{word}\t{num2}\n").as_bytes());
129+
}
130+
131+
let file_path = setup_test_file(&data);
132+
133+
bencher.bench(|| {
134+
// Sort by second field
135+
black_box(run_util_function(
136+
uumain,
137+
&["-k", "2", file_path.to_str().unwrap()],
138+
));
139+
});
140+
}
141+
142+
/// Benchmark unique sorting with locale-aware data
143+
#[divan::bench(args = [100_000, 500_000])]
144+
fn sort_unique_locale(bencher: Bencher, num_lines: usize) {
145+
let data = text_data::generate_accented_data(num_lines);
146+
let file_path = setup_test_file(&data);
147+
148+
bencher.bench(|| {
149+
black_box(run_util_function(
150+
uumain,
151+
&["-u", file_path.to_str().unwrap()],
152+
));
153+
});
154+
}
155+
156+
fn main() {
157+
divan::main();
158+
}

0 commit comments

Comments
 (0)