Optimize Vec push by preventing address escapes rust-lang/rust#150950

Open

32 comments and reviews loaded in 1.67s

gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 02:40:08 UTC · edited
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 02:40:08 UTC · edited
View on GitHub

View all comments

This change makes RawVecInner non-generic over the allocator, allowing it to be Copy. The allocator is moved to RawVec itself. Key optimizations:

  • RawVecInner is now Copy (no allocator field)
  • grow_one uses ptr::read/ptr::write to copy allocator to a temporary, preventing &self from escaping through &dyn Allocator parameter
  • Drop::drop similarly copies to temporaries before deallocating
  • deallocate takes self by value instead of &mut self
  • All these functions are #[inline(always)]

This allows LLVM to keep Vec fields (cap, ptr, len) in registers during push loops instead of storing/loading from memory every iteration.

Benchmark results (push with pre-allocated capacity):

  • 100 elements: 1.74x faster
  • 1000 elements: 1.87x faster
  • 10000 elements: 2.41x faster

Secondary benefit: grow_one_impl and other growth functions use &dyn Allocator, so they are compiled once in libstd rather than monomorphized per allocator type.

Preserves const compatibility with the const_heap feature by using generics for the const allocation path while using &dyn Allocator for runtime paths.

🚀4
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 02:45:00 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 02:45:00 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::ToolBuild { build_compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu, tool: "tidy", path: "src/tools/tidy", mode: ToolBootstrap, source_type: InTree, extra_features: [], allow_features: "", cargo_args: [], artifact_kind: Binary } -- 12.411
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:265:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:749:
 
 #[cfg(not(no_global_oom_handling))]
 #[inline(never)]
-fn grow_one_impl(mut inner: RawVecInner, alloc: &dyn Allocator, elem_layout: Layout) -> RawVecInner {
+fn grow_one_impl(
+    mut inner: RawVecInner,
+    alloc: &dyn Allocator,
+    elem_layout: Layout,
+) -> RawVecInner {
     // SAFETY: elem_layout must be correct for the element type
     if let Err(err) = unsafe { inner.grow_amortized(inner.cap.as_inner(), 1, elem_layout, alloc) } {
         handle_error(err);
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:52
  local time: Sun Jan 11 02:44:47 UTC 2026
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 03:14:37 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 03:14:37 UTC · hidden as outdated
View on GitHub

The job aarch64-gnu-llvm-20-2 failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: aarch64-unknown-linux-gnu, forced_compiler: false }, target: aarch64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:265:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:749:
 
 #[cfg(not(no_global_oom_handling))]
 #[inline(never)]
-fn grow_one_impl(mut inner: RawVecInner, alloc: &dyn Allocator, elem_layout: Layout) -> RawVecInner {
+fn grow_one_impl(
+    mut inner: RawVecInner,
+    alloc: &dyn Allocator,
+    elem_layout: Layout,
+) -> RawVecInner {
     // SAFETY: elem_layout must be correct for the element type
     if let Err(err) = unsafe { inner.grow_amortized(inner.cap.as_inner(), 1, elem_layout, alloc) } {
         handle_error(err);
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
fmt: checked 6634 files
Bootstrap failed while executing `--stage 2 test --skip tests --skip coverage-map --skip coverage-run --skip library --skip tidyselftest`
Build completed unsuccessfully in 0:00:28
  local time: Sun Jan 11 03:07:43 UTC 2026
  network time: Sun, 11 Jan 2026 03:07:44 GMT
saethlin Avatar
saethlin on 2026-01-11 03:41:10 UTC
saethlin Avatar
saethlin on 2026-01-11 03:41:10 UTC
View on GitHub
  • All these functions are #[inline(always)]

Please try to only use that attribute where it is demonstrated to be better than #[inline].

Secondary benefit: grow_one_impl and other growth functions use &dyn Allocator, so they are compiled once in libstd rather than monomorphized per allocator type.

Isn't this a penalty for small custom allocators that can be inlined?

rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 03:54:02 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 03:54:02 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:263:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:816:
 
 #[cfg(not(no_global_oom_handling))]
 #[inline(never)]
-fn grow_one_impl(mut inner: RawVecInner, alloc: &dyn Allocator, elem_layout: Layout) -> RawVecInner {
+fn grow_one_impl(
+    mut inner: RawVecInner,
+    alloc: &dyn Allocator,
+    elem_layout: Layout,
+) -> RawVecInner {
     // SAFETY: elem_layout must be correct for the element type
     if let Err(err) = unsafe { inner.grow_amortized(inner.cap.as_inner(), 1, elem_layout, alloc) } {
         handle_error(err);
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:51
  local time: Sun Jan 11 03:52:18 UTC 2026
  network time: Sun, 11 Jan 2026 03:52:18 GMT
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:05:02 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:05:02 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:263:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:816:
 
 #[cfg(not(no_global_oom_handling))]
 #[inline(never)]
-fn grow_one_impl(mut inner: RawVecInner, alloc: &dyn Allocator, elem_layout: Layout) -> RawVecInner {
+fn grow_one_impl(
+    mut inner: RawVecInner,
+    alloc: &dyn Allocator,
+    elem_layout: Layout,
+) -> RawVecInner {
     // SAFETY: elem_layout must be correct for the element type
     if let Err(err) = unsafe { inner.grow_amortized(inner.cap.as_inner(), 1, elem_layout, alloc) } {
         handle_error(err);
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:51
  local time: Sun Jan 11 04:04:52 UTC 2026
  network time: Sun, 11 Jan 2026 04:04:52 GMT
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:12:26 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:12:26 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:262:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:812:
 
 #[cfg(not(no_global_oom_handling))]
 #[inline(never)]
-fn grow_one_impl(mut inner: RawVecInner, alloc: &dyn Allocator, elem_layout: Layout) -> RawVecInner {
+fn grow_one_impl(
+    mut inner: RawVecInner,
+    alloc: &dyn Allocator,
+    elem_layout: Layout,
+) -> RawVecInner {
     // SAFETY: elem_layout must be correct for the element type
     if let Err(err) = unsafe { inner.grow_amortized(inner.cap.as_inner(), 1, elem_layout, alloc) } {
         handle_error(err);
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:52
  local time: Sun Jan 11 04:12:09 UTC 2026
  network time: Sun, 11 Jan 2026 04:12:09 GMT
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:20:51 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:20:51 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:262:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:470:
     #[inline(never)]
     fn grow_one(mut self, alloc: &dyn Allocator, elem_layout: Layout) -> Self {
         // SAFETY: Precondition passed to caller
-        if let Err(err) = unsafe { self.grow_amortized(self.cap.as_inner(), 1, elem_layout, alloc) } {
+        if let Err(err) = unsafe { self.grow_amortized(self.cap.as_inner(), 1, elem_layout, alloc) }
+        {
             handle_error(err);
         }
         self
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:52
  local time: Sun Jan 11 04:20:39 UTC 2026
  network time: Sun, 11 Jan 2026 04:20:39 GMT
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:26:54 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 04:26:54 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[TIMING:end] tool::ToolBuild { build_compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu, tool: "tidy", path: "src/tools/tidy", mode: ToolBootstrap, source_type: InTree, extra_features: [], allow_features: "", cargo_args: [], artifact_kind: Binary } -- 12.485
[TIMING:end] tool::Tidy { compiler: Compiler { stage: 0, host: x86_64-unknown-linux-gnu, forced_compiler: false }, target: x86_64-unknown-linux-gnu } -- 0.000
fmt check
Diff in /checkout/library/alloc/src/raw_vec/tests.rs:126:
     assert_eq!(v.try_reserve_exact(101, usize::MAX - 100), cap_err);
     zst_sanity(&v);
 
-    assert_eq!(unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
-    assert_eq!(unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(100, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
+    assert_eq!(
+        unsafe { v.inner.grow_amortized(101, usize::MAX - 100, ZST::LAYOUT, &Global) },
+        cap_err
+    );
     zst_sanity(&v);
 
     assert_eq!(unsafe { v.inner.grow_exact(100, usize::MAX - 100, ZST::LAYOUT, &Global) }, cap_err);
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:262:
         unsafe {
             let ptr = ptr.cast();
             let capacity = new_cap::<T>(capacity);
-            Self {
-                inner: RawVecInner::from_raw_parts(ptr, capacity),
-                alloc,
-                _marker: PhantomData,
-            }
+            Self { inner: RawVecInner::from_raw_parts(ptr, capacity), alloc, _marker: PhantomData }
         }
     }
 
Diff in /checkout/library/alloc/src/raw_vec/mod.rs:470:
     #[inline(never)]
     fn grow_one(mut self, alloc: &dyn Allocator, elem_layout: Layout) -> Self {
         // SAFETY: Precondition passed to caller
-        if let Err(err) = unsafe { self.grow_amortized(self.cap.as_inner(), 1, elem_layout, alloc) } {
+        if let Err(err) = unsafe { self.grow_amortized(self.cap.as_inner(), 1, elem_layout, alloc) }
+        {
             handle_error(err);
         }
         self
fmt: checked 6634 files
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 04:51:59 UTC · edited
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 04:51:59 UTC · edited
View on GitHub
  • All these functions are #[inline(always)]

Please try to only use that attribute where it is demonstrated to be better than #[inline].

These are on xxx(&mut self) functions that forward to functions that take by self and return self. If not always inlined the &mut self escapes a reference, producing code that is drastically worse. The inner loop in the benchmarks with this PR is

bf330: mov %r13,(%rdx,%r13,8) ; vec[len] = len
bf334: inc %r13 ; len++
bf337: cmp %r13,%r15 ; compare with target
bf33a: je bf370 ; done if equal
bf33c: cmp %rax,%r13 ; compare with capacity
bf33f: jne bf330 ; loop back

before:

bf600: mov -0x38(%rbp),%rax ; LOAD ptr from stack
bf604: mov %r15,(%rax,%r15,8) ; vec[len] = len
bf608: inc %r15 ; len++
bf60b: mov %r15,-0x30(%rbp) ; STORE len to stack
bf60f: cmp %r15,%r14 ; compare with target
bf612: je bf630 ; done if equal
bf614: cmp -0x40(%rbp),%r15 ; LOAD capacity from stack
bf618: jne bf600 ; loop back

Secondary benefit: grow_one_impl and other growth functions use &dyn Allocator, so they are compiled once in libstd rather than monomorphized per allocator type.

Isn't this a penalty for small custom allocators that can be inlined?

I suspect there is a small penalty due to indirection (although the compiler seem to generate call reg in the direct case too). But there are also positive side effects due to code dedup. These are fallback paths so from that perspective a tiny regression isn't the worst. The point of this PR is that the existence of fallback path should not influence the compilers ability to optimize the fast path and keep that clean and tight.

The &dyn Allocator change can be changed to &Allocator at the cost of monomorphizing grow function.

rustbot Avatar
rustbot on 2026-01-11 05:06:19 UTC
rustbot Avatar
rustbot on 2026-01-11 05:06:19 UTC
View on GitHub

r? @tgross35

rustbot has assigned @tgross35.
They will have a look at your PR within the next two weeks and either review your PR or reassign to another reviewer.

Use r? to explicitly pick a reviewer

tgross35 Avatar
tgross35 on 2026-01-11 05:26:18 UTC
tgross35 Avatar
tgross35 on 2026-01-11 05:26:18 UTC
View on GitHub

@bors try @rust-timer queue

rust-timer Avatar
rust-timer on 2026-01-11 05:26:20 UTC
rust-timer Avatar
rust-timer on 2026-01-11 05:26:20 UTC
View on GitHub

Awaiting bors try build completion.

@rustbot label: +S-waiting-on-perf

rust-bors Avatar
rust-bors on 2026-01-11 05:26:22 UTC · edited
rust-bors Avatar
rust-bors on 2026-01-11 05:26:22 UTC · edited
View on GitHub

⌛ Trying commit ac22726 with merge b00b54d

To cancel the try build, run the command @bors try cancel.

Workflow: https://github.com/rust-lang/rust/actions/runs/20890110771

rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 05:27:57 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 05:27:57 UTC · hidden as outdated
View on GitHub

The job aarch64-gnu-llvm-20-2 failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
[RUSTC-TIMING] alloctests test:true 7.987
error: could not compile `alloctests` (test "alloctests") due to 1 previous error
warning: build failed, waiting for other jobs to finish...
[RUSTC-TIMING] coretests test:true 62.411
env -u RUSTC_WRAPPER CARGO_ENCODED_RUSTDOCFLAGS="-Csymbol-mangling-version=v0\u{1f}-Zannotate-moves\u{1f}-Zrandomize-layout\u{1f}-Zunstable-options\u{1f}--check-cfg=cfg(bootstrap)\u{1f}-Wrustdoc::invalid_codeblock_attributes\u{1f}--crate-version\u{1f}1.94.0-nightly\t(5dde9360b\t2026-01-11)" CARGO_ENCODED_RUSTFLAGS="-Csymbol-mangling-version=v0\u{1f}-Zannotate-moves\u{1f}-Zrandomize-layout\u{1f}-Zunstable-options\u{1f}--check-cfg=cfg(bootstrap)\u{1f}-Zmacro-backtrace\u{1f}-Csplit-debuginfo=off\u{1f}-Clink-arg=-L/usr/lib/llvm-20/lib\u{1f}-Cllvm-args=-import-instr-limit=10\u{1f}-Clink-args=-Wl,-z,origin\u{1f}-Clink-args=-Wl,-rpath,$ORIGIN/../lib\u{1f}-Alinker-messages\u{1f}--cap-lints=allow\u{1f}--cfg\u{1f}randomized_layouts" RUSTC="/checkout/obj/build/aarch64-unknown-linux-gnu/stage2-codegen/cg_clif/dist/rustc-clif" RUSTDOC="/checkout/obj/build/aarch64-unknown-linux-gnu/stage2-codegen/cg_clif/dist/rustdoc-clif" "/checkout/obj/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "test" "--manifest-path" "/checkout/obj/build/aarch64-unknown-linux-gnu/stage2-codegen/cg_clif/build/sysroot_tests/Cargo.toml" "--target-dir" "/checkout/obj/build/aarch64-unknown-linux-gnu/stage2-codegen/cg_clif/build/sysroot_tests_target" "--locked" "--target" "aarch64-unknown-linux-gnu" "-p" "coretests" "-p" "alloctests" "--tests" "--" "-q" exited with status ExitStatus(unix_wait_status(25856))
Bootstrap failed while executing `--stage 2 test --skip tests --skip coverage-map --skip coverage-run --skip library --skip tidyselftest`
Command `/checkout/obj/build/aarch64-unknown-linux-gnu/stage0/bin/cargo run -Zwarnings --target aarch64-unknown-linux-gnu -Zbinary-dep-depinfo -j 4 -Zroot-dir=/checkout --locked --color=always --release --manifest-path /checkout/compiler/rustc_codegen_cranelift/build_system/Cargo.toml -- test --download-dir /checkout/obj/build/cg_clif_download --out-dir /checkout/obj/build/aarch64-unknown-linux-gnu/stage2-codegen/cg_clif --no-unstable-features --use-backend cranelift --sysroot llvm --skip-test testsuite.extended_sysroot [workdir=/checkout/compiler/rustc_codegen_cranelift]` failed with exit code 1
Created at: src/bootstrap/src/core/build_steps/test.rs:3860:25
Executed at: src/bootstrap/src/core/build_steps/test.rs:3905:26

Command has failed. Rerun with -v to see more details.
Build completed unsuccessfully in 0:26:08
  local time: Sun Jan 11 05:27:43 UTC 2026
  network time: Sun, 11 Jan 2026 05:27:43 GMT
##[error]Process completed with exit code 1.
##[group]Run echo "disk usage:"
tgross35 Avatar
tgross35 on 2026-01-11 05:29:07 UTC
tgross35 Avatar
tgross35 on 2026-01-11 05:29:07 UTC
View on GitHub

Looks like the build isn't working yet
@bors try cancel

rust-bors Avatar
rust-bors on 2026-01-11 05:29:11 UTC
rust-bors Avatar
rust-bors on 2026-01-11 05:29:11 UTC
View on GitHub

Try build cancelled. Cancelled workflows:

tgross35 Avatar
tgross35 commented on 2026-01-11 05:34:48 UTC
library/alloctests/benches/vec.rs
11 b.iter(|| {
12 let mut v = Vec::new();
13 for i in 0..n {
14 v.push(i);
15 }
16 black_box(v.as_slice());
17 v
18 });
tgross35 Avatar tgross35 on 2026-01-11 05:34:48 UTC
View on GitHub

You should move the constructor outside of the loop so we're not benchmarking that cost (more relevant for with_capacity). It would also be a good idea to black_box(v).push(i), in which case you don't need to do the as_slice bit.

gerben-stavenga Avatar gerben-stavenga on 2026-01-11 13:08:25 UTC
View on GitHub

black_box(v).push(i) would prevent the compiler optimizations we are trying to enable, by explicitly escaping the reference to v.

Moving the constructor out of the lambda also have a similar effect, due to criterion iter black_boxing the returned v.

tgross35 Avatar
tgross35 requested changes on 2026-01-11 05:46:30 UTC
library/alloc/src/vec/mod.rs · outdated
905 905 #[cfg(not(no_global_oom_handling))]
906 906 #[rustc_const_unstable(feature = "const_heap", issue = "79597")]
907 #[rustfmt::skip] // FIXME(fee1-dead): temporary measure before rustfmt is bumped
907 #[rustfmt::skip]
tgross35 Avatar tgross35 on 2026-01-11 05:35:01 UTC
View on GitHub

Accidental change?

library/alloc/src/raw_vec/mod.rs · outdated
836 810 /// # Safety
837 811 ///
838 /// This function deallocates the owned allocation, but does not update `ptr` or `cap` to
839 /// prevent double-free or use-after-free. Essentially, do not do anything with the caller
840 /// after this function returns.
841 /// Ideally this function would take `self` by move, but it cannot because it exists to be
842 /// called from a `Drop` impl.
843 unsafe fn deallocate(&mut self, elem_layout: Layout) {
812 /// This function deallocates the owned allocation.
813 #[inline]
814 unsafe fn deallocate(self, elem_layout: Layout, alloc: &dyn Allocator) {
tgross35 Avatar tgross35 on 2026-01-11 05:36:08 UTC
View on GitHub

Deleted safety comments?

gerben-stavenga Avatar gerben-stavenga on 2026-01-11 12:57:57 UTC
View on GitHub

The safety comments are mainly because it taked &mut self. The comments even mention it should take self, but couldn't because earlier it wasn't Copy. Now it takes self so the safety comments don't make sense.

hkBst Avatar hkBst on 2026-01-12 11:54:46 UTC
View on GitHub

So it's no longer unsafe then?

library/alloc/src/raw_vec/mod.rs · outdated
165 unsafe {
166 // Make it more obvious that a subsequent Vec::reserve(capacity) will not allocate.
167 hint::assert_unchecked(!inner.needs_to_grow(0, capacity, T::LAYOUT));
168 }
tgross35 Avatar tgross35 on 2026-01-11 05:38:10 UTC
View on GitHub

We're trying to get better about having SAFETY comments in std, please make sure to cover any new unsafe blocks.

library/alloc/src/raw_vec/mod.rs
186 181 pub(crate) fn grow_one(&mut self) {
187 // SAFETY: All calls on self.inner pass T::LAYOUT as the elem_layout
188 unsafe { self.inner.grow_one(T::LAYOUT) }
182 // Copy allocator to a temporary to prevent &self from escaping
183 // through the &dyn Allocator parameter, allowing LLVM to keep
184 // the Vec fields in registers.
185 let alloc = unsafe { ptr::read(&self.alloc) };
186 self.inner = self.inner.grow_one(&alloc, T::LAYOUT);
187 unsafe { ptr::write(&mut self.alloc, alloc) };
189 188 }
tgross35 Avatar tgross35 on 2026-01-11 05:46:12 UTC · edited
View on GitHub

Maybe I'm missing something but I don't understand this at all. How does self "escape" through &dyn Allocator? How does saving+restoring make a difference? What happens when alloc has interior mutability and you clobber it? What happens when grow_one panics on OOM and there are now two instances of A, which may impl drop, pointing to the same memory?

hkBst Avatar hkBst on 2026-01-11 12:24:54 UTC
View on GitHub

Is this perhaps just a trick to get around the borrow checker? If so, then perhaps something like this would work:

let inner = self.inner;
inner.grow_one(self.alloc, T::LAYOUT);
rustbot Avatar
rustbot on 2026-01-11 05:46:33 UTC
rustbot Avatar
rustbot on 2026-01-11 05:46:33 UTC
View on GitHub

Reminder, once the PR becomes ready for a review, use @rustbot ready.

Noratrieb Avatar
Noratrieb on 2026-01-11 10:33:03 UTC
Noratrieb Avatar
Noratrieb on 2026-01-11 10:33:03 UTC
View on GitHub

FWIW the compile time benchmarks will not say anything about the runtime perf of this change, since such large refactorings of Vec are pretty much guaranteed to have some compile time impact on crates that use vec (so every crate).
So the compile time impact of this change and the runtime impact on the vecs in the compiler will be hard to untangle.

👍1
hkBst Avatar
hkBst requested changes on 2026-01-11 12:45:15 UTC
library/alloc/src/raw_vec/mod.rs
470 {
477 471 handle_error(err);
478 472 }
473 self
hkBst Avatar hkBst on 2026-01-11 12:39:30 UTC
View on GitHub

I don't see any changes that could warrant this function changing to no longer being unsafe, it even still has the same safety comments...

library/alloctests/benches/vec.rs · outdated
6 // ============================================================================
7 // PUSH BENCHMARKS - The focus of your optimization work
8 // ============================================================================
hkBst Avatar hkBst on 2026-01-11 12:44:40 UTC
View on GitHub

Please remove these AI comments.

library/alloctests/benches/vec.rs · outdated
59 do_bench_push_preallocated(b, 10000);
60 }
61
62 // ============================================================================
hkBst Avatar hkBst on 2026-01-11 12:44:59 UTC
View on GitHub

same here

saethlin Avatar
saethlin on 2026-01-11 14:33:07 UTC
saethlin Avatar
saethlin on 2026-01-11 14:33:07 UTC
View on GitHub

If not always inlined the &mut self escapes a reference, producing code that is drastically worse.

When you say "always inlined" are you referring to the attribute or the optimization?

tgross35 Avatar
tgross35 on 2026-01-11 14:38:18 UTC
tgross35 Avatar
tgross35 on 2026-01-11 14:38:18 UTC
View on GitHub

Could you define the “escaping” term that you keep using? In Rust I’m only aware of that referring to lifetimes, which isn’t relevant for codegen.

gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 14:56:54 UTC
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 14:56:54 UTC
View on GitHub

Could you define the “escaping” term that you keep using? In Rust I’m only aware of that referring to lifetimes, which isn’t relevant for codegen.

In compiler analysis the most important step is mem2reg, ie. lower stack variables to register. The crucial part of this step is that there is no reference to the stack variable. So local function variables whose stack address does not escape the compiler analysis (for example by passing it to some other function) can easily be moved to SSA variables. This allows subsequent codegen to keep those variables in register (because there is no need to sync the register to stack). So currently the reference to self (len, cap, ptr) is passed to grow and thus all subsequent codegen is poluted by unnecessary register <-> stack syncing (see the asm i posted). This PR removes the reference to stack variables from the outline function call. in lieu of passing and returning cap, ptr by value (ie. register)

gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 17:54:04 UTC
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 17:54:04 UTC
View on GitHub

If not always inlined the &mut self escapes a reference, producing code that is drastically worse.

When you say "always inlined" are you referring to the attribute or the optimization?

I refer to the optimization, it's crucial that functions taking &mut self, are always inlined because then compiler optimization will see that the reference can eliminated.

gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 17:57:40 UTC
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 17:57:40 UTC
View on GitHub

FWIW the compile time benchmarks will not say anything about the runtime perf of this change, since such large refactorings of Vec are pretty much guaranteed to have some compile time impact on crates that use vec (so every crate). So the compile time impact of this change and the runtime impact on the vecs in the compiler will be hard to untangle.

I'm not sure if I understand you. The main point of this PR is runtime performance of code. I hope the benchmarks I'm running are the benchmarks for measuring the runtime perf of the Vec implementation.

There might be a compile time benefit. Because the fallback grow function is only compiled once as part of the standard lib and not, like the current state, in each crate that uses vec.

saethlin Avatar
saethlin on 2026-01-11 18:18:22 UTC · edited
saethlin Avatar
saethlin on 2026-01-11 18:18:22 UTC · edited
View on GitHub

I refer to the optimization, it's crucial that functions taking &mut self, are always inlined because then compiler optimization will see that the reference can eliminated.

I do not think this is sufficient justification for inline(always). We have so many functions which would cause similar or worse optimization degradation if they weren't inlined in optimized builds. This just isn't worth the cost of the degradation to debug build times that is caused by inline(always). If #[inline] suffices, always is all cost no benefit.

tgross35 Avatar
tgross35 on 2026-01-11 20:00:48 UTC
tgross35 Avatar
tgross35 on 2026-01-11 20:00:48 UTC
View on GitHub

^ to reiterate that, the rule of thumb now is that any use of #[inline(always)] needs to be backed up by benchmarks and codegen showing it makes a meaningful difference over ‘#[inline]. ‘#[inline(always)]` hurts unoptimized builds and size-optimized binaries so we need to be very cautious with its use.

In general here, it would be helpful if you could put a mini version of the before and after code on godbolt so we can get the bigger picture of what’s actually happening at the different levels.

rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 20:00:54 UTC · hidden as outdated
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-11 20:00:54 UTC · hidden as outdated
View on GitHub

The job tidy failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
 use rand::RngCore;
 use test::{Bencher, black_box};
 
-
 fn do_bench_push(b: &mut Bencher, n: usize) {
     b.iter(|| {
         let mut v = Vec::new();
fmt: checked 6634 files
Bootstrap failed while executing `test src/tools/tidy tidyselftest --extra-checks=py,cpp,js,spellcheck`
Build completed unsuccessfully in 0:00:53
  local time: Sun Jan 11 20:00:41 UTC 2026
  network time: Sun, 11 Jan 2026 20:00:41 GMT
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 20:47:43 UTC
gerben-stavenga Avatar
gerben-stavenga on 2026-01-11 20:47:43 UTC
View on GitHub

^ to reiterate that, the rule of thumb now is that any use of #[inline(always)] needs to be backed up by benchmarks and codegen showing it makes a meaningful difference over ‘#[inline]. ‘#[inline(always)]` hurts unoptimized builds and size-optimized binaries so we need to be very cautious with its use.

In general here, it would be helpful if you could put a mini version of the before and after code on godbolt so we can get the bigger picture of what’s actually happening at the different levels.

https://godbolt.org/z/nrnP4T83e

shows a rather minimal version, you can see the difference in codegen the test functions

vec_push vs rf_push

👍1
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-12 01:21:09 UTC
rust-log-analyzer Avatar
rust-log-analyzer on 2026-01-12 01:21:09 UTC
View on GitHub

The job aarch64-gnu-llvm-20-1 failed! Check out the build log: (web) (plain enhanced) (plain)

Click to see the possible cause of the failure (guessed by this bot)
   Compiling alloc v0.0.0 (/checkout/library/alloc)
[RUSTC-TIMING] rustc_std_workspace_core test:false 0.047
[RUSTC-TIMING] core test:false 32.544
   Compiling memchr v2.7.6
error[E0277]: the trait bound `CaptureLocally<'_, A>: core::alloc::Allocator` is not satisfied
   --> library/alloc/src/raw_vec/mod.rs:207:42
    |
207 |         self.inner = self.inner.grow_one(&local_alloc, T::LAYOUT);
    |                                          ^^^^^^^^^^^^ unsatisfied trait bound
    |
help: the trait `core::alloc::Allocator` is not implemented for `CaptureLocally<'_, A>`
   --> library/alloc/src/raw_vec/mod.rs:171:1
    |
171 | struct CaptureLocally<'a, T> {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    = help: the following other types implement trait `core::alloc::Allocator`:
              &A
              &mut A
              Arc<T, A>
              Box<T, A>
              Rc<T, A>
              alloc::Global
    = note: required for the cast from `&CaptureLocally<'_, A>` to `&dyn core::alloc::Allocator`

[RUSTC-TIMING] libc test:false 1.965
   Compiling unwind v0.0.0 (/checkout/library/unwind)
[RUSTC-TIMING] unwind test:false 0.065
   Compiling adler2 v2.0.1
rust-bors Avatar
rust-bors on 2026-02-23 06:33:49 UTC
rust-bors Avatar
rust-bors on 2026-02-23 06:33:49 UTC
View on GitHub

☔ The latest upstream changes (presumably #153000) made this pull request unmergeable. Please resolve the merge conflicts.