@@ -237,12 +237,12 @@ macro_rules! deinterleaving_load {
237237 ( $elem: ty, $lanes: literal, 2 , $ptr: expr) => { {
238238 use $crate:: core_arch:: macros:: deinterleave_mask;
239239 use $crate:: core_arch:: simd:: Simd ;
240- use $crate:: { mem:: transmute, ptr } ;
240+ use $crate:: mem:: transmute;
241241
242242 type V = Simd <$elem, $lanes>;
243243 type W = Simd <$elem, { $lanes * 2 } >;
244244
245- let w: W = ptr:: read_unaligned( $ptr as * const W ) ;
245+ let w: W = $crate :: ptr:: read_unaligned( $ptr as * const W ) ;
246246
247247 let v0: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 2 , 0 >( ) ) ;
248248 let v1: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 2 , 1 >( ) ) ;
@@ -253,12 +253,20 @@ macro_rules! deinterleaving_load {
253253 ( $elem: ty, $lanes: literal, 3 , $ptr: expr) => { {
254254 use $crate:: core_arch:: macros:: deinterleave_mask;
255255 use $crate:: core_arch:: simd:: Simd ;
256- use $crate:: { mem:: transmute , ptr } ;
256+ use $crate:: mem:: { MaybeUninit , transmute } ;
257257
258258 type V = Simd <$elem, $lanes>;
259259 type W = Simd <$elem, { $lanes * 3 } >;
260260
261- let w: W = ptr:: read_unaligned( $ptr as * const W ) ;
261+ // NOTE: repr(simd) adds padding to make the total size a power of two.
262+ // Hence reading W from ptr might read out of bounds.
263+ let mut mem = MaybeUninit :: <W >:: uninit( ) ;
264+ $crate:: ptr:: copy_nonoverlapping(
265+ $ptr. cast:: <$elem>( ) ,
266+ mem. as_mut_ptr( ) . cast:: <$elem>( ) ,
267+ $lanes * 3 ,
268+ ) ;
269+ let w = mem. assume_init( ) ;
262270
263271 let v0: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 3 , 0 >( ) ) ;
264272 let v1: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 3 , 1 >( ) ) ;
@@ -270,12 +278,12 @@ macro_rules! deinterleaving_load {
270278 ( $elem: ty, $lanes: literal, 4 , $ptr: expr) => { {
271279 use $crate:: core_arch:: macros:: deinterleave_mask;
272280 use $crate:: core_arch:: simd:: Simd ;
273- use $crate:: { mem:: transmute, ptr } ;
281+ use $crate:: mem:: transmute;
274282
275283 type V = Simd <$elem, $lanes>;
276284 type W = Simd <$elem, { $lanes * 4 } >;
277285
278- let w: W = ptr:: read_unaligned( $ptr as * const W ) ;
286+ let w: W = $crate :: ptr:: read_unaligned( $ptr as * const W ) ;
279287
280288 let v0: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 4 , 0 >( ) ) ;
281289 let v1: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 4 , 1 >( ) ) ;
@@ -322,8 +330,15 @@ macro_rules! interleaving_store {
322330 simd_shuffle!( $v. 2 , $v. 2 , identity:: <{ $lanes * 2 } >( ) ) ;
323331
324332 type W = Simd <$elem, { $lanes * 3 } >;
333+
334+ // NOTE: repr(simd) adds padding to make the total size a power of two.
335+ // Hence writing W to ptr might write out of bounds.
325336 let w: W = simd_shuffle!( v0v1, v2v2, interleave_mask:: <{ $lanes * 3 } , $lanes, 3 >( ) ) ;
326- $crate:: ptr:: write_unaligned( $ptr as * mut W , w) ;
337+ $crate:: ptr:: copy_nonoverlapping(
338+ ( & w as * const W ) . cast:: <$elem>( ) ,
339+ $ptr. cast:: <$elem>( ) ,
340+ $lanes * 3 ,
341+ ) ;
327342 } } ;
328343
329344 // N = 4
0 commit comments