-use rustc_abi::{Align, Endian, HasDataLayout, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
 
@@ -303,6 +306,298 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
+    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
+    // based on the one in clang.
+
+    // We're able to take some shortcuts because the return type of `va_arg` must implement the
+    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.
+
+    // typedef struct __va_list_tag {
+    //     unsigned int gp_offset;
+    //     unsigned int fp_offset;
+    //     void *overflow_arg_area;
+    //     void *reg_save_area;
+    // } va_list[1];
+    let va_list_addr = list.immediate();
+
+    // Peel off any newtype wrappers.
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);
+
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+    // in the registers. If not go to step 7.
+
+    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+    // general purpose registers needed to pass type and num_fp to hold
+    // the number of floating point registers needed.
+
+    let mut num_gp_registers = 0;
+    let mut num_fp_registers = 0;
+
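+    // Each general purpose register holds one eightbyte (8 bytes); each SSE register
+    // occupies a 16-byte slot in the register save area, hence the divisors below.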
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
+    match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
+            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
+            unreachable!(
+                "No x86-64 SysV va_arg implementation for {:?}",
+                layout.layout.backend_repr()
+            )
+        }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+    // l->fp_offset > 176 - num_fp * 16 go to step 7.
+
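+    // Layout of `__va_list_tag`: gp_offset at byte 0, fp_offset at byte 4,
+    // overflow_arg_area at byte 8, reg_save_area at byte 16.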
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
+    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
+    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
+
+    let mut use_regs = bx.const_bool(false);
+
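+    // The register save area holds 6 general purpose registers (48 bytes) followed by
+    // 8 SSE registers in 16-byte slots (48 + 128 = 176 bytes).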
+    if num_gp_registers > 0 {
+        let max_offset_val = 48u32 - num_gp_registers * 8;
+        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = fits_in_gp;
+    }
+
+    if num_fp_registers > 0 {
+        let max_offset_val = 176u32 - num_fp_registers * 16;
+        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
+    }
+
+    let in_reg = bx.append_sibling_block("va_arg.in_reg");
+    let in_mem = bx.append_sibling_block("va_arg.in_mem");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    bx.cond_br(use_regs, in_reg, in_mem);
+
+    // Emit code to load the value if it was passed in a register.
+    bx.switch_to_block(in_reg);
+
+    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+    // an offset of l->gp_offset and/or l->fp_offset. This may require
+    // copying to a temporary location in case the parameter is passed
+    // in different register classes or requires an alignment greater
+    // than 8 for general purpose registers and 16 for XMM registers.
+    //
+    // FIXME(llvm): This really results in shameful code when we end up needing to
+    // collect arguments from different places; often what should result in a
+    // simple assembling of a structure from scattered addresses has many more
+    // loads than necessary. Can we clean this up?
+    let reg_save_area_ptr =
+        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
+    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+            }
+            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
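+                    // Offset of the second eightbyte within the temporary: the size of
+                    // the first element rounded up to the second element's alignment.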
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+                }
+            }
+        }
+        BackendRepr::SimdVector { .. } => {
+            unreachable!("panics in the previous match on `backend_repr`")
+        }
+        BackendRepr::Memory { .. } => {
+            unreachable!("early returns in the previous match on `backend_repr`")
+        }
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 5. Set:
+    // l->gp_offset = l->gp_offset + num_gp * 8
+    if num_gp_registers > 0 {
+        let offset = bx.const_u32(num_gp_registers * 8);
+        let sum = bx.add(gp_offset_v, offset);
+        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
+    }
+
+    // l->fp_offset = l->fp_offset + num_fp * 16.
+    if num_fp_registers > 0 {
+        let offset = bx.const_u32(num_fp_registers * 16);
+        let sum = bx.add(fp_offset_v, offset);
+        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
+    }
+
+    bx.br(end);
+
+    bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
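+    // Both branches produce a pointer to the argument; select whichever block ran
+    // and load the value from it.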
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    if layout.layout.align.abi.bytes() > 8 {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            Align::from_bytes(8).unwrap(),
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
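+    // `overflow_arg_area` sits at byte offset 8 in `__va_list_tag`, after the two
+    // `unsigned int` offset fields.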
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
+
+    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+    // byte boundary if alignment needed by type exceeds 8 byte boundary.
+    // It isn't stated explicitly in the standard, but in practice we use
+    // alignment greater than 16 where necessary.
+    if layout.layout.align.abi.bytes() > 8 {
+        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    }
+
+    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+    let mem_addr = overflow_arg_area_v;
+
+    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+    // l->overflow_arg_area + sizeof(type).
+    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+    // an 8 byte boundary.
+    let size_in_bytes = layout.layout.size().bytes();
+    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
+    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
+    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+
+    mem_addr
+}
+
 fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     list: OperandRef<'tcx, &'ll Value>,
@@ -334,8 +629,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // (*va).va_ndx
     let va_reg_offset = 4;
     let va_ndx_offset = va_reg_offset + 4;
-    let offset_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));
 
     let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
     let offset = round_up_to_alignment(bx, offset, layout.align.abi);
@@ -356,11 +650,10 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // (*va).va_reg
-    let regsave_area_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
     let regsave_area =
         bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
     bx.br(end);
 
     bx.switch_to_block(from_stack);
@@ -381,9 +674,9 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
-    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
     let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
     bx.br(end);
 
     bx.switch_to_block(end);
@@ -449,6 +742,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
                 AllowHigherAlign::No,
             )
         }
+        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
+        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
         "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.