From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Jun 2023 16:55:12 +0100 Subject: [PATCH 22/30] aarch64: Add bytes_below_hard_fp to frame info The frame layout code currently hard-codes the assumption that the number of bytes below the saved registers is equal to the size of the outgoing arguments. This patch abstracts that value into a new field of aarch64_frame. gcc/ * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New field. * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it, and use it instead of crtl->outgoing_args_size. (aarch64_get_separate_components): Use bytes_below_hard_fp instead of outgoing_args_size. (aarch64_process_components): Likewise. --- gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++----------------- gcc/config/aarch64/aarch64.h | 6 ++++- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e9dad682738f..25cf10cc4b94 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4684,6 +4684,8 @@ aarch64_layout_frame (void) last_fp_reg = regno; } + cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size; + if (cfun->machine->frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ @@ -4751,11 +4753,11 @@ aarch64_layout_frame (void) STACK_BOUNDARY / BITS_PER_UNIT); /* Both these values are already aligned. */ - gcc_assert (multiple_p (crtl->outgoing_args_size, + gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp, STACK_BOUNDARY / BITS_PER_UNIT)); cfun->machine->frame.frame_size = (cfun->machine->frame.hard_fp_offset - + crtl->outgoing_args_size); + + cfun->machine->frame.bytes_below_hard_fp); cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; @@ -4775,23 +4777,23 @@ aarch64_layout_frame (void) cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; else if (cfun->machine->frame.frame_size.is_constant (&const_size) && const_size < max_push_offset - && known_eq (crtl->outgoing_args_size, 0)) + && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0)) { - /* Simple, small frame with no outgoing arguments: + /* Simple, small frame with no data below the saved registers. stp reg1, reg2, [sp, -frame_size]! stp reg3, reg4, [sp, 16] */ cfun->machine->frame.callee_adjust = const_size; } - else if (known_lt (crtl->outgoing_args_size + else if (known_lt (cfun->machine->frame.bytes_below_hard_fp + cfun->machine->frame.saved_regs_size, 512) && !(cfun->calls_alloca && known_lt (cfun->machine->frame.hard_fp_offset, max_push_offset))) { - /* Frame with small outgoing arguments: + /* Frame with small area below the saved registers: sub sp, sp, frame_size - stp reg1, reg2, [sp, outgoing_args_size] - stp reg3, reg4, [sp, outgoing_args_size + 16] */ + stp reg1, reg2, [sp, bytes_below_hard_fp] + stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; cfun->machine->frame.callee_offset = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; @@ -4799,22 +4801,23 @@ aarch64_layout_frame (void) else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) && const_fp_offset < max_push_offset) { - /* Frame with large outgoing arguments but a small local area: + /* Frame with large area below the saved registers, but with a + small area above: stp reg1, reg2, [sp, -hard_fp_offset]! stp reg3, reg4, [sp, 16] - sub sp, sp, outgoing_args_size */ + sub sp, sp, bytes_below_hard_fp */ cfun->machine->frame.callee_adjust = const_fp_offset; cfun->machine->frame.final_adjust = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; } else { - /* Frame with large local area and outgoing arguments using frame pointer: + /* General case: sub sp, sp, hard_fp_offset stp x29, x30, [sp, 0] add x29, sp, 0 stp reg3, reg4, [sp, 16] - sub sp, sp, outgoing_args_size */ + sub sp, sp, bytes_below_hard_fp */ cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; cfun->machine->frame.final_adjust = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; @@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void) if (aarch64_register_saved_on_entry (regno)) { poly_int64 offset = cfun->machine->frame.reg_offset[regno]; + + /* Get the offset relative to the register we'll use. */ if (!frame_pointer_needed) - offset += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + offset += cfun->machine->frame.bytes_below_hard_fp; + /* Check that we can access the stack slot of the register with one direct load with no adjustments needed. */ if (offset_12bit_unsigned_scaled_p (DImode, offset)) @@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool prologue_p) rtx reg = gen_rtx_REG (mode, regno); poly_int64 offset = cfun->machine->frame.reg_offset[regno]; if (!frame_pointer_needed) - offset += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + offset += cfun->machine->frame.bytes_below_hard_fp; + rtx addr = plus_constant (Pmode, ptr_reg, offset); rtx mem = gen_frame_mem (mode, addr); @@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) /* REGNO2 can be saved/restored in a pair with REGNO. */ rtx reg2 = gen_rtx_REG (mode, regno2); if (!frame_pointer_needed) - offset2 += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + offset2 += cfun->machine->frame.bytes_below_hard_fp; rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); rtx mem2 = gen_frame_mem (mode, addr2); rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) @@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) registers. If POLY_SIZE is not large enough to require a probe this function will only adjust the stack. When allocating the stack space FRAME_RELATED_P is then used to indicate if the allocation is frame related. - FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing - arguments. If we are then we ensure that any allocation larger than the ABI - defined buffer needs a probe so that the invariant of having a 1KB buffer is - maintained. + FINAL_ADJUSTMENT_P indicates whether we are allocating the area below + the saved registers. If we are then we ensure that any allocation + larger than the ABI defined buffer needs a probe so that the + invariant of having a 1KB buffer is maintained. We emit barriers after each stack adjustment to prevent optimizations from breaking the invariant that we never drop the stack more than a page. This @@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to be probed. This maintains the requirement that each page is probed at least once. For initial probing we probe only if the allocation is - more than GUARD_SIZE - buffer, and for the outgoing arguments we probe + more than GUARD_SIZE - buffer, and below the saved registers we probe if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == GUARD_SIZE. This works that for any allocation that is large enough to trigger a probe here, we'll have at least one, and if they're not large diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index af0bc3f18817..95831637ba73 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame HOST_WIDE_INT saved_varargs_size; /* The size of the saved callee-save int/FP registers. */ - HOST_WIDE_INT saved_regs_size; + /* The number of bytes between the bottom of the static frame (the bottom + of the outgoing arguments) and the hard frame pointer. This value is + always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_hard_fp; + /* Offset from the base of the frame (incomming SP) to the top of the locals area. This value is always a multiple of STACK_BOUNDARY. */ -- 2.42.0