author | CoprDistGit <infra@openeuler.org> | 2024-08-01 14:23:42 +0000
committer | CoprDistGit <infra@openeuler.org> | 2024-08-01 14:23:42 +0000
commit | 82711f6567ef069eebb942e382e2c3fa61fbf538 (patch)
tree | 22200b7326b32ca672ffb6e4ce6d19a09dc476e5 /gcc48-rh1469697-16.patch
parent | 5d624aa0d36abe76a344f0593eae5cf36d083b15 (diff)
automatic import of compat-libgfortran-48openeuler24.03_LTSopeneuler23.09
Diffstat (limited to 'gcc48-rh1469697-16.patch')
-rw-r--r-- | gcc48-rh1469697-16.patch | 462
1 file changed, 462 insertions, 0 deletions
diff --git a/gcc48-rh1469697-16.patch b/gcc48-rh1469697-16.patch
new file mode 100644
index 0000000..423f448
--- /dev/null
+++ b/gcc48-rh1469697-16.patch
@@ -0,0 +1,462 @@
+commit 5d7a77ede3e91948ee125bd82533d7e692543fff
+Author: Jeff Law <law@redhat.com>
+Date:   Mon Oct 2 13:43:01 2017 -0600
+
+    aarch64 support
+
+diff --git a/gcc/config/aarch64/.aarch64.c.rej.swp b/gcc/config/aarch64/.aarch64.c.rej.swp
+new file mode 100644
+index 00000000000..b899e21b855
+Binary files /dev/null and b/gcc/config/aarch64/.aarch64.c.rej.swp differ
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index cadf193cfcf..e08632ffa88 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -45,6 +45,8 @@
+ #include "gimple.h"
+ #include "optabs.h"
+ #include "dwarf2.h"
++#include "params.h"
++#include "dumpfile.h"
+ 
+ /* Classifies an address.
+ 
+@@ -1696,7 +1698,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
+   output_asm_insn ("sub\t%0, %0, %1", xops);
+ 
+   /* Probe at TEST_ADDR.  */
+-  output_asm_insn ("str\txzr, [%0]", xops);
++  if (flag_stack_clash_protection)
++    {
++      gcc_assert (xops[0] == stack_pointer_rtx);
++      xops[1] = GEN_INT (PROBE_INTERVAL - 8);
++      output_asm_insn ("str\txzr, [%0, %1]", xops);
++    }
++  else
++    output_asm_insn ("str\txzr, [%0]", xops);
+ 
+   /* Test if TEST_ADDR == LAST_ADDR.  */
+   xops[1] = reg2;
+@@ -2001,6 +2010,123 @@ aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
+                                                base_rtx, cfi_ops);
+ }
+ 
++/* Allocate SIZE bytes of stack space using SCRATCH_REG as a scratch
++   register.  */
++
++static void
++aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size)
++{
++  HOST_WIDE_INT probe_interval
++    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
++  HOST_WIDE_INT guard_size
++    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
++  HOST_WIDE_INT guard_used_by_caller = 1024;
++
++  /* SIZE should be large enough to require probing here.  I.e., it
++     must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER.
++
++     We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
++     without any probing.  */
++  gcc_assert (size >= guard_size - guard_used_by_caller);
++  aarch64_sub_sp (scratchreg, guard_size - guard_used_by_caller, true);
++  HOST_WIDE_INT orig_size = size;
++  size -= (guard_size - guard_used_by_caller);
++
++  HOST_WIDE_INT rounded_size = size & -probe_interval;
++  HOST_WIDE_INT residual = size - rounded_size;
++
++  /* We can handle a small number of allocations/probes inline.  Otherwise
++     punt to a loop.  */
++  if (rounded_size && rounded_size <= 4 * probe_interval)
++    {
++      /* We don't use aarch64_sub_sp here because we don't want to
++         repeatedly load SCRATCHREG.  */
++      rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
++      if (probe_interval > ARITH_FACTOR)
++        emit_move_insn (scratch_rtx, GEN_INT (-probe_interval));
++      else
++        scratch_rtx = GEN_INT (-probe_interval);
++
++      for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
++        {
++          rtx insn = emit_insn (gen_add2_insn (stack_pointer_rtx, scratch_rtx));
++          add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
++
++          if (probe_interval > ARITH_FACTOR)
++            {
++              RTX_FRAME_RELATED_P (insn) = 1;
++              rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
++              add_reg_note (insn, REG_CFA_ADJUST_CFA,
++                            gen_rtx_SET (VOIDmode, stack_pointer_rtx, adj));
++            }
++
++          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                           (probe_interval
++                                            - GET_MODE_SIZE (word_mode))));
++          emit_insn (gen_blockage ());
++        }
++      dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
++    }
++  else if (rounded_size)
++    {
++      /* Compute the ending address.  */
++      rtx temp = gen_rtx_REG (word_mode, scratchreg);
++      emit_move_insn (temp, GEN_INT (-rounded_size));
++      rtx insn = emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp));
++
++      /* For the initial allocation, we don't have a frame pointer
++         set up, so we always need CFI notes.  If we're doing the
++         final allocation, then we may have a frame pointer, in which
++         case it is the CFA, otherwise we need CFI notes.
++
++         We can determine which allocation we are doing by looking at
++         the temporary register.  IP0 is the initial allocation, IP1
++         is the final allocation.  */
++      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
++        {
++          /* We want the CFA independent of the stack pointer for the
++             duration of the loop.  */
++          add_reg_note (insn, REG_CFA_DEF_CFA,
++                        plus_constant (Pmode, temp,
++                                       (rounded_size + (orig_size - size))));
++          RTX_FRAME_RELATED_P (insn) = 1;
++        }
++
++      /* This allocates and probes the stack.
++
++         It also probes at a 4k interval regardless of the value of
++         PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL.  */
++      insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
++                                               stack_pointer_rtx, temp));
++
++      /* Now reset the CFA register if needed.  */
++      if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
++        {
++          add_reg_note (insn, REG_CFA_DEF_CFA,
++                        plus_constant (Pmode, stack_pointer_rtx,
++                                       (rounded_size + (orig_size - size))));
++          RTX_FRAME_RELATED_P (insn) = 1;
++        }
++
++      emit_insn (gen_blockage ());
++      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
++    }
++  else
++    dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
++
++  /* Handle any residuals.
++     Note that any residual must be probed.  */
++  if (residual)
++    {
++      aarch64_sub_sp (scratchreg, residual, true);
++      add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
++      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                       (residual - GET_MODE_SIZE (word_mode))));
++      emit_insn (gen_blockage ());
++    }
++  return;
++}
++
+ /* AArch64 stack frames generated by this compiler look like:
+ 
+        +-------------------------------+
+@@ -2073,6 +2199,44 @@ aarch64_expand_prologue (void)
+                           - original_frame_size
+                           - cfun->machine->frame.saved_regs_size);
+ 
++  /* We do not fully protect aarch64 against stack clash style attacks
++     as doing so would be prohibitively expensive with less utility over
++     time as newer compilers are deployed.
++
++     We assume the guard is at least 64k.  Furthermore, we assume that
++     the caller has not pushed the stack pointer more than 1k into
++     the guard.  A caller that pushes the stack pointer more than 1k into
++     the guard is considered invalid.
++
++     Note that the caller's ability to push the stack pointer into the
++     guard is a function of the number and size of outgoing arguments and/or
++     dynamic stack allocations due to the mandatory save of the link register
++     in the caller's frame.
++
++     With those assumptions the callee can allocate up to 63k of stack
++     space without probing.
++
++     When probing is needed, we emit a probe at the start of the prologue
++     and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
++
++     We have to track how much space has been allocated, but we do not
++     track stores into the stack as implicit probes except for the
++     fp/lr store.  */
++  HOST_WIDE_INT guard_size
++    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
++  HOST_WIDE_INT guard_used_by_caller = 1024;
++  HOST_WIDE_INT final_adjust = crtl->outgoing_args_size;
++  HOST_WIDE_INT initial_adjust = frame_size;
++
++  if (flag_stack_clash_protection)
++    {
++      if (initial_adjust == 0)
++        dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
++      else if (offset < guard_size - guard_used_by_caller
++               && final_adjust < guard_size - guard_used_by_caller)
++        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
++    }
++
+   /* Store pairs and load pairs have a range only -512 to 504.  */
+   if (offset >= 512)
+     {
+@@ -2089,7 +2253,10 @@
+       frame_size -= (offset + crtl->outgoing_args_size);
+       fp_offset = 0;
+ 
+-      if (frame_size >= 0x1000000)
++      if (flag_stack_clash_protection
++          && frame_size >= guard_size - guard_used_by_caller)
++        aarch64_allocate_and_probe_stack_space (IP0_REGNUM, frame_size);
++      else if (frame_size >= 0x1000000)
+         {
+           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
+           emit_move_insn (op0, GEN_INT (-frame_size));
+@@ -2206,10 +2373,30 @@ aarch64_expand_prologue (void)
+     {
+       if (crtl->outgoing_args_size > 0)
+         {
+-          insn = emit_insn (gen_add2_insn
+-                            (stack_pointer_rtx,
+-                             GEN_INT (- crtl->outgoing_args_size)));
+-          RTX_FRAME_RELATED_P (insn) = 1;
++          if (flag_stack_clash_protection)
++            {
++              /* First probe if the final adjustment is larger than the
++                 guard size less the amount of guard reserved for use by
++                 the caller's outgoing args.  */
++              if (final_adjust >= guard_size - guard_used_by_caller)
++                aarch64_allocate_and_probe_stack_space (IP1_REGNUM,
++                                                        final_adjust);
++              else
++                aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
++
++              /* We must also probe if the final adjustment is larger than the
++                 guard that is assumed used by the caller.  This may be
++                 sub-optimal.  */
++              if (final_adjust >= guard_used_by_caller)
++                {
++                  if (dump_file)
++                    fprintf (dump_file,
++                             "Stack clash aarch64 large outgoing arg, probing\n");
++                  emit_stack_probe (stack_pointer_rtx);
++                }
++            }
++          else
++            aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
+         }
+     }
+ }
+@@ -5088,6 +5275,12 @@ aarch64_override_options (void)
+ #endif
+     }
+ 
++  /* We assume the guard page is 64k.  */
++  maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
++                         16,
++                         global_options.x_param_values,
++                         global_options_set.x_param_values);
++
+   aarch64_override_options_after_change ();
+ }
+ 
+@@ -8161,6 +8354,28 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+   return ret;
+ }
+ 
++/* It has been decided to allow up to 1kb of outgoing argument
++   space to be allocated w/o probing.  If more than 1kb of outgoing
++   argument space is allocated, then it must be probed and the last
++   probe must occur no more than 1kbyte away from the end of the
++   allocated space.
++
++   This implies that the residual part of an alloca allocation may
++   need probing in cases where the generic code might not otherwise
++   think a probe is needed.
++
++   This target hook returns TRUE when allocating RESIDUAL bytes of
++   alloca space requires an additional probe, otherwise FALSE is
++   returned.  */
++
++static bool
++aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
++{
++  return (residual == CONST0_RTX (Pmode)
++          || GET_CODE (residual) != CONST_INT
++          || INTVAL (residual) >= 1024);
++}
++
+ #undef TARGET_ADDRESS_COST
+ #define TARGET_ADDRESS_COST aarch64_address_cost
+ 
+@@ -8378,6 +8593,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+ #undef TARGET_FIXED_CONDITION_CODE_REGS
+ #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
+ 
++#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
++#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
++  aarch64_stack_clash_protection_final_dynamic_probe
++
+ struct gcc_target targetm = TARGET_INITIALIZER;
+ 
+ #include "gt-aarch64.h"
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index a085c6acaf5..5485a5f70b1 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -3401,7 +3401,7 @@
+ )
+ 
+ (define_insn "probe_stack_range"
+-  [(set (match_operand:DI 0 "register_operand" "=r")
++  [(set (match_operand:DI 0 "register_operand" "=rk")
+         (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
+                              (match_operand:DI 2 "register_operand" "r")]
+                              UNSPECV_PROBE_STACK_RANGE))]
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
+new file mode 100644
+index 00000000000..2ce38483b6b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++extern void arf (unsigned long int *, unsigned long int *);
++void
++frob ()
++{
++  unsigned long int num[1000];
++  unsigned long int den[1000];
++  arf (den, num);
++}
++
++/* This verifies that the scheduler did not break the dependencies
++   by adjusting the offsets within the probe and that the scheduler
++   did not reorder around the stack probes.  */
++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
+new file mode 100644
+index 00000000000..d8886835989
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
++#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
++void out1(ARG192(__int128));
++int t1(int);
++
++int t3(int x)
++{
++  if (x < 1000)
++    return t1 (x) + 1;
++
++  out1 (ARG192(1));
++  return 0;
++}
++
++
++
++/* This test creates a large (> 1k) outgoing argument area that needs
++   to be probed.  We don't test the exact size of the space or the
++   exact offset to make the test a little less sensitive to trivial
++   output changes.  */
++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
+new file mode 100644
+index 00000000000..59ffe01376d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++int t1(int);
++
++int t2(int x)
++{
++  char *p = __builtin_alloca (4050);
++  x = t1 (x);
++  return p[x];
++}
++
++
++/* This test has a constant sized alloca that is smaller than the
++   probe interval.  But it actually requires two probes instead
++   of one because of the optimistic assumptions we made in the
++   aarch64 prologue code WRT probing state.
++
++   The form can change quite a bit so we just check for two
++   probes without looking at the actual address.  */
++/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
+new file mode 100644
+index 00000000000..e06db6dc2f0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++int t1(int);
++
++int t2(int x)
++{
++  char *p = __builtin_alloca (x);
++  x = t1 (x);
++  return p[x];
++}
++
++
++/* This test has a variable sized alloca.  It requires 3 probes:
++   one in the loop, one for the residual, and one at the end of the
++   alloca area.
++
++   The form can change quite a bit so we just check for three
++   probes without looking at the actual address.  */
++/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
++
++
++
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index aba99513ed0..a8451c98b08 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -5420,14 +5420,9 @@ proc check_effective_target_autoincdec { } {
+ #
+ proc check_effective_target_supports_stack_clash_protection { } {
+ 
+-    # Temporary until the target bits are fully ACK'd.
+-#    if { [istarget aarch*-*-*] } {
+-#        return 1
+-#    }
+-
+     if { [istarget x86_64-*-*] || [istarget i?86-*-*]
+          || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
+-         || [istarget s390*-*-*] } {
++         || [istarget aarch64*-*-*] || [istarget s390*-*-*] } {
+         return 1
+     }
+     return 0
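The size bookkeeping the patch performs is worth spelling out. Below is a minimal standalone sketch (plain C, not GCC internals; the variable names merely mirror those in aarch64_allocate_and_probe_stack_space, and the 256k frame size is an arbitrary example) of how a frame request is split into a probe-free chunk, a probe-interval-aligned chunk, and a residual:

```c
#include <stdio.h>

int main (void)
{
  long guard_size = 1L << 16;        /* 64k guard region (param value 16) */
  long guard_used_by_caller = 1024;  /* caller may already sit 1k into it */
  long probe_interval = 1L << 12;    /* probe every 4k */
  long size = 256 * 1024;            /* hypothetical frame size */

  /* The first guard_size - guard_used_by_caller bytes (63k) can be
     allocated in a single step with no probe at all.  */
  size -= guard_size - guard_used_by_caller;

  /* The remainder is split into an interval-aligned chunk (probed
     inline if it spans at most 4 intervals, otherwise via a loop)
     plus a residual that always receives its own trailing probe.  */
  long rounded_size = size & -probe_interval;
  long residual = size - rounded_size;

  printf ("rounded_size=%ld residual=%ld\n", rounded_size, residual);
  return 0;
}
```

For a 256k frame this prints rounded_size=196608 and residual=1024; since 196608 exceeds 4 probe intervals, the prologue would take the loop path.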
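For a concrete trigger, any frame larger than the 63k probe-free window forces the new probing code to run. A hypothetical example in the style of the new stack-check-*.c tests (not part of the patch itself):

```c
/* Compile with: gcc -O2 -fstack-clash-protection -S big_frame.c
   and the prologue must emit explicit stack probes, because the
   locals alone exceed the 63k the callee may allocate unprobed.  */
extern void consume (char *);

void
big_frame (void)
{
  char buf[256 * 1024];  /* 256k of locals: well past the 63k window */
  consume (buf);
}
```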
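The decision encoded by the new target hook can also be restated in plain C. This is an illustrative paraphrase only (the real hook inspects RTL, and needs_final_dynamic_probe is a made-up name): a final probe after an alloca may be skipped only when the residual allocation is a compile-time constant smaller than the 1k of guard the caller is allowed to consume.

```c
#include <stdbool.h>

bool
needs_final_dynamic_probe (bool known_constant, long residual)
{
  if (!known_constant)
    return true;                /* variable-sized residual: always probe */
  return residual == 0          /* the hook also probes the zero case */
         || residual >= 1024;   /* constant of 1k or more: probe */
}
```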