path: root/gcc48-rh1469697-16.patch
author    CoprDistGit <infra@openeuler.org>  2024-08-01 14:23:42 +0000
committer CoprDistGit <infra@openeuler.org>  2024-08-01 14:23:42 +0000
commit    82711f6567ef069eebb942e382e2c3fa61fbf538 (patch)
tree      22200b7326b32ca672ffb6e4ce6d19a09dc476e5 /gcc48-rh1469697-16.patch
parent    5d624aa0d36abe76a344f0593eae5cf36d083b15 (diff)
automatic import of compat-libgfortran-48 (openeuler24.03_LTS, openeuler23.09)
Diffstat (limited to 'gcc48-rh1469697-16.patch')
-rw-r--r--  gcc48-rh1469697-16.patch  462
1 file changed, 462 insertions(+), 0 deletions(-)
diff --git a/gcc48-rh1469697-16.patch b/gcc48-rh1469697-16.patch
new file mode 100644
index 0000000..423f448
--- /dev/null
+++ b/gcc48-rh1469697-16.patch
@@ -0,0 +1,462 @@
+commit 5d7a77ede3e91948ee125bd82533d7e692543fff
+Author: Jeff Law <law@redhat.com>
+Date: Mon Oct 2 13:43:01 2017 -0600
+
+ aarch64 support
+
+diff --git a/gcc/config/aarch64/.aarch64.c.rej.swp b/gcc/config/aarch64/.aarch64.c.rej.swp
+new file mode 100644
+index 00000000000..b899e21b855
+Binary files /dev/null and b/gcc/config/aarch64/.aarch64.c.rej.swp differ
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index cadf193cfcf..e08632ffa88 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -45,6 +45,8 @@
+ #include "gimple.h"
+ #include "optabs.h"
+ #include "dwarf2.h"
++#include "params.h"
++#include "dumpfile.h"
+
+ /* Classifies an address.
+
+@@ -1696,7 +1698,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
+ output_asm_insn ("sub\t%0, %0, %1", xops);
+
+ /* Probe at TEST_ADDR. */
+- output_asm_insn ("str\txzr, [%0]", xops);
++ if (flag_stack_clash_protection)
++ {
++ gcc_assert (xops[0] == stack_pointer_rtx);
++ xops[1] = GEN_INT (PROBE_INTERVAL - 8);
++ output_asm_insn ("str\txzr, [%0, %1]", xops);
++ }
++ else
++ output_asm_insn ("str\txzr, [%0]", xops);
+
+ /* Test if TEST_ADDR == LAST_ADDR. */
+ xops[1] = reg2;
+@@ -2001,6 +2010,123 @@ aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
+ base_rtx, cfi_ops);
+ }
+
++/* Allocate and probe SIZE bytes of stack space, using SCRATCHREG as a
++ scratch register. */
++
++static void
++aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size)
++{
++ HOST_WIDE_INT probe_interval
++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
++ HOST_WIDE_INT guard_size
++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
++ HOST_WIDE_INT guard_used_by_caller = 1024;
++
++ /* SIZE should be large enough to require probing here. I.e., it
++ must be at least GUARD_SIZE - GUARD_USED_BY_CALLER.
++
++ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
++ without any probing. */
++ gcc_assert (size >= guard_size - guard_used_by_caller);
++ aarch64_sub_sp (scratchreg, guard_size - guard_used_by_caller, true);
++ HOST_WIDE_INT orig_size = size;
++ size -= (guard_size - guard_used_by_caller);
++
++ HOST_WIDE_INT rounded_size = size & -probe_interval;
++ HOST_WIDE_INT residual = size - rounded_size;
++
++ /* We can handle a small number of allocations/probes inline. Otherwise
++ punt to a loop. */
++ if (rounded_size && rounded_size <= 4 * probe_interval)
++ {
++ /* We don't use aarch64_sub_sp here because we don't want to
++ repeatedly load SCRATCHREG. */
++ rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
++ if (probe_interval > ARITH_FACTOR)
++ emit_move_insn (scratch_rtx, GEN_INT (-probe_interval));
++ else
++ scratch_rtx = GEN_INT (-probe_interval);
++
++ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
++ {
++ rtx insn = emit_insn (gen_add2_insn (stack_pointer_rtx, scratch_rtx));
++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
++
++ if (probe_interval > ARITH_FACTOR)
++ {
++ RTX_FRAME_RELATED_P (insn) = 1;
++ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
++ add_reg_note (insn, REG_CFA_ADJUST_CFA,
++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, adj));
++ }
++
++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++ (probe_interval
++ - GET_MODE_SIZE (word_mode))));
++ emit_insn (gen_blockage ());
++ }
++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
++ }
++ else if (rounded_size)
++ {
++ /* Compute the ending address. */
++ rtx temp = gen_rtx_REG (word_mode, scratchreg);
++ emit_move_insn (temp, GEN_INT (-rounded_size));
++ rtx insn = emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp));
++
++ /* For the initial allocation, we don't have a frame pointer
++ set up, so we always need CFI notes. If we're doing the
++ final allocation, then we may have a frame pointer, in which
++ case it is the CFA, otherwise we need CFI notes.
++
++ We can determine which allocation we are doing by looking at
++ the temporary register. IP0 is the initial allocation, IP1
++ is the final allocation. */
++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
++ {
++ /* We want the CFA independent of the stack pointer for the
++ duration of the loop. */
++ add_reg_note (insn, REG_CFA_DEF_CFA,
++ plus_constant (Pmode, temp,
++ (rounded_size + (orig_size - size))));
++ RTX_FRAME_RELATED_P (insn) = 1;
++ }
++
++ /* This allocates and probes the stack.
++
++ It also probes at a 4k interval regardless of the value of
++ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
++ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
++ stack_pointer_rtx, temp));
++
++ /* Now reset the CFA register if needed. */
++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
++ {
++ add_reg_note (insn, REG_CFA_DEF_CFA,
++ plus_constant (Pmode, stack_pointer_rtx,
++ (rounded_size + (orig_size - size))));
++ RTX_FRAME_RELATED_P (insn) = 1;
++ }
++
++ emit_insn (gen_blockage ());
++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
++ }
++ else
++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
++
++ /* Handle any residuals.
++ Note that any residual must be probed. */
++ if (residual)
++ {
++ aarch64_sub_sp (scratchreg, residual, true);
++ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++ (residual - GET_MODE_SIZE (word_mode))));
++ emit_insn (gen_blockage ());
++ }
++ return;
++}
++
+ /* AArch64 stack frames generated by this compiler look like:
+
+ +-------------------------------+
+@@ -2073,6 +2199,44 @@ aarch64_expand_prologue (void)
+ - original_frame_size
+ - cfun->machine->frame.saved_regs_size);
+
++ /* We do not fully protect aarch64 against stack clash style attacks
++ as doing so would be prohibitively expensive with less utility over
++ time as newer compilers are deployed.
++
++ We assume the guard is at least 64k. Furthermore, we assume that
++ the caller has not pushed the stack pointer more than 1k into
++ the guard. A caller that pushes the stack pointer more than 1k into
++ the guard is considered invalid.
++
++ Note that the caller's ability to push the stack pointer into the
++ guard is a function of the number and size of outgoing arguments and/or
++ dynamic stack allocations due to the mandatory save of the link register
++ in the caller's frame.
++
++ With those assumptions the callee can allocate up to 63k of stack
++ space without probing.
++
++ When probing is needed, we emit a probe at the start of the prologue
++ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
++
++ We have to track how much space has been allocated, but we do not
++ track stores into the stack as implicit probes except for the
++ fp/lr store. */
++ HOST_WIDE_INT guard_size
++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
++ HOST_WIDE_INT guard_used_by_caller = 1024;
++ HOST_WIDE_INT final_adjust = crtl->outgoing_args_size;
++ HOST_WIDE_INT initial_adjust = frame_size;
++
++ if (flag_stack_clash_protection)
++ {
++ if (initial_adjust == 0)
++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
++ else if (offset < guard_size - guard_used_by_caller
++ && final_adjust < guard_size - guard_used_by_caller)
++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
++ }
++
+ /* Store pairs and load pairs have a range only -512 to 504. */
+ if (offset >= 512)
+ {
+@@ -2089,7 +2253,10 @@ aarch64_expand_prologue (void)
+ frame_size -= (offset + crtl->outgoing_args_size);
+ fp_offset = 0;
+
+- if (frame_size >= 0x1000000)
++ if (flag_stack_clash_protection
++ && frame_size >= guard_size - guard_used_by_caller)
++ aarch64_allocate_and_probe_stack_space (IP0_REGNUM, frame_size);
++ else if (frame_size >= 0x1000000)
+ {
+ rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
+ emit_move_insn (op0, GEN_INT (-frame_size));
+@@ -2206,10 +2373,30 @@ aarch64_expand_prologue (void)
+ {
+ if (crtl->outgoing_args_size > 0)
+ {
+- insn = emit_insn (gen_add2_insn
+- (stack_pointer_rtx,
+- GEN_INT (- crtl->outgoing_args_size)));
+- RTX_FRAME_RELATED_P (insn) = 1;
++ if (flag_stack_clash_protection)
++ {
++ /* First probe if the final adjustment is larger than the
++ guard size less the amount of guard reserved for use by
++ the caller's outgoing args. */
++ if (final_adjust >= guard_size - guard_used_by_caller)
++ aarch64_allocate_and_probe_stack_space (IP1_REGNUM,
++ final_adjust);
++ else
++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
++
++ /* We must also probe if the final adjustment is larger than the
++ guard that is assumed used by the caller. This may be
++ sub-optimal. */
++ if (final_adjust >= guard_used_by_caller)
++ {
++ if (dump_file)
++ fprintf (dump_file,
++ "Stack clash aarch64 large outgoing arg, probing\n");
++ emit_stack_probe (stack_pointer_rtx);
++ }
++ }
++ else
++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
+ }
+ }
+ }
+@@ -5088,6 +5275,12 @@ aarch64_override_options (void)
+ #endif
+ }
+
++ /* We assume the guard page is 64k. */
++ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
++ 16,
++ global_options.x_param_values,
++ global_options_set.x_param_values);
++
+ aarch64_override_options_after_change ();
+ }
+
+@@ -8161,6 +8354,28 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+ return ret;
+ }
+
++/* It has been decided to allow up to 1kb of outgoing argument
++ space to be allocated without probing. If more than 1kb of
++ outgoing argument space is allocated, then it must be probed and
++ the last probe must occur no more than 1kbyte away from the end
++ of the allocated space.
++
++ This implies that the residual part of an alloca allocation may
++ need probing in cases where the generic code might not otherwise
++ think a probe is needed.
++
++ This target hook returns TRUE when allocating RESIDUAL bytes of
++ alloca space requires an additional probe, otherwise FALSE is
++ returned. */
++
++static bool
++aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
++{
++ return (residual == CONST0_RTX (Pmode)
++ || GET_CODE (residual) != CONST_INT
++ || INTVAL (residual) >= 1024);
++}
++
+ #undef TARGET_ADDRESS_COST
+ #define TARGET_ADDRESS_COST aarch64_address_cost
+
+@@ -8378,6 +8593,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+ #undef TARGET_FIXED_CONDITION_CODE_REGS
+ #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
+
++#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
++#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
++ aarch64_stack_clash_protection_final_dynamic_probe
++
+ struct gcc_target targetm = TARGET_INITIALIZER;
+
+ #include "gt-aarch64.h"
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index a085c6acaf5..5485a5f70b1 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -3401,7 +3401,7 @@
+ )
+
+ (define_insn "probe_stack_range"
+- [(set (match_operand:DI 0 "register_operand" "=r")
++ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPECV_PROBE_STACK_RANGE))]
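The single-character constraint change above ("=r" to "=rk") is load-bearing: on AArch64 the k constraint matches the stack pointer, so probe_stack_range can decrement sp directly during the probe loop. A hypothetical test in the style of the new stack-check tests below, not part of the patch, that should force the loop form (sizes assume the default 4k probe interval and the 64k guard set in aarch64_override_options):

/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection" } */
/* { dg-require-effective-target supports_stack_clash_protection } */

extern void arf (unsigned long int *);

void
frob_big (void)
{
  /* 96k of locals: past the 63k unprobed allowance, and the rounded
     remainder (32k) exceeds four probe intervals (16k), so the
     probe_stack_range loop should be emitted instead of inline
     probes.  */
  unsigned long int num[12 * 1024];
  arf (num);
}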
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
+new file mode 100644
+index 00000000000..2ce38483b6b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++extern void arf (unsigned long int *, unsigned long int *);
++void
++frob ()
++{
++ unsigned long int num[1000];
++ unsigned long int den[1000];
++ arf (den, num);
++}
++
++/* This verifies that the scheduler did not break the dependencies
++ by adjusting the offsets within the probe and that the scheduler
++ did not reorder around the stack probes. */
++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
+new file mode 100644
+index 00000000000..d8886835989
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
++#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
++void out1(ARG192(__int128));
++int t1(int);
++
++int t3(int x)
++{
++ if (x < 1000)
++ return t1 (x) + 1;
++
++ out1 (ARG192(1));
++ return 0;
++}
++
++
++
++/* This test creates a large (> 1k) outgoing argument area that needs
++ to be probed. We don't test the exact size of the space or the
++ exact offset to make the test a little less sensitive to trivial
++ output changes. */
++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
+new file mode 100644
+index 00000000000..59ffe01376d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++int t1(int);
++
++int t2(int x)
++{
++ char *p = __builtin_alloca (4050);
++ x = t1 (x);
++ return p[x];
++}
++
++
++/* This test has a constant-sized alloca that is smaller than the
++ probe interval. But it actually requires two probes instead
++ of one because of the optimistic assumptions we made in the
++ aarch64 prologue code WRT probing state.
++
++ The form can change quite a bit so we just check for two
++ probes without looking at the actual address. */
++/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
++
++
++
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
+new file mode 100644
+index 00000000000..e06db6dc2f0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
++/* { dg-require-effective-target supports_stack_clash_protection } */
++
++int t1(int);
++
++int t2(int x)
++{
++ char *p = __builtin_alloca (x);
++ x = t1 (x);
++ return p[x];
++}
++
++
++/* This test has a variable-sized alloca. It requires 3 probes:
++ one in the loop, one for the residual, and one at the end of the
++ alloca area.
++
++ The form can change quite a bit so we just check for three
++ probes without looking at the actual address. */
++/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
++
++
++
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index aba99513ed0..a8451c98b08 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -5420,14 +5420,9 @@ proc check_effective_target_autoincdec { } {
+ #
+ proc check_effective_target_supports_stack_clash_protection { } {
+
+- # Temporary until the target bits are fully ACK'd.
+-# if { [istarget aarch*-*-*] } {
+-# return 1
+-# }
+-
+ if { [istarget x86_64-*-*] || [istarget i?86-*-*]
+ || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
+- || [istarget s390*-*-*] } {
++	 || [istarget aarch64*-*-*] || [istarget s390*-*-*] } {
+ return 1
+ }
+ return 0
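Finally, the PROBE_INLINE / PROBE_LOOP / NO_PROBE_* decisions recorded by dump_stack_clash_frame_info end up in the RTL dump for the pass that emits the prologue, which gives a quick way to check which strategy fired for a given function. A hedged example; the dump pass name is an assumption about where the output lands, not a documented interface:

/* Compile with a patched gcc, e.g.:
     gcc -O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -c big.c
   then search the resulting big.c.*.pro_and_epilogue dump for the
   stack clash notes written by dump_stack_clash_frame_info
   (assumed location of the output).  */

extern void consume (char *);

void
big (void)
{
  char buf[1 << 20];  /* 1MB frame: well past the 63k allowance */
  consume (buf);
}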