summaryrefslogtreecommitdiff
path: root/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
diff options
context:
space:
mode:
Diffstat (limited to '0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch')
-rw-r--r--0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch250
1 files changed, 250 insertions, 0 deletions
diff --git a/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch b/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
new file mode 100644
index 0000000..77f70f4
--- /dev/null
+++ b/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
@@ -0,0 +1,250 @@
+From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Mon, 12 Dec 2022 15:18:56 +0000
+Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11
+
+This implements the new tbranch optab for AArch64.
+
+We cannot emit one big RTL for the final instruction immediately.
+The reason all comparisons in the AArch64 backend expand to separate CC
+compares, with separate testing of the operands, is ifcvt.
+
+The separate CC compare is needed so ifcvt can produce csel, cset etc. from the
+compares. Unlike, say, combine, ifcvt cannot do recog on a parallel with a
+clobber. Should we emit the instruction directly, ifcvt will not be able
+to, say, make a csel, because we have no patterns which handle zero_extract and
+compare (unlike combine, ifcvt cannot transform the extract into an AND).
+
+While you could provide various patterns for this (and I did try) you end up
+with broken patterns because you can't add the clobber to the CC register. If
+you do, ifcvt recog fails.
+
+i.e.
+
+int
+f1 (int x)
+{
+ if (x & 1)
+ return 1;
+ return x;
+}
+
+We lose csel here.
+
+Secondly the reason the compare with an explicit CC mode is needed is so that
+ifcvt can transform the operation into a version that doesn't require the flags
+to be set. But it only does so if it knows the explicit usage of the CC reg.
+
+For instance
+
+int
+foo (int a, int b)
+{
+ return ((a & (1 << 25)) ? 5 : 4);
+}
+
+Doesn't require a comparison, the optimal form is:
+
+foo(int, int):
+ ubfx x0, x0, 25, 1
+ add w0, w0, 4
+ ret
+
+and no compare is actually needed. If you represent the instruction using an
+ANDS instead of a zero_extract then you get close, but you end up with an ands
+followed by an add, which is a slower operation.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
+ (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
+ (tbranch_<code><mode>3): New.
+ * config/aarch64/iterators.md (ZEROM, zerom): New.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/tbz_1.c: New test.
+---
+ gcc/config/aarch64/aarch64.md | 33 ++++++--
+ gcc/config/aarch64/iterators.md | 2 +
+ gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++
+ 3 files changed, 122 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 079c8a3f9..2becc888e 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -953,12 +953,29 @@
+ (const_int 1)))]
+ )
+
+-(define_insn "*tb<optab><mode>1"
++(define_expand "tbranch_<code><mode>3"
+ [(set (pc) (if_then_else
+- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
+- (const_int 1)
+- (match_operand 1
+- "aarch64_simd_shift_imm_<mode>" "n"))
++ (EQL (match_operand:ALLI 0 "register_operand")
++ (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
++ (label_ref (match_operand 2 ""))
++ (pc)))]
++ ""
++{
++ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
++ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
++ rtx val = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
++ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
++ operands[1] = const0_rtx;
++ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
++ operands[1]);
++})
++
++(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
++ [(set (pc) (if_then_else
++ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
++ (const_int 1)
++ (match_operand 1
++ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+@@ -969,15 +986,15 @@
+ {
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 2, "Ltb",
+- "<inv_tb>\\t%<w>0, %1, ");
++ "<inv_tb>\\t%<ALLI:w>0, %1, ");
+ else
+ {
+ operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+- return "tst\t%<w>0, %1\;<bcond>\t%l2";
++ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
+ }
+ }
+ else
+- return "<tbz>\t%<w>0, %1, %l2";
++ return "<tbz>\t%<ALLI:w>0, %1, %l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index 226dea48a..b616f5c9a 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -1104,6 +1104,8 @@
+
+ ;; Give the number of bits in the mode
+ (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
++(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
++(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
+
+ ;; Give the ordinal of the MSB in the mode
+ (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
+diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
+new file mode 100644
+index 000000000..39deb58e2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
+@@ -0,0 +1,95 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
++/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
++
++#include <stdbool.h>
++
++void h(void);
++
++/*
++** g1:
++** tbnz w[0-9]+, #?0, .L([0-9]+)
++** ret
++** ...
++*/
++void g1(bool x)
++{
++ if (__builtin_expect (x, 0))
++ h ();
++}
++
++/*
++** g2:
++** tbz w[0-9]+, #?0, .L([0-9]+)
++** b h
++** ...
++*/
++void g2(bool x)
++{
++ if (__builtin_expect (x, 1))
++ h ();
++}
++
++/*
++** g3_ge:
++** tbnz w[0-9]+, #?31, .L[0-9]+
++** b h
++** ...
++*/
++void g3_ge(int x)
++{
++ if (__builtin_expect (x >= 0, 1))
++ h ();
++}
++
++/*
++** g3_gt:
++** cmp w[0-9]+, 0
++** ble .L[0-9]+
++** b h
++** ...
++*/
++void g3_gt(int x)
++{
++ if (__builtin_expect (x > 0, 1))
++ h ();
++}
++
++/*
++** g3_lt:
++** tbz w[0-9]+, #?31, .L[0-9]+
++** b h
++** ...
++*/
++void g3_lt(int x)
++{
++ if (__builtin_expect (x < 0, 1))
++ h ();
++}
++
++/*
++** g3_le:
++** cmp w[0-9]+, 0
++** bgt .L[0-9]+
++** b h
++** ...
++*/
++void g3_le(int x)
++{
++ if (__builtin_expect (x <= 0, 1))
++ h ();
++}
++
++/*
++** g5:
++** mov w[0-9]+, 65279
++** tst w[0-9]+, w[0-9]+
++** beq .L[0-9]+
++** b h
++** ...
++*/
++void g5(int x)
++{
++ if (__builtin_expect (x & 0xfeff, 1))
++ h ();
++}
+--
+2.33.0
+