From 637e6469f2225b6f6f6b0c84b4e7abcd8dfd7ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Wed, 28 Sep 2022 11:19:06 +0200 Subject: [PATCH 127/157] [Backport][SME] riscv: Add support for strlen inline expansion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df48285b2484eb4f8e0570c566677114eb0e553a Note: Only introduce the definitions of the functions emit_likely_jump_insn and emit_unlikely_jump_insn, and drop the rest of the upstream change. This patch implements the expansion of the strlen builtin for RV32/RV64 for xlen-aligned strings if Zbb or XTheadBb instructions are available. The inserted sequences are: rv32gc_zbb (RV64 is similar): add a3,a0,4 li a4,-1 .L1: lw a5,0(a0) add a0,a0,4 orc.b a5,a5 beq a5,a4,.L1 not a5,a5 ctz a5,a5 srl a5,a5,0x3 add a0,a0,a5 sub a0,a0,a3 rv64gc_xtheadbb (RV32 is similar): add a4,a0,8 .L2: ld a5,0(a0) add a0,a0,8 th.tstnbz a5,a5 beqz a5,.L2 th.rev a5,a5 th.ff1 a5,a5 srl a5,a5,0x3 add a0,a0,a5 sub a0,a0,a4 This allows inlining calls to strlen(), with optimized code for xlen-aligned strings, resulting in the following benefits over a call to libc: * no call/ret instructions * no stack frame allocation * no register saving/restoring * no alignment test The inlining mechanism is gated by a new switch ('-minline-strlen') and by the variable 'optimize_size'. Tested using the glibc string tests. Signed-off-by: Christoph Müllner gcc/ChangeLog: * config.gcc: Add new object riscv-string.o. riscv-string.cc. * config/riscv/riscv-protos.h (riscv_expand_strlen): New function. * config/riscv/riscv.md (strlen): New expand INSN. * config/riscv/riscv.opt: New flag 'minline-strlen'. * config/riscv/t-riscv: Add new object riscv-string.o. * config/riscv/thead.md (th_rev2): Export INSN name. (th_rev2): Likewise. (th_tstnbz2): New INSN. * doc/invoke.texi: Document '-minline-strlen'. * emit-rtl.cc (emit_likely_jump_insn): New helper function. 
(emit_unlikely_jump_insn): Likewise. * rtl.h (emit_likely_jump_insn): New prototype. (emit_unlikely_jump_insn): Likewise. * config/riscv/riscv-string.cc: New file. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadbb-strlen-unaligned.c: New test. * gcc.target/riscv/xtheadbb-strlen.c: New test. * gcc.target/riscv/zbb-strlen-disabled-2.c: New test. * gcc.target/riscv/zbb-strlen-disabled.c: New test. * gcc.target/riscv/zbb-strlen-unaligned.c: New test. * gcc.target/riscv/zbb-strlen.c: New test. --- gcc/emit-rtl.cc | 24 ++++++++++++++++++++++++ gcc/rtl.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc index 1e02ae254..2df5ff414 100644 --- a/gcc/emit-rtl.cc +++ b/gcc/emit-rtl.cc @@ -5163,6 +5163,30 @@ emit_jump_insn (rtx x) return last; } +/* Make an insn of code JUMP_INSN with pattern X, + add a REG_BR_PROB note that indicates very likely probability, + and add it to the end of the doubly-linked list. */ + +rtx_insn * +emit_likely_jump_insn (rtx x) +{ + rtx_insn *jump = emit_jump_insn (x); + add_reg_br_prob_note (jump, profile_probability::very_likely ()); + return jump; +} + +/* Make an insn of code JUMP_INSN with pattern X, + add a REG_BR_PROB note that indicates very unlikely probability, + and add it to the end of the doubly-linked list. */ + +rtx_insn * +emit_unlikely_jump_insn (rtx x) +{ + rtx_insn *jump = emit_jump_insn (x); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); + return jump; +} + /* Make an insn of code CALL_INSN with pattern X and add it to the end of the doubly-linked list. 
*/ diff --git a/gcc/rtl.h b/gcc/rtl.h index 488016bb4..a0db225cb 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *); extern rtx_insn *emit_insn (rtx); extern rtx_insn *emit_debug_insn (rtx); extern rtx_insn *emit_jump_insn (rtx); +extern rtx_insn *emit_likely_jump_insn (rtx); +extern rtx_insn *emit_unlikely_jump_insn (rtx); extern rtx_insn *emit_call_insn (rtx); extern rtx_code_label *emit_label (rtx); extern rtx_jump_table_data *emit_jump_table_data (rtx); -- 2.33.0