summaryrefslogtreecommitdiff
path: root/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
diff options
context:
space:
mode:
Diffstat (limited to '0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch')
-rw-r--r--0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch342
1 files changed, 342 insertions, 0 deletions
diff --git a/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch b/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
new file mode 100644
index 0000000..6f99e5c
--- /dev/null
+++ b/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
@@ -0,0 +1,342 @@
+From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001
+From: dingguangya <dingguangya1@huawei.com>
+Date: Sat, 29 Jul 2023 18:27:10 +0800
+Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp
+ instruction
+
+Add flag -fccmp2.
+Enable use of the ccmp instruction by adding extra conflict
+relationships in cases where temporary expression replacement
+cannot otherwise be performed.
+---
+ gcc/ccmp.cc | 33 ++++
+ gcc/ccmp.h | 1 +
+ gcc/common.opt | 4 +
+ gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++
+ gcc/tree-ssa-coalesce.cc | 197 ++++++++++++++++++++++
+ 5 files changed, 250 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+
+diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
+index 3db0a264e..e34f3bcc6 100644
+--- a/gcc/ccmp.cc
++++ b/gcc/ccmp.cc
+@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "cfgexpand.h"
+ #include "ccmp.h"
+ #include "predict.h"
++#include "gimple-iterator.h"
+
+ /* Check whether T is a simple boolean variable or a SSA name
+ set by a comparison operator in the same basic block. */
+@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
+ return false;
+ }
+
++/* Return true if BB ends in a GIMPLE_COND that compares (== or !=) a
++   boolean SSA name against an integer constant, and the assignment
++   defining that name satisfies ccmp_candidate_p.  */
++bool
++check_ccmp_candidate (basic_block bb)
++{
++  gimple *cond = gsi_stmt (gsi_last_bb (bb));
++
++  /* Only a block terminated by a condition can qualify.  */
++  if (!cond || gimple_code (cond) != GIMPLE_COND)
++    return false;
++
++  /* Only equality and inequality tests are considered.  */
++  enum tree_code cmp = gimple_cond_code (cond);
++  if (cmp != NE_EXPR && cmp != EQ_EXPR)
++    return false;
++
++  /* The tested value must be a boolean SSA name ...  */
++  tree lhs = gimple_cond_lhs (cond);
++  if (TREE_CODE (lhs) != SSA_NAME
++      || TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
++    return false;
++
++  /* ... compared against an integer constant.  */
++  if (TREE_CODE (gimple_cond_rhs (cond)) != INTEGER_CST)
++    return false;
++  gimple *def = SSA_NAME_DEF_STMT (lhs);
++  if (!def || gimple_code (def) != GIMPLE_ASSIGN)
++    return false;
++  return ccmp_candidate_p (def);
++}
++
+ /* Extract the comparison we want to do from the tree. */
+ void
+ get_compare_parts (tree t, int *up, rtx_code *rcode,
+diff --git a/gcc/ccmp.h b/gcc/ccmp.h
+index 1799d5fed..efe3a1c14 100644
+--- a/gcc/ccmp.h
++++ b/gcc/ccmp.h
+@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
+ #define GCC_CCMP_H
+
+ extern rtx expand_ccmp_expr (gimple *, machine_mode);
++extern bool check_ccmp_candidate (basic_block bb);
+
+ #endif /* GCC_CCMP_H */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 4d91ce8cf..0aa516719 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2017,6 +2017,10 @@ fira-verbose=
+ Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
+ -fira-verbose=<number> Control IRA's level of diagnostic messages.
+
++fccmp2
++Common Var(flag_ccmp2) Init(0) Optimization
++Optimize potential ccmp instruction in complex scenarios.
++
+ fivopts
+ Common Var(flag_ivopts) Init(1) Optimization
+ Optimize induction variables on trees.
+diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+new file mode 100644
+index 000000000..b509ba810
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
++
++int func (int a, int b, int c)
++{
++ while(1)
++ {
++ if(a-- == 0 || b >= c)
++ {
++ return 1;
++ }
++ }
++}
++
++/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
+diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc
+index dccf41ab8..195e06428 100644
+--- a/gcc/tree-ssa-coalesce.cc
++++ b/gcc/tree-ssa-coalesce.cc
+@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
+ #include "explow.h"
+ #include "tree-dfa.h"
+ #include "stor-layout.h"
++#include "ccmp.h"
++#include "target.h"
++#include "tree-outof-ssa.h"
+
+ /* This set of routines implements a coalesce_list. This is an object which
+ is used to track pairs of ssa_names which are desirable to coalesce
+@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
+ bitmap_clear (&ptr->live_base_var);
+ }
+
++/* Return true if GS is a copy assignment whose LHS and RHS1 are SSA names.  */
++static inline bool
++gimple_is_assign_copy_p (gimple *gs)
++{
++  if (!is_gimple_assign (gs) || !gimple_assign_copy_p (gs))
++    return false;
++  return (TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
++          && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
++}
++
++#define MAX_CCMP_CONFLICT_NUM 5
++
++/* Throw away every conflict recorded in CONFLICT_GRAPH when at least
++   MAX_CCMP_CONFLICT_NUM of its entries carry a conflict bitmap; such a
++   graph is treated as too costly to use.  */
++
++static void
++remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
++{
++  int populated = 0;
++  unsigned ix;
++  bitmap entry;
++
++  /* Count the entries that have a conflict bitmap allocated.  */
++  FOR_EACH_VEC_ELT (conflict_graph->conflicts, ix, entry)
++    populated += entry ? 1 : 0;
++
++  if (populated < MAX_CCMP_CONFLICT_NUM)
++    return;
++
++  conflict_graph->conflicts.release ();
++}
++
++/* Walk BB backwards from its last statement down to, but not including,
++   the first statement marked visited, feeding the defs and uses of each
++   statement that is still unprocessed into LIVE so the resulting
++   conflicts are recorded in CONFLICT_GRAPH.  Do nothing when no statement
++   in BB is marked visited.  */
++
++static void
++process_add_graph (live_track *live, basic_block bb,
++                   ssa_conflicts *conflict_graph)
++{
++  tree use, def;
++  ssa_op_iter iter;
++  gimple *first_visit_stmt = NULL;
++  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++       gsi_next (&gsi))
++    if (gimple_visited_p (gsi_stmt (gsi)))
++      {
++        first_visit_stmt = gsi_stmt (gsi);
++        break;
++      }
++  if (!first_visit_stmt)
++    return;
++
++  for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
++       gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++
++      /* Skip statements the caller has already processed (marked visited)
++         and debug statements, which must not affect code generation.
++         Either property alone is enough: a debug statement is never
++         marked visited, so requiring both would skip nothing.  */
++      if (gimple_visited_p (stmt) || is_gimple_debug (stmt))
++        continue;
++      /* For an SSA-to-SSA copy, clear the source from LIVE first.  */
++      if (gimple_is_assign_copy_p (stmt))
++        live_track_clear_var (live, gimple_assign_rhs1 (stmt));
++      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
++        live_track_process_def (live, def, conflict_graph);
++      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
++        live_track_process_use (live, use);
++    }
++}
++
++/* Fill CONFLICT_GRAPH for BB, handling the condition's defining stmts first. */
++
++static void
++add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
++ tree_live_info_p liveinfo, var_map map, basic_block bb)
++{
++ live_track *live;
++ tree use, def;
++ ssa_op_iter iter;
++ live = new_live_track (map);
++ live_track_init (live, live_on_exit (liveinfo, bb)); /* Seed with BB's live-on-exit set. */
++
++ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb)); /* Caller checked BB ends in a GIMPLE_COND. */
++ gcc_assert (gimple_cond_lhs (last_stmt));
++
++ auto_vec<tree> stack; /* Operands whose defining statements are still to be examined. */
++ stack.safe_push (gimple_cond_lhs (last_stmt));
++ while (!stack.is_empty ())
++ {
++ tree op = stack.pop ();
++ gimple *op_stmt = SSA_NAME_DEF_STMT (op);
++ if (!op_stmt || gimple_bb (op_stmt) != bb
++ || !is_gimple_assign (op_stmt)
++ || !ssa_is_replaceable_p (op_stmt))
++ {
++ continue; /* Not defined by a replaceable assignment inside BB. */
++ }
++ if (gimple_is_assign_copy_p (op_stmt))
++ {
++ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
++ }
++ gimple_set_visited (op_stmt, true); /* Read back by process_add_graph. */
++ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
++ {
++ live_track_process_def (live, def, conflict_graph);
++ }
++ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
++ {
++ stack.safe_push (use); /* Follow the chain through this operand's definition. */
++ live_track_process_use (live, use);
++ }
++ }
++
++ process_add_graph (live, bb, conflict_graph); /* Then the stmts after the first visited one. */
++ delete_live_track (live);
++ remove_high_cost_graph_for_ccmp (conflict_graph); /* May empty the graph again. */
++}
++
++/* If BB is a ccmp candidate, add the extra conflicts it needs to GRAPH.
+ For example:
+
+ ;; basic block 3, loop depth 1
+ ;; pred: 2
+ ;; 3
+ # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
+ _7 = b_4 (D) >= c_5 (D);
+ _8 = ivtmp.5_10 == 0;
+ _9 = _7 | _8;
+ ivtmp.5_11 = ivtmp.5_10 - 1;
+ if (_9 != 0)
+ goto <bb 4>; [10.70%]
+ else
+ goto <bb 3>; [89.30%]
+
+ In the above loop, the following expressions will be replaced:
+
+ _7 replaced by b_4 (D) >= c_5 (D)
+ _8 replaced by ivtmp.5_10 == 0
+
+ For this case to use the ccmp instruction,
+
+ _9 must also be replaced by _7 | _8
+
+ which requires that ivtmp.5_11 and ivtmp.5_10 be placed in different
+ partitions.
+
+ This function adds the conflicts needed to achieve that. */
++
++static void
++determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
++                                   var_map map, ssa_conflicts *graph)
++{
++  /* Nothing to do unless -fccmp2 is on, the target provides gen_ccmp_first
++     and BB ends in a ccmp candidate.  */
++  if (!(flag_ccmp2 && targetm.gen_ccmp_first && check_ccmp_candidate (bb)))
++    return;
++
++  /* Start with every statement of BB unmarked.  */
++  gimple_stmt_iterator it = gsi_start_bb (bb);
++  while (!gsi_end_p (it))
++    {
++      gimple_set_visited (gsi_stmt (it), false);
++      gsi_next (&it);
++    }
++  /* Collect the candidate's conflicts in a scratch graph.  */
++  ssa_conflicts *extra = ssa_conflicts_new (num_var_partitions (map));
++  add_ccmp_conflict_graph (extra, liveinfo, map, bb);
++  /* Merge each entry's first set conflict bit into GRAPH if missing.  */
++  unsigned p;
++  bitmap peers;
++  FOR_EACH_VEC_ELT (extra->conflicts, p, peers)
++    {
++      if (!peers)
++        continue;
++      unsigned q = bitmap_first_set_bit (peers);
++      if (graph->conflicts[p] && bitmap_bit_p (graph->conflicts[p], q))
++        continue;
++      ssa_conflicts_add (graph, p, q);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "potential ccmp: add additional "
++                 "conflict-ssa : bb[%d] %d:%d\n",
++                 bb->index, p, q);
++    }
++  ssa_conflicts_delete (extra);
++}
+
+ /* Build a conflict graph based on LIVEINFO. Any partitions which are in the
+ partition view of the var_map liveinfo is based on get entries in the
+@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
+ live_track_process_use (live, var);
+ }
+
++ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
++
+ /* If result of a PHI is unused, looping over the statements will not
+ record any conflicts since the def was never live. Since the PHI node
+ is going to be translated out of SSA form, it will insert a copy.
+--
+2.33.0
+