diff options
Diffstat (limited to '0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch')
-rw-r--r-- | 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch | 1982 |
1 files changed, 1982 insertions, 0 deletions
diff --git a/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch b/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch new file mode 100644 index 0000000..4a3da78 --- /dev/null +++ b/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch @@ -0,0 +1,1982 @@ +From 8072fe107c04778de78db90bf6fdb7baf474e24a Mon Sep 17 00:00:00 2001 +From: dingguangya <dingguangya1@huawei.com> +Date: Thu, 2 Jun 2022 12:48:17 +0800 +Subject: [PATCH 12/12] [ArrayWidenCompare] Add a new optimization for array + comparison scenarios + +Add option farray-widen-compare. +For an array pointer whose element is a single-byte type, +by changing the pointer type to a long-byte type, the elements +can be combined and compared after loading. +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 5 + + gcc/doc/invoke.texi | 13 +- + gcc/passes.def | 1 + + .../gcc.dg/tree-ssa/awiden-compare-1.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-2.c | 90 + + .../gcc.dg/tree-ssa/awiden-compare-3.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-4.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-5.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-6.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-7.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-8.c | 24 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-array-widen-compare.c | 1555 +++++++++++++++++ + 15 files changed, 1813 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c + create mode 100644 gcc/tree-ssa-loop-array-widen-compare.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 23394c64b..2b2bf474a 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1591,6 +1591,7 @@ OBJS = \ + tree-ssa-loop-ivopts.o \ + tree-ssa-loop-manip.o \ + tree-ssa-loop-niter.o \ ++ tree-ssa-loop-array-widen-compare.o \ + tree-ssa-loop-prefetch.o \ + tree-ssa-loop-split.o \ + tree-ssa-loop-unswitch.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index 24834cf60..2985a5791 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1060,6 +1060,11 @@ fasynchronous-unwind-tables + Common Report Var(flag_asynchronous_unwind_tables) Optimization + Generate unwind tables that are exact at each instruction boundary. + ++farray-widen-compare ++Common Report Var(flag_array_widen_compare) Optimization ++Extends types for pointers to arrays to improve array comparsion performance. ++In some extreme situations this may result in unsafe behavior. ++ + fauto-inc-dec + Common Report Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 4b0fd2ffb..44f1f8a2e 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -459,7 +459,7 @@ Objective-C and Objective-C++ Dialects}. + -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol + -fno-allocation-dce -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol +--fauto-inc-dec -fbranch-probabilities @gol ++-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol + -fcaller-saves @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol +@@ -9710,6 +9710,17 @@ This pass is always skipped on architectures that do not have + instructions to support this. Enabled by default at @option{-O} and + higher on architectures that support this. + ++@item -farray-widen-compare ++@opindex farray-widen-compare ++In the narrow-byte array comparison scenario, the types of pointers ++pointing to array are extended so that elements of multiple bytes can ++be loaded at a time when a wide type is used to dereference an array, ++thereby improving the performance of this comparison scenario. In some ++extreme situations this may result in unsafe behavior. ++ ++This option may generate better or worse code; results are highly dependent ++on the structure of loops within the source code. ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. +diff --git a/gcc/passes.def b/gcc/passes.def +index e9c91d26e..797b803ca 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_dse); + NEXT_PASS (pass_cd_dce); + NEXT_PASS (pass_phiopt, true /* early_p */); ++ NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_convert_switch); + NEXT_PASS (pass_cleanup_eh); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +new file mode 100644 +index 000000000..27b69b0e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +new file mode 100644 +index 000000000..d102364f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define EMPTY_HASH_VALUE 0 ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++#define true 1 ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++ ++lzma_match * ++func ( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (true) ++ { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ { ++ *ptr0 = EMPTY_HASH_VALUE; ++ *ptr1 = EMPTY_HASH_VALUE; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) << 1); ++ ++ const uint8_t *const pb = cur -delta; ++ uint32_t len = my_min(len0, len1); ++ ++ if (pb[len] == cur[len]) ++ { ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ ++ if (len_best < len) ++ { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return matches; ++ } ++ } ++ } ++ ++ if (pb[len] < cur[len]) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +new file mode 100644 +index 000000000..52dd6b02b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 1; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +new file mode 100644 +index 000000000..d3185d326 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 2; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +new file mode 100644 +index 000000000..9743dc623 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len-1]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +new file mode 100644 +index 000000000..2323d5bf7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len++ != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +new file mode 100644 +index 000000000..33db62fa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ len = len + 1; ++ if (pb[len] != cur[len]) ++ break; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +new file mode 100644 +index 000000000..8c96d24a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include <stdint.h> ++#include <stdio.h> ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ { ++ len = len - 1; ++ break; ++ } ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index e873747a8..6d90bb6e1 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -215,6 +215,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization") + DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") ++DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index be6387768..aca0b83f2 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -436,6 +436,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-array-widen-compare.c b/gcc/tree-ssa-loop-array-widen-compare.c +new file mode 100644 +index 000000000..ba51d785d +--- /dev/null ++++ b/gcc/tree-ssa-loop-array-widen-compare.c +@@ -0,0 +1,1555 @@ ++/* Array widen compare. ++ Copyright (C) 2022-2022 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++/* This pass handles scenarios similar to the following: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ Features of this type of loop: ++ 1) the loop has two exits; ++ 2) One of the exits comes from the comparison result of the array; ++ ++ From the source code point of view, the pass completes the conversion of the ++ above scenario into: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ // align_loop ++ for(++len; len + sizeof(uint64_t) <= len_limit; len += sizeof (uint64_t)) ++ { ++ uint64_t a = *((uint64_t*)(cur+len)); ++ uint64_t b = *((uint64_t*)(pb+len)); ++ if (a != b) ++ { ++ int lz = __builtin_ctzll (a ^ b); ++ len += lz / 8; ++ return len; ++ } ++ } ++ // epilogue_loop ++ for (;len != len_limit; ++len) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ This pass is to complete the conversion of such scenarios from the internal ++ perspective of the compiler: ++ 1) determine_loop_form: The function completes the screening of such ++ scenarios; ++ 2) convert_to_new_loop: The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++ 3) origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, growth step size ++ of loop induction variable, initial value ++ of induction variable, etc; ++ 4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ ++ ++/* The useful information of origin loop. */ ++ ++struct origin_loop_info ++{ ++ tree base; /* The initial index of the array in the old loop. */ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree arr1; /* Array 1 in the old loop. */ ++ tree arr2; /* Array 2 in the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ basic_block exit_bb1; ++ basic_block exit_bb2; ++ edge exit_e1; ++ edge exit_e2; ++ gimple *cond_stmt1; ++ gimple *cond_stmt2; ++ gimple *update_stmt; ++ bool exist_prolog_assgin; ++ /* Whether the marker has an initial value assigned ++ to the array index. */ ++ unsigned HOST_WIDE_INT step; ++ /* The growth step of the loop induction variable. */ ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map <basic_block, tree> defs_map; ++ ++/* Dump the bb information in a loop. */ ++ ++static void ++dump_loop_bb (struct loop *loop) ++{ ++ basic_block *body = get_loop_body_in_dom_order (loop); ++ basic_block bb = NULL; ++ ++ for (unsigned i = 0; i < loop->num_nodes; i++) ++ { ++ bb = body[i]; ++ if (bb->loop_father != loop) ++ { ++ continue; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "===== the %dth bb of loop ==========:\n", i); ++ gimple_dump_bb (dump_file, bb, 0, dump_flags); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ free (body); ++} ++ ++/* Return true if the loop has precisely one backedge. */ ++ ++static bool ++loop_single_backedge_p (class loop *loop) ++{ ++ basic_block latch = loop->latch; ++ if (!single_succ_p (latch)) ++ return false; ++ ++ edge e = single_succ_edge (latch); ++ edge backedge = find_edge (latch, loop->header); ++ ++ if (e != backedge) ++ return false; ++ ++ return true; ++} ++ ++/* Return true if the loop has precisely one preheader BB. */ ++ ++static bool ++loop_single_preheader_bb (class loop *loop) ++{ ++ basic_block header = loop->header; ++ if (EDGE_COUNT (header->preds) != 2) ++ return false; ++ ++ edge e1 = EDGE_PRED (header, 0); ++ edge e2 = EDGE_PRED (header, 1); ++ ++ if ((e1->src == loop->latch && e2->src->loop_father != loop) ++ || (e2->src == loop->latch && e1->src->loop_father != loop)) ++ return true; ++ ++ return false; ++} ++ ++/* Initialize the origin_loop structure. */ ++static void ++init_origin_loop_structure () ++{ ++ origin_loop.base = NULL; ++ origin_loop.limit = NULL; ++ origin_loop.arr1 = NULL; ++ origin_loop.arr2 = NULL; ++ origin_loop.exit_e1 = NULL; ++ origin_loop.exit_e2 = NULL; ++ origin_loop.exit_bb1 = NULL; ++ origin_loop.exit_bb2 =NULL; ++ origin_loop.entry_edge = NULL; ++ origin_loop.cond_stmt1 = NULL; ++ origin_loop.cond_stmt2 = NULL; ++ origin_loop.update_stmt = NULL; ++ origin_loop.exist_prolog_assgin = false; ++ origin_loop.step = 0; ++} ++ ++/* Get the edge that first entered the loop. */ ++ ++static edge ++get_loop_preheader_edge (class loop *loop) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, loop->header->preds) ++ if (e->src != loop->latch) ++ break; ++ ++ if (!e) ++ { ++ gcc_assert (!loop_outer (loop)); ++ return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ } ++ ++ return e; ++} ++ ++/* Make sure the exit condition stmt satisfies a specific form. */ ++ ++static bool ++check_cond_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ if (gimple_code (stmt) != GIMPLE_COND) ++ return false; ++ ++ if (gimple_cond_code (stmt) != NE_EXPR && gimple_cond_code (stmt) != EQ_EXPR) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ /* The parameter that does not support the cond statement is not SSA_NAME. ++ eg: if (len_1 != 100). */ ++ if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ return true; ++} ++ ++/* Record the exit information in the original loop including exit edge, ++ exit bb block, exit condition stmt, ++ eg: exit_eX origin_exit_bbX cond_stmtX. */ ++ ++static bool ++record_origin_loop_exit_info (class loop *loop) ++{ ++ bool found = false; ++ edge e = NULL; ++ unsigned i = 0; ++ gimple *stmt; ++ ++ if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL ++ || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL ++ || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL) ++ return false; ++ ++ vec<edge> exit_edges = get_loop_exit_edges (loop); ++ if (exit_edges == vNULL) ++ return false; ++ ++ if (exit_edges.length () != 2) ++ goto fail; ++ ++ FOR_EACH_VEC_ELT (exit_edges, i, e) ++ { ++ if (e->src == loop->header) ++ { ++ origin_loop.exit_e1 = e; ++ origin_loop.exit_bb1 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt1 = stmt; ++ } ++ else ++ { ++ origin_loop.exit_e2 = e; ++ origin_loop.exit_bb2 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt2 = stmt; ++ } ++ } ++ ++ if (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL ++ && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL ++ && origin_loop.cond_stmt1 != NULL && origin_loop.cond_stmt2 != NULL) ++ found = true; ++ ++fail: ++ exit_edges.release (); ++ return found; ++} ++ ++/* Returns true if t is SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. */ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. */ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ gimple *g = SSA_NAME_DEF_STMT (rhs); ++ ++ /* TODO: Currently, the input restrictions on lhs and rhs are implemented ++ through PARM_DECL. We may consider releasing the restrictions later, and ++ we need to consider the overall adaptation scenario and adding test ++ cases. */ ++ if (ssa_name_var_p (rhs) && TREE_CODE (SSA_NAME_VAR (rhs)) == PARM_DECL ++ && g && gimple_code (g) == GIMPLE_NOP ++ && (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) != PARM_DECL)) ++ { ++ origin_loop.limit = rhs; ++ } ++ else ++ return false; ++ ++ if (origin_loop.limit != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Returns true only when the expression on the rhs code of stmt is PLUS_EXPR, ++ rhs1 is SSA_NAME with the same var as origin_loop base, and rhs2 is ++ INTEGER_CST. */ ++ ++static bool ++check_update_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ ++ if (gimple_assign_rhs_code (stmt) == PLUS_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST ++ && same_ssa_name_var_p (rhs1, origin_loop.base)) ++ { ++ origin_loop.step = tree_to_uhwi (rhs2); ++ if (origin_loop.step == 1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Get origin loop induction variable initial value. */ ++ ++static bool ++get_iv_base (gimple *stmt) ++{ ++ tree lhs = gimple_cond_lhs (stmt); ++ if (origin_loop.base != NULL || origin_loop.update_stmt != NULL) ++ return false; ++ ++ basic_block header = gimple_bb (stmt); ++ ++ gphi_iterator gsi; ++ edge e; ++ edge_iterator ei; ++ tree iv_after; ++ ++ for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, lhs)) ++ continue; ++ tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge); ++ if (!same_ssa_name_var_p (base, lhs)) ++ return false; ++ origin_loop.base = base; ++ FOR_EACH_EDGE (e, ei, header->preds) ++ { ++ if (e != origin_loop.entry_edge) ++ { ++ iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e); ++ gimple *update = SSA_NAME_DEF_STMT (iv_after); ++ if (!check_update_stmt (update)) ++ return false; ++ origin_loop.update_stmt = update; ++ if (gimple_bb (update) == header && iv_after == lhs) ++ origin_loop.exist_prolog_assgin = true; ++ } ++ } ++ } ++ ++ if (origin_loop.base != NULL && origin_loop.update_stmt != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Record the upper bound and initial value of the induction variable in the ++ original loop; When prolog_assign is present, make sure loop header is in ++ simple form; And the interpretation of prolog_assign is as follows: ++ eg: while (++len != limit) ++ ...... ++ For such a loop, ++len will be processed before entering header_bb, and the ++ assign is regarded as the prolog_assign of the loop. */ ++ ++static bool ++record_origin_loop_header (class loop *loop) ++{ ++ basic_block header = loop->header; ++ ++ if (origin_loop.entry_edge != NULL || origin_loop.base != NULL ++ || origin_loop.update_stmt != NULL || origin_loop.limit != NULL) ++ return false; ++ origin_loop.entry_edge = get_loop_preheader_edge (loop); ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ ++ for (gsi = gsi_last_bb (header); !gsi_end_p (gsi); gsi_prev (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt && is_gimple_debug (stmt)) ++ continue; ++ if (stmt && gimple_code (stmt) == GIMPLE_COND) ++ { ++ if (!get_iv_upper_bound (stmt)) ++ return false; ++ if (!get_iv_base (stmt)) ++ return false; ++ } ++ else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) ++ { ++ if (stmt != origin_loop.update_stmt || !origin_loop.exist_prolog_assgin) ++ return false; ++ } ++ else ++ return false; ++ } ++ ++ if (origin_loop.entry_edge != NULL && origin_loop.base != NULL ++ && origin_loop.update_stmt != NULL && origin_loop.limit != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* When prolog_assign does not exist, make sure that update_stmt exists in the ++ loop latch, and its form is a specific form, eg: ++ len_2 = len_1 + 1. */ ++ ++static bool ++record_origin_loop_latch (class loop *loop) ++{ ++ basic_block latch = loop->latch; ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ ++ gsi = gsi_start_bb (latch); ++ ++ if (origin_loop.exist_prolog_assgin) ++ { ++ if (gsi_end_p (gsi)) ++ return true; ++ } ++ else ++ { ++ if (gsi_one_before_end_p (gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == origin_loop.update_stmt) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Returns true when the DEF_STMT corresponding to arg0 of the mem_ref tree ++ satisfies the POINTER_PLUS_EXPR type. */ ++ ++static bool ++check_body_mem_ref (tree mem_ref) ++{ ++ tree arg0 = TREE_OPERAND (mem_ref , 0); ++ tree arg1 = TREE_OPERAND (mem_ref , 1); ++ ++ if (TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE ++ && TREE_CODE (arg1) == INTEGER_CST ++ && tree_to_uhwi (arg1) == 0) ++ { ++ gimple *tmp_g = SSA_NAME_DEF_STMT (arg0); ++ if (tmp_g && gimple_assign_rhs_code (tmp_g) == POINTER_PLUS_EXPR) ++ return true; ++ } ++ return false; ++} ++ ++/* Returns true if the rh2 of the current stmt comes from the base in the ++ original loop. */ ++ ++static bool ++check_body_pointer_plus (gimple *stmt, tree &tmp_index) ++{ ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (TREE_TYPE (rhs1)) == POINTER_TYPE) ++ { ++ gimple *g = SSA_NAME_DEF_STMT (rhs2); ++ if (g && gimple_assign_rhs_code (g) == NOP_EXPR) ++ { ++ tree nop_rhs = gimple_assign_rhs1 (g); ++ if (same_ssa_name_var_p (nop_rhs, origin_loop.base)) ++ { ++ if (!origin_loop.arr1) ++ { ++ origin_loop.arr1 = rhs1; ++ tmp_index = rhs2; ++ } ++ else if (!origin_loop.arr2) ++ { ++ origin_loop.arr2 = rhs1; ++ if (tmp_index != rhs2) ++ return false; ++ } ++ else ++ return false; ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ ++/* Record the array comparison information in the original loop, while ensuring ++ that there are only statements related to cont_stmt in the loop body. */ ++ ++static bool ++record_origin_loop_body (class loop *loop) ++{ ++ basic_block body = gimple_bb (origin_loop.cond_stmt2); ++ ++ if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL) ++ return false; ++ ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple_set_visited (gsi_stmt (gsi), false); ++ } ++ ++ tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2); ++ tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2); ++ if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE) ++ return false; ++ ++ auto_vec<tree> stack; ++ tree tmp_index = NULL; ++ stack.safe_push (cond_lhs); ++ stack.safe_push (cond_rhs); ++ gimple_set_visited (origin_loop.cond_stmt2, true); ++ ++ while (!stack.is_empty ()) ++ { ++ tree op = stack.pop (); ++ gimple *g = SSA_NAME_DEF_STMT (op); ++ if (!g || gimple_bb (g) != body || !is_gimple_assign (g)) ++ continue; ++ gimple_set_visited (g, true); ++ if (gimple_assign_rhs_code (g) == MEM_REF) ++ { ++ tree mem_ref = gimple_assign_rhs1 (g); ++ if (!check_body_mem_ref (mem_ref)) ++ return false; ++ stack.safe_push (TREE_OPERAND (mem_ref , 0)); ++ } ++ else if (gimple_assign_rhs_code (g) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (g); ++ if (!check_body_pointer_plus (g, tmp_index)) ++ return false; ++ stack.safe_push (rhs2); ++ } ++ else if (gimple_assign_rhs_code (g) == NOP_EXPR) ++ { ++ tree rhs = gimple_assign_rhs1 (g); ++ if (!same_ssa_name_var_p (rhs, origin_loop.base)) ++ return false; ++ stack.safe_push (rhs); ++ } ++ else ++ return false; ++ } ++ bool allvisited = true; ++ for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ if (!gimple_visited_p (gsi_stmt (gsi)) ++ && !is_gimple_debug (gsi_stmt (gsi))) ++ allvisited = false; ++ } ++ if (allvisited) ++ { ++ if (origin_loop.arr1 != NULL && origin_loop.arr2 != NULL) ++ return true; ++ } ++ return false; ++} ++ ++/* Dump the original loop information to see if the origin loop ++ form matches. */ ++ ++static void ++dump_origin_loop_info () ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nThe origin loop info:\n"); ++ fprintf (dump_file, "\n the origin_loop.limit is:\n"); ++ print_node (dump_file, "", origin_loop.limit, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.base is:\n"); ++ print_node (dump_file, "", origin_loop.base, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr1 is:\n"); ++ print_node (dump_file, "", origin_loop.arr1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr2 is:\n"); ++ print_node (dump_file, "", origin_loop.arr2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt1 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt2 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.update_stmt is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.update_stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Returns true only if the exit bb of the original loop is unique and its phi ++ node parameter comes from the same variable. */ ++ ++static bool ++check_exit_bb (class loop *loop) ++{ ++ if (origin_loop.exit_bb1 != origin_loop.exit_bb2 ++ || flow_bb_inside_loop_p (loop, origin_loop.exit_bb1)) ++ return false; ++ ++ gphi_iterator gsi; ++ for (gsi = gsi_start_phis (origin_loop.exit_bb1); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, origin_loop.base)) ++ continue; ++ if (gimple_phi_num_args (phi) == 2) ++ { ++ tree arg0 = gimple_phi_arg_def (phi, 0); ++ tree arg1 = gimple_phi_arg_def (phi, 1); ++ if (arg0 == arg1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Make sure that the recorded origin_loop information meets the ++ relative requirements. */ ++ ++static bool ++check_origin_loop_info (class loop *loop) ++{ ++ dump_origin_loop_info (); ++ tree arr1_elem_size, arr2_elem_size; ++ ++ if (!check_exit_bb (loop)) ++ return false; ++ ++ if (TREE_CODE (origin_loop.base) != SSA_NAME) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (origin_loop.limit))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr1)))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr2)))) ++ return false; ++ ++ if (TREE_CODE (TREE_TYPE (origin_loop.arr1)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (origin_loop.arr2)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))) != INTEGER_TYPE) ++ return false; ++ ++ arr1_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))); ++ arr2_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))); ++ ++ if (tree_to_uhwi (arr1_elem_size) != 8 || tree_to_uhwi (arr2_elem_size) != 8) ++ return false; ++ ++ return true; ++} ++ ++/* Record the useful information of the original loop and judge whether the ++ information meets the specified conditions. */ ++ ++static bool ++check_record_loop_form (class loop *loop) ++{ ++ if (!record_origin_loop_exit_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop exit information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_header (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop header information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_latch (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop latch information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_body (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop body information.\n"); ++ } ++ return false; ++ } ++ ++ if (!check_origin_loop_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to check origin loop information.\n"); ++ } ++ return false; ++ } ++ ++ return true; ++} ++ ++/* The main entry for judging whether the loop meets some conditions. */ ++ ++static bool ++determine_loop_form (class loop *loop) ++{ ++ /* Currently only standard loops are processed, that is, only loop_header, ++ loop_latch, loop_body 3 bb blocks are included. */ ++ if (loop->inner || loop->num_nodes != 3) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, there is inner loop or" ++ "redundant bb.\n"); ++ } ++ return false; ++ } ++ ++ if (single_exit (loop) || !loop->latch) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch" ++ "does not exist.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one backedge. */ ++ if (!loop_single_backedge_p (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop back edges are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one preheader BB. */ ++ if (!loop_single_preheader_bb (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop preheader bb are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ init_origin_loop_structure (); ++ if (!check_record_loop_form (loop)) ++ return false; ++ ++ return true; ++} ++ ++/* Create prolog bb for newly constructed loop; When prolog_assign exists in ++ the original loop, the corresponding assign needs to be added to prolog_bb; ++ eg: <bb 7> ++ len_16 = len_10 + 1 ++ Create simple copy statement when prolog_assign does not exist; ++ eg: <bb 7> ++ len_16 = len_10 ++ ++ The IR of bb is as above. */ ++ ++static void ++create_prolog_bb (basic_block &prolog_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree lhs1; ++ ++ prolog_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (prolog_bb, outer); ++ redirect_edge_and_branch (entry_edge, prolog_bb); ++ set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb); ++ gsi = gsi_last_bb (prolog_bb); ++ lhs1 = copy_ssa_name (origin_loop.base); ++ ++ if (origin_loop.exist_prolog_assgin) ++ g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base, ++ build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step)); ++ else ++ g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ set_current_def (origin_loop.base, lhs1); ++ defs_map.put (prolog_bb, lhs1); ++} ++ ++/* Create preheader bb for new loop; In order to ensure the standard form of ++ the loop, add a preheader_bb before loop_header. */ ++ ++static void ++create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ loop_pred_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (loop_pred_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, loop_pred_bb, dominator_bb); ++ defs_map.put (loop_pred_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Add phi_arg for bb with phi node. */ ++ ++static void ++rewrite_add_phi_arg (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ gphi *phi; ++ gphi_iterator gsi; ++ tree res; ++ location_t loc; ++ ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ phi = gsi.phi (); ++ res = gimple_phi_result (phi); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (PHI_ARG_DEF_FROM_EDGE (phi, e)) ++ continue; ++ tree var = *(defs_map.get (e->src)); ++ if (!same_ssa_name_var_p (var, res)) ++ continue; ++ if (virtual_operand_p (var)) ++ loc = UNKNOWN_LOCATION; ++ else ++ loc = gimple_location (SSA_NAME_DEF_STMT (var)); ++ add_phi_arg (phi, var, e, loc); ++ } ++ } ++} ++ ++/* Create loop_header BB for align_loop. ++ eg: <bb 9> ++ _18 = (long unsigned int) len_17; ++ _19 = _18 + 8; ++ _20 = (long unsigned int) len_limit_12 (D); ++ if (_19 <= _20) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_header (basic_block &align_loop_header, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ gphi *phi; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_header, outer); ++ make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb); ++ gsi = gsi_last_bb (align_loop_header); ++ phi = create_phi_node (NULL_TREE, align_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ ++ tree lhs1 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, res); ++ tree lhs2 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (lhs1), lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 8)); ++ tree lhs3 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, ++ origin_loop.limit); ++ cond_stmt = gimple_build_cond (LE_EXPR, lhs2, lhs3, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.base, res); ++ defs_map.put (align_loop_header, res); ++} ++ ++/* Create loop body BB for align_loop. ++ eg: <bb 10> ++ _21 = (sizetype) len_17; ++ _22 = cur_15 (D) + _21; ++ _23 = MEM[(long unsigned int *)_22]; ++ _24 = pb_13 (D) + _21; ++ _25 = MEM[(long unsigned int *)_24]; ++ if (_23 != _25) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_body_bb (basic_block &align_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2; ++ ++ align_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_body_bb, outer); ++ make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_body_bb); ++ ++ tree var = gimple_build (&stmts, NOP_EXPR, sizetype, ++ get_current_def (origin_loop.base)); ++ lhs1 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs1, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_assign_lhs (g); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs2, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), ++ lhs1, lhs2, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* Create loop_latch BB for align_loop. ++ eg: <bb 11> ++ len_26 = len_17 + 8; ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_latch, outer); ++ make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (align_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), 8)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_latch, res); ++} ++ ++/* Create a new loop and add it to outer_loop and return. */ ++ ++static class loop * ++init_new_loop (class loop *outer_loop, basic_block header, basic_block latch) ++{ ++ class loop *new_loop; ++ new_loop = alloc_loop (); ++ new_loop->header = header; ++ new_loop->latch = latch; ++ add_loop (new_loop, outer_loop); ++ ++ return new_loop; ++} ++ ++/* Create necessary exit BB for align_loop. ++ eg: <bb 12> ++ _27 = _23 ^ _25; ++ _28 = __builtin_ctzll (_27); ++ _29 = _28 >> 3; ++ len_30 = _29 + len_17; ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_exit_bb (basic_block &align_loop_exit_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gimple *cond_stmt; ++ tree lhs1, lhs2; ++ tree cond_lhs, cond_rhs; ++ gcall *build_ctzll; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_exit_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_exit_bb, outer); ++ make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_exit_bb); ++ ++ cond_stmt = gsi_stmt (gsi_last_bb (after_bb)); ++ cond_lhs = gimple_cond_lhs (cond_stmt); ++ cond_rhs = gimple_cond_rhs (cond_stmt); ++ ++ lhs1 = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (cond_lhs), cond_lhs, ++ cond_rhs); ++ build_ctzll = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1, ++ lhs1); ++ lhs1 = make_ssa_name (integer_type_node); ++ gimple_call_set_lhs (build_ctzll, lhs1); ++ gimple_seq_add_stmt (&stmts, build_ctzll); ++ lhs2 = copy_ssa_name (lhs1); ++ g = gimple_build_assign (lhs2, RSHIFT_EXPR, lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 3)); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (entry_node), lhs2); ++ lhs2 = copy_ssa_name (entry_node); ++ g = gimple_build_assign (lhs2, PLUS_EXPR, lhs1, entry_node); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_exit_bb, lhs2); ++} ++ ++/* Create loop_header BB for epilogue_loop. ++ eg: <bb 14> ++ # len_31 = PHI <len_17 (13), len_37 (16)> ++ if (len_31 != len_limit_12 (D)) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_epilogue_loop_header (basic_block &epilogue_loop_header, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ tree res; ++ gphi *phi; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_header, outer); ++ make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_header); ++ phi = create_phi_node (NULL_TREE, epilogue_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), res, ++ origin_loop.limit, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.base, res); ++ defs_map.put (epilogue_loop_header, res); ++} ++ ++/* Create loop body BB for epilogue_loop. ++ eg: <bb 15> ++ _32 = (sizetype) len_31; ++ _33 = pb_13 (D) + _32; ++ _34 = *_33; ++ _35 = cur_15 (D) + _32; ++ _36 = *_35; ++ if (_34 != _36) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2, lhs3; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_body_bb, outer); ++ make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_body_bb); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, sizetype, entry_node); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs2, ++ build_int_cst (TREE_TYPE (lhs2), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ lhs3 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs3, ++ build_int_cst (TREE_TYPE (lhs3), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs3 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), lhs2, ++ lhs3, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Create loop_latch BB for epilogue_loop. ++ eg: <bb 16> ++ len_37 = len_31 + 1; ++ ++ The IR of bb is as above. */ ++ ++static void ++create_epilogue_loop_latch (basic_block &epilogue_loop_latch, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_latch, outer); ++ make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), origin_loop.step)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_latch, res); ++} ++ ++/* convert_to_new_loop ++ | | ++ | | ++ | | entry_edge ++ | ______ | ++ | / V V ++ | | -----origin_loop_header--- ++ | | | | ++ | | -------------------------\ ++ | | | \ ++ | | V \___ ___ ___ ___ ___ ___ ___ ++ | | -----origin_loop_body----- | ++ | | | | | ++ | | -------------------------\ | ++ | | | \___ ___ ___ ___ | ++ | | V V V ++ | | -----origin_loop_latch---- -----exit_bb------ ++ | | | | | | ++ | | /-------------------------- ------------------ ++ | \ __ / ++ | ++ | | ++ | ====> |entry_edge ++ | V ++ | -------prolog_bb----- ++ | | | ++ | --------------------- ++ | | ++ | V ++ | -----align_loop_header---- ++ | /-----------------> | | ++ |/ -------------------------- ++ || / \ ++ || V V ++ || ---align_loop_body--- ---epilogue_loop_header-- ++ || | | -------| |<---| ++ || --------------------\ / ------------------------- | ++ || | \____ | | | ++ || V | | V | ++ || ---align_loop_latch--- | | ---epilogue_loop_body---- | ++ || | | | | ----| | | ++ || ---------------------- | | / ------------------------- | ++ || / __________/ | | | | ++ || / | | | V | ++ | \ __________/ | | | ---epilogue_loop_latch--- | ++ | | | | | | | ++ | | | | ------------------------- / ++ | V | | | / ++ | -align_loop_exit_bb- | | \______________/ ++ | | | | | ++ | -------------------- | | ++ | | | | ++ | | V V ++ | | -----exit_bb------ ++ | |---->| | ++ | ------------------ ++ ++ The origin_loop conversion process starts from entry_edge and ends at ++ exit_bb; The execution logic of origin_loop is completely replaced by ++ align_loop + epilogue_loop: ++ 1) align_loop mainly implements the idea of using wide-type dereference ++ and comparison on array elements, so as to achieve the effect of ++ acceleration; For the corresponding source code understanding, please ++ refer to the description of the pass at the beginning; ++ 2) epilogue_loop processes the previous loop remaining array element ++ comparison. */ ++ ++static void ++create_new_loops (edge entry_edge) ++{ ++ basic_block prolog_bb; ++ basic_block align_loop_header, align_loop_latch, align_loop_body_bb; ++ basic_block align_pred_bb, align_loop_exit_bb; ++ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb; ++ basic_block epilogue_loop_pred_bb; ++ class loop *align_loop; ++ class loop *epilogue_loop; ++ ++ class loop *outer = entry_edge->src->loop_father; ++ ++ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer, ++ entry_edge); ++ ++ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer); ++ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU); ++ ++ create_align_loop_header (align_loop_header, align_pred_bb, ++ align_pred_bb, outer); ++ ++ create_align_loop_body_bb (align_loop_body_bb, align_loop_header, ++ align_loop_header, outer); ++ ++ create_align_loop_latch (align_loop_latch, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); ++ rewrite_add_phi_arg (align_loop_header); ++ ++ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num); ++ flow_loop_dump (align_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ ++ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header, ++ align_loop_header, outer); ++ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); ++ ++ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb, ++ epilogue_loop_pred_bb, outer); ++ ++ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header, ++ epilogue_loop_header, outer); ++ ++ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb, ++ epilogue_loop_body_bb, outer); ++ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header, ++ EDGE_FALLTHRU); ++ rewrite_add_phi_arg (epilogue_loop_header); ++ ++ epilogue_loop = init_new_loop (outer, epilogue_loop_header, ++ epilogue_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num); ++ flow_loop_dump (epilogue_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1, ++ EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1, ++ entry_edge->src); ++ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE); ++ ++ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2, ++ entry_edge->src); ++ ++ rewrite_add_phi_arg (origin_loop.exit_bb1); ++ rewrite_add_phi_arg (origin_loop.exit_bb2); ++ ++ remove_edge (origin_loop.exit_e1); ++ remove_edge (origin_loop.exit_e2); ++} ++ ++/* Make sure that the dominance relationship of the newly inserted cfg ++ is not missing. */ ++ ++static void ++update_loop_dominator (cdi_direction dir) ++{ ++ gcc_assert (dom_info_available_p (dir)); ++ ++ basic_block bb; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ basic_block imm_bb = get_immediate_dominator (dir, bb); ++ if (!imm_bb || bb == origin_loop.exit_bb1) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, bb, ++ recompute_dominator (CDI_DOMINATORS, bb)); ++ continue; ++ } ++ } ++} ++ ++/* Clear information about the original loop. */ ++ ++static void ++remove_origin_loop (class loop *loop) ++{ ++ basic_block *body; ++ ++ body = get_loop_body_in_dom_order (loop); ++ unsigned n = loop->num_nodes; ++ for (unsigned i = 0; i < n; i++) ++ { ++ delete_basic_block (body[i]); ++ } ++ free (body); ++ delete_loop (loop); ++} ++ ++/* Perform the conversion of origin_loop to new_loop. */ ++ ++static void ++convert_to_new_loop (class loop *loop) ++{ ++ create_new_loops (origin_loop.entry_edge); ++ remove_origin_loop (loop); ++ update_loop_dominator (CDI_DOMINATORS); ++ update_ssa (TODO_update_ssa); ++} ++ ++/* The main entry of array-widen-compare optimizes. */ ++ ++static unsigned int ++tree_ssa_array_widen_compare () ++{ ++ unsigned int todo = 0; ++ class loop *loop; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ flow_loops_dump (dump_file, NULL, 1); ++ fprintf (dump_file, "\nConfirm which loop can be optimized using" ++ " array-widen-compare\n"); ++ } ++ ++ enum li_flags LI = LI_FROM_INNERMOST; ++ FOR_EACH_LOOP (loop, LI) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (determine_loop_form (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ dump_loop_bb (loop); ++ } ++ ++ convert_to_new_loop (loop); ++ } ++ } ++ ++ todo |= (TODO_update_ssa); ++ return todo; ++} ++ ++/* Array widen compare. */ ++ ++namespace { ++ ++const pass_data pass_data_tree_array_widen_compare = ++{ ++ GIMPLE_PASS, ++ "awiden_compare", ++ OPTGROUP_LOOP, ++ TV_TREE_ARRAY_WIDEN_COMPARE, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_array_widen_compare : public gimple_opt_pass ++{ ++public: ++ pass_array_widen_compare (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++ ++}; // class pass_array_widen_compare ++ ++bool ++pass_array_widen_compare::gate (function *) ++{ ++ return (flag_array_widen_compare > 0 && optimize >= 3); ++} ++ ++unsigned int ++pass_array_widen_compare::execute (function *fun) ++{ ++ if (number_of_loops (fun) <= 1) ++ return 0; ++ ++ /* Only supports LP64 data mode. */ ++ if (TYPE_PRECISION (long_integer_type_node) != 64 ++ || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "The current data mode is not supported," ++ "only the LP64 date mode is supported.\n"); ++ return 0; ++ } ++ ++ return tree_ssa_array_widen_compare (); ++} ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_array_widen_compare (gcc::context *ctxt) ++{ ++ return new pass_array_widen_compare (ctxt); ++} +\ No newline at end of file +-- +2.27.0.windows.1 + |