summaryrefslogtreecommitdiff
path: root/0025-AutoPrefetch-Support-cache-misses-profile.patch
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-10-17 02:15:03 +0000
committerCoprDistGit <infra@openeuler.org>2023-10-17 02:15:03 +0000
commitd82826d1a1c7ea45a761dfbf76b879712c7332ec (patch)
tree973a28470803b27c914f813f43d43f8932763ea3 /0025-AutoPrefetch-Support-cache-misses-profile.patch
parentb868000cf68cec0c9cd45fbf89a83173dea7c5eb (diff)
automatic import of gccopeneuler22.03_LTS
Diffstat (limited to '0025-AutoPrefetch-Support-cache-misses-profile.patch')
-rw-r--r--0025-AutoPrefetch-Support-cache-misses-profile.patch669
1 files changed, 669 insertions, 0 deletions
diff --git a/0025-AutoPrefetch-Support-cache-misses-profile.patch b/0025-AutoPrefetch-Support-cache-misses-profile.patch
new file mode 100644
index 0000000..1daa2db
--- /dev/null
+++ b/0025-AutoPrefetch-Support-cache-misses-profile.patch
@@ -0,0 +1,669 @@
+From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001
+From: benniaobufeijiushiji <linda7@huawei.com>
+Date: Thu, 6 Jan 2022 15:33:29 +0800
+Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile
+
+Add pass ex-afdo after pass afdo in auto-profile.c.
+Add flag -fcache-misses-profile.
+Read profile of different types of perf events and build maps for
+function and gimple location to its count of each perf event.
+Currently, instruction execution and cache misses are supported.
+---
+ gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++
+ gcc/auto-profile.h | 28 +++
+ gcc/common.opt | 14 ++
+ gcc/opts.c | 26 +++
+ gcc/passes.def | 1 +
+ gcc/timevar.def | 1 +
+ gcc/toplev.c | 6 +
+ gcc/tree-pass.h | 2 +
+ 8 files changed, 493 insertions(+)
+
+diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
+index 7d09887c9..aced8fca5 100644
+--- a/gcc/auto-profile.c
++++ b/gcc/auto-profile.c
+@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see
+ #include "auto-profile.h"
+ #include "tree-pretty-print.h"
+ #include "gimple-pretty-print.h"
++#include <map>
++#include <vector>
++#include <algorithm>
+
+ /* The following routines implements AutoFDO optimization.
+
+@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
+ */
+
+ #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
++#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov"
+ #define AUTO_PROFILE_VERSION 1
+
+ namespace autofdo
+@@ -117,6 +121,14 @@ private:
+ bool annotated_;
+ };
+
++/* Order <function decl_uid, count> pairs by descending count.  */
++static bool
++event_count_cmp (const std::pair<unsigned, gcov_type> &a,
++                 const std::pair<unsigned, gcov_type> &b)
++{
++  return a.second > b.second;
++}
++
+ /* Represent a source location: (function_decl, lineno). */
+ typedef std::pair<tree, unsigned> decl_lineno;
+
+@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile;
+ /* gcov_summary structure to store the profile_info. */
+ static gcov_summary *afdo_profile_info;
+
++/* Check opts->x_flags and put file name into EVENT_FILES. */
++
++static bool
++get_all_profile_names (const char **event_files)
++{
++ if (!(flag_auto_profile || flag_cache_misses_profile))
++ {
++ return false;
++ }
++
++ event_files[INST_EXEC] = auto_profile_file;
++
++ if (cache_misses_profile_file == NULL)
++ {
++ cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE;
++ }
++ event_files[CACHE_MISSES] = cache_misses_profile_file;
++
++ return true;
++}
++
++static void read_profile (void);
++
++/* Maintain multiple profile data of different events with event_loc_count_map
++ and event_func_count_map. */
++
++class extend_auto_profile
++{
++public:
++ bool auto_profile_exist (enum event_type type);
++ gcov_type get_loc_count (location_t, event_type);
++ gcov_type get_func_count (unsigned, event_type);
++ struct rank_info get_func_rank (unsigned, enum event_type);
++ /* There should be only one instance of class EXTEND_AUTO_PROFILE. */
++ static extend_auto_profile *create ()
++ {
++ extend_auto_profile *map = new extend_auto_profile ();
++ if (map->read ())
++ {
++ return map;
++ }
++ delete map;
++ return NULL;
++ }
++private:
++ /* Basic maps of extend_auto_profile. */
++ typedef std::map<location_t, gcov_type> loc_count_map;
++ typedef std::map<unsigned, gcov_type> func_count_map;
++
++ /* Map of function_uid to its descending order rank of counts. */
++ typedef std::map<unsigned, unsigned> rank_map;
++
++ /* Mapping hardware events to corresponding basic maps. */
++ typedef std::map<event_type, loc_count_map> event_loc_count_map;
++ typedef std::map<event_type, func_count_map> event_func_count_map;
++ typedef std::map<event_type, rank_map> event_rank_map;
++
++ extend_auto_profile () {}
++ bool read ();
++ void set_loc_count ();
++ void process_extend_source_profile ();
++ void read_extend_afdo_file (const char*, event_type);
++ void rank_all_func ();
++ void dump_event ();
++ event_loc_count_map event_loc_map;
++ event_func_count_map event_func_map;
++ event_rank_map func_rank;
++ event_type profile_type;
++};
++
++/* Member functions for extend_auto_profile. */
++
++bool
++extend_auto_profile::auto_profile_exist (enum event_type type)
++{
++ switch (type)
++ {
++ case INST_EXEC:
++ return event_func_map.count (INST_EXEC) != 0
++ || event_loc_map.count (INST_EXEC) != 0;
++ case CACHE_MISSES:
++ return event_func_map.count (CACHE_MISSES) != 0
++ || event_loc_map.count (CACHE_MISSES) != 0;
++ default:
++ return false;
++ }
++}
++
++void
++extend_auto_profile::dump_event ()
++{
++ if (dump_file)
++ {
++ switch (profile_type)
++ {
++ case INST_EXEC:
++ fprintf (dump_file, "Processing event instruction execution.\n");
++ break;
++ case CACHE_MISSES:
++ fprintf (dump_file, "Processing event cache misses.\n");
++ break;
++ default:
++ break;
++ }
++ }
++}
++
++/* Load every configured event profile into the per-event maps.  Return
++   false only when no profile option is enabled.  */
++
++bool
++extend_auto_profile::read ()
++{
++  const char *event_files[EVENT_NUMBER] = {NULL};
++  if (!get_all_profile_names (event_files))
++    return false;
++
++  /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create
++     new ones for each event_type.  */
++  autofdo::string_table *string_table_afdo = afdo_string_table;
++  autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile;
++
++  for (unsigned i = 0; i < EVENT_NUMBER; i++)
++    {
++      if (event_files[i] == NULL)
++        continue;
++      profile_type = (enum event_type) i;
++      dump_event ();
++      /* Clear the globals first: if READ_PROFILE does not replace them,
++         the deletes below must not free the backed-up objects.  */
++      afdo_string_table = NULL;
++      afdo_source_profile = NULL;
++      gcov_close ();
++      auto_profile_file = event_files[i];
++      read_profile ();
++      gcov_close ();
++      if (afdo_source_profile != NULL)
++        process_extend_source_profile ();
++      delete afdo_source_profile;
++      delete afdo_string_table;
++    }
++
++  /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE.  Function
++     END_AUTO_PROFILE will free them at the end of compilation.  */
++  afdo_string_table = string_table_afdo;
++  afdo_source_profile = source_profile_afdo;
++  return true;
++}
++
++/* Helper functions. */
++
++gcov_type
++extend_auto_profile::get_loc_count (location_t loc, event_type type)
++{
++ event_loc_count_map::iterator event_iter = event_loc_map.find (type);
++ if (event_iter != event_loc_map.end ())
++ {
++ loc_count_map::iterator loc_iter = event_iter->second.find (loc);
++ if (loc_iter != event_iter->second.end ())
++ {
++ return loc_iter->second;
++ }
++ }
++ return 0;
++}
++
++struct rank_info
++extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type)
++{
++ struct rank_info info = {0, 0};
++ event_rank_map::iterator event_iter = func_rank.find (type);
++ if (event_iter != func_rank.end ())
++ {
++ rank_map::iterator func_iter = event_iter->second.find (decl_uid);
++ if (func_iter != event_iter->second.end ())
++ {
++ info.rank = func_iter->second;
++ info.total = event_iter->second.size ();
++ }
++ }
++ return info;
++}
++
++gcov_type
++extend_auto_profile::get_func_count (unsigned decl_uid, event_type type)
++{
++ event_func_count_map::iterator event_iter = event_func_map.find (type);
++ if (event_iter != event_func_map.end ())
++ {
++ func_count_map::iterator func_iter = event_iter->second.find (decl_uid);
++ if (func_iter != event_iter->second.end ())
++ {
++ return func_iter->second;
++ }
++ }
++ return 0;
++}
++
++static extend_auto_profile *extend_profile;
++
+ /* Helper functions. */
+
+ /* Return the original name of NAME: strip the suffix that starts
+@@ -1654,6 +1866,131 @@ auto_profile (void)
+
+ return TODO_rebuild_cgraph_edges;
+ }
++
++void
++extend_auto_profile::rank_all_func ()
++{
++ std::vector<std::pair<unsigned, gcov_type> > func_sorted;
++ event_func_count_map::iterator event_iter
++ = event_func_map.find (profile_type);
++ if (event_iter != event_func_map.end ())
++ {
++ func_count_map::iterator func_iter;
++ for (func_iter = event_iter->second.begin ();
++ func_iter != event_iter->second.end (); func_iter++)
++ {
++ func_sorted.push_back (std::make_pair (func_iter->first,
++ func_iter->second));
++ }
++
++ std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp);
++
++ for (unsigned i = 0; i < func_sorted.size (); ++i)
++ {
++ func_rank[profile_type][func_sorted[i].first] = i + 1;
++ }
++ }
++}
++
++/* Add the profile count of each stmt in cfun to EVENT_LOC_MAP.  */
++
++void
++extend_auto_profile::set_loc_count ()
++{
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      gimple_stmt_iterator gsi;
++      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++        {
++          count_info info;
++          gimple *stmt = gsi_stmt (gsi);
++          if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
++            continue;
++          if (afdo_source_profile->get_count_info (stmt, &info))
++            {
++              location_t loc = gimple_location (stmt);
++              gcov_type &loc_count = event_loc_map[profile_type][loc];
++              loc_count += info.count;
++              if (dump_file && (dump_flags & TDF_DETAILS))
++                {
++                  fprintf (dump_file, "stmt ");
++                  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
++                  /* Cast so the format is right where long is not 64 bits.  */
++                  fprintf (dump_file, "counts %" PRId64 "\n",
++                           (int64_t) loc_count);
++                }
++            }
++        }
++    }
++}
++
++/* Process data in extend_auto_source_profile, save them into two maps.
++ 1. gimple_location to count.
++ 2. function_index to count. */
++void
++extend_auto_profile::process_extend_source_profile ()
++{
++ struct cgraph_node *node;
++ if (symtab->state == FINISHED)
++ {
++ return;
++ }
++ FOR_EACH_FUNCTION (node)
++ {
++ if (!gimple_has_body_p (node->decl) || node->inlined_to)
++ {
++ continue;
++ }
++
++ /* Don't profile functions produced for builtin stuff. */
++ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
++ {
++ continue;
++ }
++
++ function *fn = DECL_STRUCT_FUNCTION (node->decl);
++ push_cfun (fn);
++
++ const function_instance *s
++ = afdo_source_profile->get_function_instance_by_decl (
++ current_function_decl);
++
++ if (s == NULL)
++ {
++ pop_cfun ();
++ continue;
++ }
++ unsigned int decl_uid = DECL_UID (current_function_decl);
++ gcov_type count = s->total_count ();
++ if (dump_file)
++ {
++ fprintf (dump_file, "Extend auto-profile for function %s.\n",
++ node->dump_name ());
++ }
++ event_func_map[profile_type][decl_uid] += count;
++ set_loc_count ();
++ pop_cfun ();
++ }
++ rank_all_func ();
++}
++
++/* Main entry of extend_auto_profile. */
++
++static void
++extend_source_profile ()
++{
++ extend_profile = autofdo::extend_auto_profile::create ();
++ if (dump_file)
++ {
++ if (extend_profile == NULL)
++ {
++ fprintf (dump_file, "No profile file is found.\n");
++ return;
++ }
++ fprintf (dump_file, "Extend profile info generated.\n");
++ }
++}
+ } /* namespace autofdo. */
+
+ /* Read the profile from the profile data file. */
+@@ -1682,6 +2019,42 @@ end_auto_profile (void)
+ profile_info = NULL;
+ }
+
++/* Extern function to get profile info in other passes. */
++
++bool
++profile_exist (enum event_type type)
++{
++ return autofdo::extend_profile != NULL
++ && autofdo::extend_profile->auto_profile_exist (type);
++}
++
++gcov_type
++event_get_loc_count (location_t loc, event_type type)
++{
++ return autofdo::extend_profile->get_loc_count (loc, type);
++}
++
++gcov_type
++event_get_func_count (unsigned decl_uid, event_type type)
++{
++ return autofdo::extend_profile->get_func_count (decl_uid, type);
++}
++
++struct rank_info
++event_get_func_rank (unsigned decl_uid, enum event_type type)
++{
++ return autofdo::extend_profile->get_func_rank (decl_uid, type);
++}
++
++void
++free_extend_profile_info ()
++{
++  /* Deleting NULL is a no-op.  Clear the global so a later
++     profile_exist () cannot reach the freed object.  */
++  delete autofdo::extend_profile;
++  autofdo::extend_profile = NULL;
++}
++
+ /* Returns TRUE if EDGE is hot enough to be inlined early. */
+
+ bool
+@@ -1743,8 +2116,50 @@ public:
+
+ } // anon namespace
+
++namespace
++{
++const pass_data pass_data_ipa_extend_auto_profile =
++{
++ SIMPLE_IPA_PASS, /* type */
++ "ex-afdo", /* name */
++ OPTGROUP_NONE, /* optinfo_flags */
++ TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */
++ 0, /* properties_required */
++ 0, /* properties_provided */
++ 0, /* properties_destroyed */
++ 0, /* todo_flags_start */
++ 0, /* todo_flags_finish */
++};
++
++class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass
++{
++public:
++  pass_ipa_extend_auto_profile (gcc::context *ctxt)
++    : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);}
++  virtual unsigned int execute (function *);
++};
++
++/* Build the extend auto-profile data; the FUNCTION argument is unused.  */
++unsigned int
++pass_ipa_extend_auto_profile::execute (function *)
++{
++  autofdo::extend_source_profile ();
++  return 0;
++}
++} // anon namespace
++
+ simple_ipa_opt_pass *
+ make_pass_ipa_auto_profile (gcc::context *ctxt)
+ {
+ return new pass_ipa_auto_profile (ctxt);
+ }
++
++simple_ipa_opt_pass *
++make_pass_ipa_extend_auto_profile (gcc::context *ctxt)
++{
++ return new pass_ipa_extend_auto_profile (ctxt);
++}
+\ No newline at end of file
+diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
+index f5cff091d..230d7e68a 100644
+--- a/gcc/auto-profile.h
++++ b/gcc/auto-profile.h
+@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see
+ #ifndef AUTO_PROFILE_H
+ #define AUTO_PROFILE_H
+
++enum event_type
++{
++ INST_EXEC = 0,
++ CACHE_MISSES,
++ EVENT_NUMBER
++};
++
+ /* Read, process, finalize AutoFDO data structures. */
+ extern void read_autofdo_file (void);
+ extern void end_auto_profile (void);
+@@ -28,4 +35,25 @@ extern void end_auto_profile (void);
+ /* Returns TRUE if EDGE is hot enough to be inlined early. */
+ extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
+
++/* Check if profile exists before using this profile. */
++extern bool profile_exist (enum event_type);
++
++/* Given func decl_uid or gimple location and event_type, return count.
++ Count is 0 if function or gimple is not sampled. */
++extern gcov_type event_get_func_count (unsigned, enum event_type);
++extern gcov_type event_get_loc_count (location_t, enum event_type);
++
++struct rank_info
++{
++ unsigned total;
++ unsigned rank;
++};
++
++/* Given function decl_uid and event type, return rank_info. Rank_info
++ is {0, 0} if function was not sampled. */
++extern struct rank_info event_get_func_rank (unsigned, enum event_type);
++
++/* Free memory allocated by autofdo::extend_profile. */
++extern void free_extend_profile_info ();
++
+ #endif /* AUTO_PROFILE_H */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 73c24f28d..37cbbd8c0 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file)
+ Use sample profile information for call graph node weights. The profile
+ file is specified in the argument.
+
++fcache-misses-profile
++Common Report Var(flag_cache_misses_profile)
++Use sample profile information for source code cache miss count. The default
++profile file is cmsdata.gcov in `pwd`.
++
++fcache-misses-profile=
++Common Joined RejectNegative Var(cache_misses_profile_file)
++Use sample profile information for source code cache miss count. The profile
++file is specified in the argument.
++
+ ; -fcheck-bounds causes gcc to generate array bounds checks.
+ ; For C, C++ and ObjC: defaults off.
+ ; For Java: defaults to on.
+@@ -1873,6 +1883,10 @@ fipa-struct-reorg
+ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+
++fipa-extend-auto-profile
++Common Report Var(flag_ipa_extend_auto_profile)
++Use sample profile information for source code.
++
+ fipa-vrp
+ Common Report Var(flag_ipa_vrp) Optimization
+ Perform IPA Value Range Propagation.
+diff --git a/gcc/opts.c b/gcc/opts.c
+index 6924a973a..642327296 100644
+--- a/gcc/opts.c
++++ b/gcc/opts.c
+@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts,
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
+ }
+
++static void
++set_cache_misses_profile_params (struct gcc_options *opts,
++ struct gcc_options *opts_set)
++{
++ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
++}
++
+ /* -f{,no-}sanitize{,-recover}= suboptions. */
+ const struct sanitizer_opts_s sanitizer_opts[] =
+ {
+@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts,
+ param_early_inliner_max_iterations, 10);
+ break;
+
++ case OPT_fipa_extend_auto_profile:
++ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile
++ ? true : value;
++ break;
++
++ case OPT_fcache_misses_profile_:
++ opts->x_cache_misses_profile_file = xstrdup (arg);
++ opts->x_flag_cache_misses_profile = true;
++ value = true;
++ /* No break here - do -fcache-misses-profile processing. */
++ /* FALLTHRU */
++ case OPT_fcache_misses_profile:
++ opts->x_flag_ipa_extend_auto_profile = value;
++ if (value)
++ {
++ set_cache_misses_profile_params (opts, opts_set);
++ }
++ break;
++
+ case OPT_fprofile_generate_:
+ opts->x_profile_data_prefix = xstrdup (arg);
+ value = true;
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 63303ab65..e9c91d26e 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see
+
+ NEXT_PASS (pass_target_clone);
+ NEXT_PASS (pass_ipa_auto_profile);
++ NEXT_PASS (pass_ipa_extend_auto_profile);
+ NEXT_PASS (pass_ipa_tree_profile);
+ PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
+ NEXT_PASS (pass_feedback_split_functions);
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index ee25eccbb..e873747a8 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
+ DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
+ DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization")
+ DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
++DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
+ DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
+ DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
+ DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
+diff --git a/gcc/toplev.c b/gcc/toplev.c
+index eaed6f6c7..51e6bd400 100644
+--- a/gcc/toplev.c
++++ b/gcc/toplev.c
+@@ -577,6 +577,12 @@ compile_file (void)
+ targetm.asm_out.output_ident (ident_str);
+ }
+
++ /* Extend auto profile finalization. */
++ if (flag_ipa_extend_auto_profile)
++ {
++ free_extend_profile_info ();
++ }
++
+ /* Auto profile finalization. */
+ if (flag_auto_profile)
+ end_auto_profile ();
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index eb32c5d44..be6387768 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
++extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
++ *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
+--
+2.27.0.windows.1
+