From d82826d1a1c7ea45a761dfbf76b879712c7332ec Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Tue, 17 Oct 2023 02:15:03 +0000 Subject: automatic import of gcc --- ...AutoPrefetch-Support-cache-misses-profile.patch | 669 +++++++++++++++++++++ 1 file changed, 669 insertions(+) create mode 100644 0025-AutoPrefetch-Support-cache-misses-profile.patch (limited to '0025-AutoPrefetch-Support-cache-misses-profile.patch') diff --git a/0025-AutoPrefetch-Support-cache-misses-profile.patch b/0025-AutoPrefetch-Support-cache-misses-profile.patch new file mode 100644 index 0000000..1daa2db --- /dev/null +++ b/0025-AutoPrefetch-Support-cache-misses-profile.patch @@ -0,0 +1,669 @@ +From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji +Date: Thu, 6 Jan 2022 15:33:29 +0800 +Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile + +Add pass ex-afdo after pass afdo in auto-profile.c. +Add flag -fcache-misses-profile. +Read profile of different types of perf events and build maps for +function and gimple location to its count of each perf event. +Currently, instruction execution and cache misses are supported. +--- + gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++ + gcc/auto-profile.h | 28 +++ + gcc/common.opt | 14 ++ + gcc/opts.c | 26 +++ + gcc/passes.def | 1 + + gcc/timevar.def | 1 + + gcc/toplev.c | 6 + + gcc/tree-pass.h | 2 + + 8 files changed, 493 insertions(+) + +diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c +index 7d09887c9..aced8fca5 100644 +--- a/gcc/auto-profile.c ++++ b/gcc/auto-profile.c +@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see + #include "auto-profile.h" + #include "tree-pretty-print.h" + #include "gimple-pretty-print.h" ++#include ++#include ++#include + + /* The following routines implements AutoFDO optimization. + +@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3.
If not see + */ + + #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" ++#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov" + #define AUTO_PROFILE_VERSION 1 + + namespace autofdo +@@ -117,6 +121,14 @@ private: + bool annotated_; + }; + ++/* pair */ ++static bool ++event_count_cmp (std::pair &a, ++ std::pair &b) ++{ ++ return a.second > b.second; ++} ++ + /* Represent a source location: (function_decl, lineno). */ + typedef std::pair decl_lineno; + +@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile; + /* gcov_summary structure to store the profile_info. */ + static gcov_summary *afdo_profile_info; + ++/* Check opts->x_flags and put file name into EVENT_FILES. */ ++ ++static bool ++get_all_profile_names (const char **event_files) ++{ ++ if (!(flag_auto_profile || flag_cache_misses_profile)) ++ { ++ return false; ++ } ++ ++ event_files[INST_EXEC] = auto_profile_file; ++ ++ if (cache_misses_profile_file == NULL) ++ { ++ cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE; ++ } ++ event_files[CACHE_MISSES] = cache_misses_profile_file; ++ ++ return true; ++} ++ ++static void read_profile (void); ++ ++/* Maintain multiple profile data of different events with event_loc_count_map ++ and event_func_count_map. */ ++ ++class extend_auto_profile ++{ ++public: ++ bool auto_profile_exist (enum event_type type); ++ gcov_type get_loc_count (location_t, event_type); ++ gcov_type get_func_count (unsigned, event_type); ++ struct rank_info get_func_rank (unsigned, enum event_type); ++ /* There should be only one instance of class EXTEND_AUTO_PROFILE. */ ++ static extend_auto_profile *create () ++ { ++ extend_auto_profile *map = new extend_auto_profile (); ++ if (map->read ()) ++ { ++ return map; ++ } ++ delete map; ++ return NULL; ++ } ++private: ++ /* Basic maps of extend_auto_profile. */ ++ typedef std::map loc_count_map; ++ typedef std::map func_count_map; ++ ++ /* Map of function_uid to its descending order rank of counts. 
*/ ++ typedef std::map rank_map; ++ ++ /* Mapping hardware events to corresponding basic maps. */ ++ typedef std::map event_loc_count_map; ++ typedef std::map event_func_count_map; ++ typedef std::map event_rank_map; ++ ++ extend_auto_profile () {} ++ bool read (); ++ void set_loc_count (); ++ void process_extend_source_profile (); ++ void read_extend_afdo_file (const char*, event_type); ++ void rank_all_func (); ++ void dump_event (); ++ event_loc_count_map event_loc_map; ++ event_func_count_map event_func_map; ++ event_rank_map func_rank; ++ event_type profile_type; ++}; ++ ++/* Member functions for extend_auto_profile. */ ++ ++bool ++extend_auto_profile::auto_profile_exist (enum event_type type) ++{ ++ switch (type) ++ { ++ case INST_EXEC: ++ return event_func_map.count (INST_EXEC) != 0 ++ || event_loc_map.count (INST_EXEC) != 0; ++ case CACHE_MISSES: ++ return event_func_map.count (CACHE_MISSES) != 0 ++ || event_loc_map.count (CACHE_MISSES) != 0; ++ default: ++ return false; ++ } ++} ++ ++void ++extend_auto_profile::dump_event () ++{ ++ if (dump_file) ++ { ++ switch (profile_type) ++ { ++ case INST_EXEC: ++ fprintf (dump_file, "Processing event instruction execution.\n"); ++ break; ++ case CACHE_MISSES: ++ fprintf (dump_file, "Processing event cache misses.\n"); ++ break; ++ default: ++ break; ++ } ++ } ++} ++ ++/* Return true if any profile data was read. */ ++ ++bool ++extend_auto_profile::read () ++{ ++ const char *event_files[EVENT_NUMBER] = {NULL}; ++ if (!get_all_profile_names (event_files)) ++ { ++ return false; ++ } ++ ++ /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create ++ new ones for each event_type. 
*/ ++ autofdo::string_table *string_table_afdo = afdo_string_table; ++ autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile; ++ ++ for (unsigned i = 0; i < EVENT_NUMBER; i++) ++ { ++ if (event_files[i] == NULL) ++ { ++ continue; ++ } ++ profile_type = (enum event_type) i; ++ dump_event (); ++ gcov_close (); ++ auto_profile_file = event_files[i]; ++ read_profile (); ++ gcov_close (); ++ ++ process_extend_source_profile (); ++ ++ delete afdo_source_profile; ++ delete afdo_string_table; ++ } ++ ++ /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE. Function ++ END_AUTO_PROFILE will free them at the end of compilation. */ ++ afdo_string_table = string_table_afdo; ++ afdo_source_profile = source_profile_afdo; ++ return true; ++} ++ ++/* Helper functions. */ ++ ++gcov_type ++extend_auto_profile::get_loc_count (location_t loc, event_type type) ++{ ++ event_loc_count_map::iterator event_iter = event_loc_map.find (type); ++ if (event_iter != event_loc_map.end ()) ++ { ++ loc_count_map::iterator loc_iter = event_iter->second.find (loc); ++ if (loc_iter != event_iter->second.end ()) ++ { ++ return loc_iter->second; ++ } ++ } ++ return 0; ++} ++ ++struct rank_info ++extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type) ++{ ++ struct rank_info info = {0, 0}; ++ event_rank_map::iterator event_iter = func_rank.find (type); ++ if (event_iter != func_rank.end ()) ++ { ++ rank_map::iterator func_iter = event_iter->second.find (decl_uid); ++ if (func_iter != event_iter->second.end ()) ++ { ++ info.rank = func_iter->second; ++ info.total = event_iter->second.size (); ++ } ++ } ++ return info; ++} ++ ++gcov_type ++extend_auto_profile::get_func_count (unsigned decl_uid, event_type type) ++{ ++ event_func_count_map::iterator event_iter = event_func_map.find (type); ++ if (event_iter != event_func_map.end ()) ++ { ++ func_count_map::iterator func_iter = event_iter->second.find (decl_uid); ++ if (func_iter != event_iter->second.end ()) ++ { ++ 
return func_iter->second; ++ } ++ } ++ return 0; ++} ++ ++static extend_auto_profile *extend_profile; ++ + /* Helper functions. */ + + /* Return the original name of NAME: strip the suffix that starts +@@ -1654,6 +1866,131 @@ auto_profile (void) + + return TODO_rebuild_cgraph_edges; + } ++ ++void ++extend_auto_profile::rank_all_func () ++{ ++ std::vector > func_sorted; ++ event_func_count_map::iterator event_iter ++ = event_func_map.find (profile_type); ++ if (event_iter != event_func_map.end ()) ++ { ++ func_count_map::iterator func_iter; ++ for (func_iter = event_iter->second.begin (); ++ func_iter != event_iter->second.end (); func_iter++) ++ { ++ func_sorted.push_back (std::make_pair (func_iter->first, ++ func_iter->second)); ++ } ++ ++ std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp); ++ ++ for (unsigned i = 0; i < func_sorted.size (); ++i) ++ { ++ func_rank[profile_type][func_sorted[i].first] = i + 1; ++ } ++ } ++} ++ ++/* Iterate stmts in cfun and maintain its count to EVENT_LOC_MAP. */ ++ ++void ++extend_auto_profile::set_loc_count () ++{ ++ basic_block bb; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ count_info info; ++ gimple *stmt = gsi_stmt (gsi); ++ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ if (afdo_source_profile->get_count_info (stmt, &info)) ++ { ++ location_t loc = gimple_location (stmt); ++ event_loc_map[profile_type][loc] += info.count; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "stmt "); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); ++ fprintf (dump_file, "counts %ld\n", ++ event_loc_map[profile_type][loc]); ++ } ++ } ++ } ++ } ++} ++ ++/* Process data in extend_auto_source_profile, save them into two maps. ++ 1. gimple_location to count. ++ 2. function_index to count. 
*/ ++void ++extend_auto_profile::process_extend_source_profile () ++{ ++ struct cgraph_node *node; ++ if (symtab->state == FINISHED) ++ { ++ return; ++ } ++ FOR_EACH_FUNCTION (node) ++ { ++ if (!gimple_has_body_p (node->decl) || node->inlined_to) ++ { ++ continue; ++ } ++ ++ /* Don't profile functions produced for builtin stuff. */ ++ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) ++ { ++ continue; ++ } ++ ++ function *fn = DECL_STRUCT_FUNCTION (node->decl); ++ push_cfun (fn); ++ ++ const function_instance *s ++ = afdo_source_profile->get_function_instance_by_decl ( ++ current_function_decl); ++ ++ if (s == NULL) ++ { ++ pop_cfun (); ++ continue; ++ } ++ unsigned int decl_uid = DECL_UID (current_function_decl); ++ gcov_type count = s->total_count (); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Extend auto-profile for function %s.\n", ++ node->dump_name ()); ++ } ++ event_func_map[profile_type][decl_uid] += count; ++ set_loc_count (); ++ pop_cfun (); ++ } ++ rank_all_func (); ++} ++ ++/* Main entry of extend_auto_profile. */ ++ ++static void ++extend_source_profile () ++{ ++ extend_profile = autofdo::extend_auto_profile::create (); ++ if (dump_file) ++ { ++ if (extend_profile == NULL) ++ { ++ fprintf (dump_file, "No profile file is found.\n"); ++ return; ++ } ++ fprintf (dump_file, "Extend profile info generated.\n"); ++ } ++} + } /* namespace autofdo. */ + + /* Read the profile from the profile data file. */ +@@ -1682,6 +2019,42 @@ end_auto_profile (void) + profile_info = NULL; + } + ++/* Extern function to get profile info in other passes. 
*/ ++ ++bool ++profile_exist (enum event_type type) ++{ ++ return autofdo::extend_profile != NULL ++ && autofdo::extend_profile->auto_profile_exist (type); ++} ++ ++gcov_type ++event_get_loc_count (location_t loc, event_type type) ++{ ++ return autofdo::extend_profile->get_loc_count (loc, type); ++} ++ ++gcov_type ++event_get_func_count (unsigned decl_uid, event_type type) ++{ ++ return autofdo::extend_profile->get_func_count (decl_uid, type); ++} ++ ++struct rank_info ++event_get_func_rank (unsigned decl_uid, enum event_type type) ++{ ++ return autofdo::extend_profile->get_func_rank (decl_uid, type); ++} ++ ++void ++free_extend_profile_info () ++{ ++ if (autofdo::extend_profile != NULL) ++ { ++ delete autofdo::extend_profile; ++ } ++} ++ + /* Returns TRUE if EDGE is hot enough to be inlined early. */ + + bool +@@ -1743,8 +2116,50 @@ public: + + } // anon namespace + ++namespace ++{ ++const pass_data pass_data_ipa_extend_auto_profile = ++{ ++ SIMPLE_IPA_PASS, /* type */ ++ "ex-afdo", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_extend_auto_profile (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);} ++ virtual unsigned int execute (function *); ++ ++}; ++ ++unsigned int ++pass_ipa_extend_auto_profile::execute (function *fun) ++{ ++ autofdo::extend_source_profile (); ++ return 0; ++} ++} // anon namespace ++ + simple_ipa_opt_pass * + make_pass_ipa_auto_profile (gcc::context *ctxt) + { + return new pass_ipa_auto_profile (ctxt); + } ++ ++simple_ipa_opt_pass * ++make_pass_ipa_extend_auto_profile (gcc::context *ctxt) ++{ ++ 
return new pass_ipa_extend_auto_profile (ctxt); +} +\ No newline at end of file +diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h +index f5cff091d..230d7e68a 100644 +--- a/gcc/auto-profile.h ++++ b/gcc/auto-profile.h +@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see + #ifndef AUTO_PROFILE_H + #define AUTO_PROFILE_H + ++enum event_type ++{ ++ INST_EXEC = 0, ++ CACHE_MISSES, ++ EVENT_NUMBER ++}; ++ + /* Read, process, finalize AutoFDO data structures. */ + extern void read_autofdo_file (void); + extern void end_auto_profile (void); +@@ -28,4 +35,25 @@ extern void end_auto_profile (void); + /* Returns TRUE if EDGE is hot enough to be inlined early. */ + extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *); + ++/* Check if profile exists before using this profile. */ ++extern bool profile_exist (enum event_type); ++ ++/* Given func decl_uid or gimple location and event_type, return count. ++ Count is 0 if function or gimple is not sampled. */ ++extern gcov_type event_get_func_count (unsigned, enum event_type); ++extern gcov_type event_get_loc_count (location_t, enum event_type); ++ ++struct rank_info ++{ ++ unsigned total; ++ unsigned rank; ++}; ++ ++/* Given function decl_uid and event type, return rank_info. Rank_info ++ is {0, 0} if function was not sampled. */ ++extern struct rank_info event_get_func_rank (unsigned, enum event_type); ++ ++/* Free memory allocated by autofdo::extend_profile. */ ++extern void free_extend_profile_info (); ++ + #endif /* AUTO_PROFILE_H */ +diff --git a/gcc/common.opt b/gcc/common.opt +index 73c24f28d..37cbbd8c0 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file) + Use sample profile information for call graph node weights. The profile + file is specified in the argument. + ++fcache-misses-profile ++Common Report Var(flag_cache_misses_profile) ++Use sample profile information for source code cache miss count.
The default ++profile file is cmsdata.gcov in `pwd`. ++ ++fcache-misses-profile= ++Common Joined RejectNegative Var(cache_misses_profile_file) ++Use sample profile information for source code cache miss count. The profile ++file is specified in the argument. ++ + ; -fcheck-bounds causes gcc to generate array bounds checks. + ; For C, C++ and ObjC: defaults off. + ; For Java: defaults to on. +@@ -1873,6 +1883,10 @@ fipa-struct-reorg + Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. + ++fipa-extend-auto-profile ++Common Report Var(flag_ipa_extend_auto_profile) ++Use sample profile information for source code. ++ + fipa-vrp + Common Report Var(flag_ipa_vrp) Optimization + Perform IPA Value Range Propagation. +diff --git a/gcc/opts.c b/gcc/opts.c +index 6924a973a..642327296 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts, + SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value); + } + ++static void ++set_cache_misses_profile_params (struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1); ++} ++ + /* -f{,no-}sanitize{,-recover}= suboptions. */ + const struct sanitizer_opts_s sanitizer_opts[] = + { +@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts, + param_early_inliner_max_iterations, 10); + break; + ++ case OPT_fipa_extend_auto_profile: ++ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile ++ ? true : value; ++ break; ++ ++ case OPT_fcache_misses_profile_: ++ opts->x_cache_misses_profile_file = xstrdup (arg); ++ opts->x_flag_cache_misses_profile = true; ++ value = true; ++ /* No break here - do -fcache-misses-profile processing. 
*/ ++ /* FALLTHRU */ ++ case OPT_fcache_misses_profile: ++ opts->x_flag_ipa_extend_auto_profile = value; ++ if (value) ++ { ++ set_cache_misses_profile_params (opts, opts_set); ++ } ++ break; ++ + case OPT_fprofile_generate_: + opts->x_profile_data_prefix = xstrdup (arg); + value = true; +diff --git a/gcc/passes.def b/gcc/passes.def +index 63303ab65..e9c91d26e 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see + + NEXT_PASS (pass_target_clone); + NEXT_PASS (pass_ipa_auto_profile); ++ NEXT_PASS (pass_ipa_extend_auto_profile); + NEXT_PASS (pass_ipa_tree_profile); + PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile) + NEXT_PASS (pass_feedback_split_functions); +diff --git a/gcc/timevar.def b/gcc/timevar.def +index ee25eccbb..e873747a8 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") + DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") ++DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") + DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression") +diff --git a/gcc/toplev.c b/gcc/toplev.c +index eaed6f6c7..51e6bd400 100644 +--- a/gcc/toplev.c ++++ b/gcc/toplev.c +@@ -577,6 +577,12 @@ compile_file (void) + targetm.asm_out.output_ident (ident_str); + } + ++ /* Extend auto profile finalization. */ ++ if (flag_ipa_extend_auto_profile) ++ { ++ free_extend_profile_info (); ++ } ++ + /* Auto profile finalization. 
*/ + if (flag_auto_profile) + end_auto_profile (); +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index eb32c5d44..be6387768 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context ++ *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt); +-- +2.27.0.windows.1 + -- cgit v1.2.3