diff options
Diffstat (limited to '0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch')
-rw-r--r-- | 0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch | 550 |
1 files changed, 550 insertions, 0 deletions
diff --git a/0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch b/0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch new file mode 100644 index 0000000..5e16f31 --- /dev/null +++ b/0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch @@ -0,0 +1,550 @@ +From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001 +From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com> +Date: Tue, 5 Dec 2023 15:33:08 +0800 +Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment + 1/3 + +--- + gcc/common.opt | 8 + + gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++- + gcc/opts.cc | 61 ++++++++ + 3 files changed, 473 insertions(+), 1 deletion(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index b01df919e..e69947fc2 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2546,6 +2546,14 @@ freorder-functions + Common Var(flag_reorder_functions) Optimization + Reorder functions to improve code placement. + ++fauto-bolt ++Common Var(flag_auto_bolt) ++Generate profile from AutoFDO or PGO and do BOLT optimization after linkage. ++ ++fauto-bolt= ++Common Joined RejectNegative ++Specify the feedback data directory required by BOLT-plugin. The default is the current directory. ++ + frerun-cse-after-loop + Common Var(flag_rerun_cse_after_loop) Optimization + Add a common subexpression elimination pass after loop optimizations. +diff --git a/gcc/final.cc b/gcc/final.cc +index a9868861b..d4c4fa08f 100644 +--- a/gcc/final.cc ++++ b/gcc/final.cc +@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see + #include "rtl-iter.h" + #include "print-rtl.h" + #include "function-abi.h" ++#include "insn-codes.h" + #include "common/common-target.h" + + #ifdef XCOFF_DEBUGGING_INFO +@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx) + } + } + #endif +- ++ ++#define ASM_FDO_SECTION_PREFIX ".text.fdo." 
++ ++#define ASM_FDO_CALLER_FLAG ".fdo.caller " ++#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size " ++#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind" ++ ++#define ASM_FDO_CALLEE_FLAG ".fdo.callee" ++ ++/* Return the relative offset address of the start instruction of BB, ++ return -1 if it is empty instruction. */ ++ ++static int ++get_bb_start_addr (basic_block bb) ++{ ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ /* The jump target of call is not in this function, so ++ it should be excluded. */ ++ if (CALL_P (insn)) ++ { ++ return -1; ++ } ++ ++ int insn_code = recog_memoized (insn); ++ ++ /* The instruction NOP in llvm-bolt belongs to the previous ++ BB, so it needs to be skipped. */ ++ if (insn_code != CODE_FOR_nop) ++ { ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ } ++ return -1; ++} ++ ++/* Return the relative offet address of the end instruction of BB, ++ return -1 if it is empty or call instruction. */ ++ ++static int ++get_bb_end_addr (basic_block bb) ++{ ++ rtx_insn *insn; ++ int num_succs = EDGE_COUNT (bb->succs); ++ FOR_BB_INSNS_REVERSE (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ /* The jump target of call is not in this function, so ++ it should be excluded. */ ++ if (CALL_P (insn)) ++ { ++ return -1; ++ } ++ if ((num_succs == 1) ++ || ((num_succs == 2) && any_condjump_p (insn))) ++ { ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ else ++ { ++ return -1; ++ } ++ } ++ return -1; ++} ++ ++/* Return the end address of cfun. */ ++ ++static int ++get_function_end_addr () ++{ ++ rtx_insn *insn = get_last_insn (); ++ for (; insn != get_insns (); insn = PREV_INSN (insn)) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ ++ return -1; ++} ++ ++/* Return the function profile status string. 
*/
++
++static const char *
++get_function_profile_status ()
++{
++  /* Indexed by the value of profile_status_for_fn (cfun).  */
++  static const char *const status_names[] = {
++    "PROFILE_ABSENT",
++    "PROFILE_GUESSED",
++    "PROFILE_READ",
++    "PROFILE_LAST" /* Last value, used by profile streaming. */
++  };
++
++  const int status = profile_status_for_fn (cfun);
++  return status_names[status];
++}
++
++/* Return COUNT as a gcov_type when its quality is at least GUESSED,
++   and 0 otherwise.  Used for counts coming from feedback data such
++   as PGO or AutoFDO.  */
++
++inline static gcov_type
++get_fdo_count (profile_count count)
++{
++  if (count.quality () >= GUESSED)
++    return count.to_gcov_type ();
++
++  return 0;
++}
++
++/* Return the name of the profile quality of COUNT.  */
++
++static const char *
++get_fdo_count_quality (profile_count count)
++{
++  /* Indexed by the value of count.quality ().  */
++  static const char *const quality_names[] = {
++    "UNINITIALIZED_PROFILE",
++    "GUESSED_LOCAL",
++    "GUESSED_GLOBAL0",
++    "GUESSED_GLOBAL0_ADJUSTED",
++    "GUESSED",
++    "AFDO",
++    "ADJUSTED",
++    "PRECISE"
++  };
++
++  const int quality = count.quality ();
++  return quality_names[quality];
++}
++
++/* Return the name under which FNNAME is recorded.  When the current
++   function (cfun) is public, FNNAME itself is returned.  Otherwise a
++   new string "FNNAME/<basename of dump_base_name>" is built with
++   concat; that string is heap-allocated and owned by the caller.  */
++
++static const char *
++alias_local_functions (const char *fnname)
++{
++  if (!TREE_PUBLIC (cfun->decl))
++    return concat (fnname, "/", lbasename (dump_base_name), NULL);
++
++  return fnname;
++}
++
++/* Return the binding of the current function as a string: "GLOBAL"
++   for a public non-weak function, "WEAK" for a public weak one and
++   "LOCAL" for everything else.  */
++
++static const char *
++simple_get_function_bind ()
++{
++  if (!TREE_PUBLIC (cfun->decl))
++    return "LOCAL";
++
++  return DECL_WEAK (cfun->decl) ? "WEAK" : "GLOBAL";
++}
++
++/* Dump the direct callees of the call insns (CALL_P) found in BB. 
*/
++
++/* For every call insn of BB whose callee declaration is known, write
++   three .string records to asm_out_file: the call address (hex), the
++   callee name prefixed with ASM_FDO_CALLEE_FLAG, and CALL_COUNT.  */
++
++static void
++dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
++{
++  rtx_insn *insn;
++  FOR_BB_INSNS (bb, insn)
++    {
++      if (!CALL_P (insn))
++        continue;
++
++      /* Calls for which no declaration can be found are not recorded.  */
++      tree callee = get_call_fndecl (insn);
++      if (!callee)
++        continue;
++
++      const int call_addr = INSN_ADDRESSES (INSN_UID (insn));
++      const char *callee_name = get_fnname_from_decl (callee);
++
++      /* NOTE(review): alias_local_functions decides whether to append
++         the "/file" suffix from the visibility of cfun (the caller),
++         not of CALLEE -- confirm this is what the BOLT plugin expects.  */
++      const char *callee_alias = alias_local_functions (callee_name);
++
++      fprintf (asm_out_file, "\t.string \"%x\"\n", call_addr);
++      fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++               ASM_FDO_CALLEE_FLAG, callee_alias);
++      fprintf (asm_out_file,
++               "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++               (HOST_WIDE_INT) call_count);
++
++      if (dump_file)
++        fprintf (dump_file, "call: %x --> %s \n", call_addr, callee_alias);
++
++      /* alias_local_functions returns either its argument or a string
++         freshly allocated by concat; release only the latter.  */
++      if (callee_alias != callee_name)
++        free (const_cast<char *> (callee_alias));
++    }
++}
++
++/* Dump the outgoing edges of BB into asm.  For each successor edge
++   with usable start/end addresses and a positive count, three .string
++   records are written: jump address, target address (both hex) and
++   the edge count.  Afterwards the direct calls of BB are dumped with
++   a count of MAX (sum of all edge counts, BB_COUNT) when that value
++   is positive.  Return the number of edge records written.  */
++
++static int
++dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
++{
++  edge e;
++  edge_iterator ei;
++  gcov_type edge_total_count = 0;
++  int num_edges_dumped = 0;
++
++  FOR_EACH_EDGE (e, ei, bb->succs)
++    {
++      gcov_type edge_count = get_fdo_count (e->count ());
++      /* Every edge contributes to the total, even one skipped below.  */
++      edge_total_count += edge_count;
++
++      int edge_start_addr = get_bb_end_addr (e->src);
++      int edge_end_addr = get_bb_start_addr (e->dest);
++
++      if (edge_start_addr == -1 || edge_end_addr == -1)
++        continue;
++
++      /* The jump and its target are expected to be distinct
++         instructions.  If this assert ever fires, the address of the
++         previous instruction should be used as edge_start_addr.  */
++      gcc_assert (edge_start_addr != edge_end_addr);
++
++      if (dump_file)
++        fprintf (dump_file,
++                 "edge: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ")\n",
++                 edge_start_addr, edge_end_addr,
++                 (HOST_WIDE_INT) edge_count);
++
++      if (edge_count > 0)
++        {
++          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
++          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
++          fprintf (asm_out_file,
++                   "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++                   (HOST_WIDE_INT) edge_count);
++          num_edges_dumped++;
++        }
++    }
++
++  gcov_type call_count = MAX (edge_total_count, bb_count);
++  if (call_count > 0)
++    dump_direct_callee_info_to_asm (bb, call_count);
++
++  /* The function is declared to return int; falling off the end
++     without a value would be undefined behavior.  */
++  return num_edges_dumped;
++}
++
++/* Dump the bb info into asm: the start address of BB (hex) followed
++   by BB_COUNT.  Nothing is written when BB has no usable start
++   address.  */
++
++static void
++dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
++{
++  int bb_start_addr = get_bb_start_addr (bb);
++  if (bb_start_addr == -1)
++    return;
++
++  fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
++  fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++           (HOST_WIDE_INT) bb_count);
++}
++
++/* Dump the function info into asm: the recorded name of FNNAME, the
++   function end address (decimal) and the function binding, each
++   prefixed by its ASM_FDO_CALLER_* marker.  The same data is echoed
++   to dump_file when dumping is enabled.  */
++
++static void
++dump_function_info_to_asm (const char *fnname)
++{
++  const char *fn_alias = alias_local_functions (fnname);
++  const int fn_end_addr = get_function_end_addr ();
++  const char *fn_bind = simple_get_function_bind ();
++
++  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++           ASM_FDO_CALLER_FLAG, fn_alias);
++  fprintf (asm_out_file, "\t.string \"%s%d\"\n",
++           ASM_FDO_CALLER_SIZE_FLAG, fn_end_addr);
++  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++           ASM_FDO_CALLER_BIND_FLAG, fn_bind);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "\n FUNC_NAME: %s\n", fn_alias);
++      fprintf (dump_file, " file: %s\n", dump_base_name);
++      fprintf (dump_file, "profile_status: %s\n",
++               get_function_profile_status ());
++      fprintf (dump_file, " size: %x\n", fn_end_addr);
++      fprintf (dump_file, " function_bind: %s\n", fn_bind);
++    }
++
++  /* alias_local_functions returns either its argument or a string
++     freshly allocated by concat; release only the latter.  */
++  if (fn_alias != fnname)
++    free (const_cast<char *> (fn_alias));
++}
++
++/* Dump the function profile, coming from AutoFDO or PGO, to asm. 
*/
++
++static void
++dump_fdo_info_to_asm (const char *fnname)
++{
++  basic_block bb;
++
++  dump_function_info_to_asm (fnname);
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      /* Blocks without a usable count produce no record at all.  */
++      gcov_type bb_count = get_fdo_count (bb->count);
++      if (bb_count == 0)
++        continue;
++
++      if (dump_file)
++        fprintf (dump_file,
++                 "BB: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ") [%s]\n",
++                 get_bb_start_addr (bb), get_bb_end_addr (bb),
++                 (HOST_WIDE_INT) bb_count,
++                 get_fdo_count_quality (bb->count));
++
++      /* With -fprofile-use, edge (and call) records are written; with
++         -fauto-profile only, per-block records are written.  */
++      if (flag_profile_use)
++        dump_edge_jump_info_to_asm (bb, bb_count);
++      else if (flag_auto_profile)
++        dump_bb_info_to_asm (bb, bb_count);
++    }
++}
++
++/* When the -fauto-bolt option is turned on, a .text.fdo.<function>
++   section is generated in the *.s file if there is feedback
++   information from PGO or AutoFDO.  This section will be parsed by
++   the BOLT plugin.
++
++   An error is reported, and nothing is written, when the option
++   combination in effect is one this dump cannot work with.  */
++
++static void
++dump_profile_to_elf_sections ()
++{
++  if (!flag_function_sections)
++    {
++      error ("%<-fauto-bolt%> should work with %<-ffunction-sections%>");
++      return;
++    }
++  if (!flag_ipa_ra)
++    {
++      error ("%<-fauto-bolt%> should work with %<-fipa-ra%>");
++      return;
++    }
++  if (flag_align_jumps)
++    {
++      error ("%<-fauto-bolt%> is not supported with %<-falign-jumps%>");
++      return;
++    }
++  if (flag_align_labels)
++    {
++      error ("%<-fauto-bolt%> is not supported with %<-falign-labels%>");
++      return;
++    }
++  if (flag_align_loops)
++    {
++      error ("%<-fauto-bolt%> is not supported with %<-falign-loops%>");
++      return;
++    }
++
++  /* Return if no feedback data.  */
++  if (!flag_profile_use && !flag_auto_profile)
++    {
++      error ("%<-fauto-bolt%> should be used with %<-fprofile-use%>"
++             " or %<-fauto-profile%>");
++      return;
++    }
++
++  /* Only real function declarations are dumped.  */
++  if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
++    return;
++
++  int flags = SECTION_DEBUG | SECTION_EXCLUDE;
++  const char *fnname = get_fnname_from_decl (current_function_decl);
++
++  /* concat aborts on allocation failure, so the section name is never
++     NULL here (unlike an unchecked asprintf).  */
++  char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
++
++  switch_to_section (get_section (profile_fnname, flags, NULL));
++  dump_fdo_info_to_asm (fnname);
++
++  free (profile_fnname);
++}
++ + /* Turn the RTL into assembly. */ + static unsigned int + rest_of_handle_final (void) +@@ -4334,6 +4731,12 @@ rest_of_handle_final (void) + targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0), + decl_fini_priority_lookup + (current_function_decl)); ++ ++ if (flag_auto_bolt) ++ { ++ dump_profile_to_elf_sections (); ++ } ++ + return 0; + } + +diff --git a/gcc/opts.cc b/gcc/opts.cc +index b868d189e..6d57e7d69 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, + if (opts->x_flag_vtable_verify && opts->x_flag_lto) + sorry ("vtable verification is not supported with LTO"); + ++ /* Currently -fauto-bolt is not supported for LTO. */ ++ if (opts->x_flag_auto_bolt && opts->x_flag_lto) ++ sorry ("%<-fauto-bolt%> is not supported with LTO"); ++ + /* Control IPA optimizations based on different -flive-patching level. */ + if (opts->x_flag_live_patching) + control_options_for_live_patching (opts, opts_set, +@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, + = (opts->x_flag_unroll_loops + || opts->x_flag_peel_loops + || opts->x_optimize >= 3); ++ ++ if (opts->x_flag_auto_bolt) ++ { ++ /* Record the function section to facilitate the feedback ++ data storage. 
*/ ++ if (!opts->x_flag_function_sections) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should work with %<-ffunction-sections%>," ++ " enabling %<-ffunction-sections%>"); ++ opts->x_flag_function_sections = true; ++ } ++ ++ /* Cancel the internal alignment of the function. The binary ++ optimizer bolt will cancel the internal alignment optimization ++ of the function, so the alignment is meaningless at this time, ++ and if not, it will bring trouble to the calculation of the ++ offset address of the instruction. */ ++ if (opts->x_flag_align_jumps) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-jumps%>," ++ " disabling %<-falign-jumps%>"); ++ opts->x_flag_align_jumps = false; ++ } ++ ++ if (opts->x_flag_align_labels) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-labels%>," ++ " disabling %<-falign-labels%>"); ++ opts->x_flag_align_labels = false; ++ } ++ ++ if (opts->x_flag_align_loops) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-loops%>," ++ " disabling %<-falign-loops%>"); ++ opts->x_flag_align_loops = false; ++ } ++ ++ /* When parsing instructions in RTL phase, we need to know ++ the call information of instructions to avoid being optimized. */ ++ if (!opts->x_flag_ipa_ra) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should work with %<-fipa-ra%>," ++ " enabling %<-fipa-ra%>"); ++ opts->x_flag_ipa_ra = true; ++ } ++ } + + /* With -fcx-limited-range, we do cheap and quick complex arithmetic. */ + if (opts->x_flag_cx_limited_range) +@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts, + &opts->x_flag_align_functions, + &opts->x_str_align_functions); + break; ++ ++ case OPT_fauto_bolt_: ++ case OPT_fauto_bolt: ++ /* Deferred. */ ++ break; + + case OPT_ftabstop_: + /* It is documented that we silently ignore silly values. */ +-- +2.33.0 + |