diff options
Diffstat (limited to '0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch')
-rw-r--r-- | 0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch b/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch new file mode 100644 index 0000000..42053fc --- /dev/null +++ b/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch @@ -0,0 +1,548 @@ +From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Mon, 14 Feb 2022 14:34:41 +0800 +Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF + segment 1/3 + +Add flag -fauto-bolt to save the feedback count info from PGO or +AutoFDO to segment .text.fdo. The bolt plugin will read and parse +it into the profile of llvm-bolt. +--- + gcc/common.opt | 8 + + gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++ + gcc/opts.c | 61 ++++++++ + 3 files changed, 469 insertions(+) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 9488bd90f..5eaa667b3 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2403,6 +2403,14 @@ freorder-functions + Common Report Var(flag_reorder_functions) Optimization + Reorder functions to improve code placement. + ++fauto-bolt ++Common Report Var(flag_auto_bolt) ++Generate profile from AutoFDO or PGO and do BOLT optimization after linkage. ++ ++fauto-bolt= ++Common Joined RejectNegative ++Specify the feedback data directory required by BOLT-plugin. The default is the current directory. ++ + frerun-cse-after-loop + Common Report Var(flag_rerun_cse_after_loop) Optimization + Add a common subexpression elimination pass after loop optimizations. +diff --git a/gcc/final.c b/gcc/final.c +index a3601964a..b9affd3a7 100644 +--- a/gcc/final.c ++++ b/gcc/final.c +@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see + #include "rtl-iter.h" + #include "print-rtl.h" + #include "function-abi.h" ++#include "insn-codes.h" + + #ifdef XCOFF_DEBUGGING_INFO + #include "xcoffout.h" /* Needed for external data declarations. */ +@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx) + } + #endif + ++ ++#define ASM_FDO_SECTION_PREFIX ".text.fdo." ++ ++#define ASM_FDO_CALLER_FLAG ".fdo.caller " ++#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size " ++#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind " ++ ++#define ASM_FDO_CALLEE_FLAG ".fdo.callee " ++ ++/* Return the relative offset address of the start instruction of BB, ++ return -1 if it is empty instruction. */ ++ ++static int ++get_bb_start_addr (basic_block bb) ++{ ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ ++ int insn_code = recog_memoized (insn); ++ ++ /* The instruction NOP in llvm-bolt belongs to the previous ++ BB, so it needs to be skipped. */ ++ if (insn_code != CODE_FOR_nop) ++ { ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ } ++ return -1; ++} ++ ++/* Return the relative offset address of the end instruction of BB, ++ return -1 if it is empty or call instruction. */ ++ ++static int ++get_bb_end_addr (basic_block bb) ++{ ++ rtx_insn *insn; ++ int num_succs = EDGE_COUNT (bb->succs); ++ FOR_BB_INSNS_REVERSE (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ /* The jump target of call is not in this function, so ++ it should be excluded. */ ++ if (CALL_P (insn)) ++ { ++ return -1; ++ } ++ if ((num_succs == 1) ++ || ((num_succs == 2) && any_condjump_p (insn))) ++ { ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ else ++ { ++ return -1; ++ } ++ } ++ return -1; ++} ++ ++/* Return the end address of cfun. */ ++ ++static int ++get_function_end_addr () ++{ ++ rtx_insn *insn = get_last_insn (); ++ for (; insn != get_insns (); insn = PREV_INSN (insn)) ++ { ++ if (!INSN_P (insn)) ++ { ++ continue; ++ } ++ return INSN_ADDRESSES (INSN_UID (insn)); ++ } ++ ++ return -1; ++} ++ ++/* Return the function profile status string. */ ++ ++static const char * ++get_function_profile_status () ++{ ++ const char *profile_status[] = { ++ "PROFILE_ABSENT", ++ "PROFILE_GUESSED", ++ "PROFILE_READ", ++ "PROFILE_LAST" /* Last value, used by profile streaming. */ ++ }; ++ ++ return profile_status[profile_status_for_fn (cfun)]; ++} ++ ++/* Return the count from the feedback data, such as PGO or AFDO. */ ++ ++inline static gcov_type ++get_fdo_count (profile_count count) ++{ ++ return count.quality () >= GUESSED ++ ? count.to_gcov_type () : 0; ++} ++ ++/* Return the profile quality string. */ ++ ++static const char * ++get_fdo_count_quality (profile_count count) ++{ ++ const char *profile_quality[] = { ++ "UNINITIALIZED_PROFILE", ++ "GUESSED_LOCAL", ++ "GUESSED_GLOBAL0", ++ "GUESSED_GLOBAL0_ADJUSTED", ++ "GUESSED", ++ "AFDO", ++ "ADJUSTED", ++ "PRECISE" ++ }; ++ ++ return profile_quality[count.quality ()]; ++} ++ ++static const char * ++alias_local_functions (const char *fnname) ++{ ++ if (TREE_PUBLIC (cfun->decl)) ++ { ++ return fnname; ++ } ++ ++ return concat (fnname, "/", lbasename (dump_base_name), NULL); ++} ++ ++/* Return function bind type string. */ ++ ++static const char * ++simple_get_function_bind () ++{ ++ const char *function_bind[] = { ++ "GLOBAL", ++ "WEAK", ++ "LOCAL", ++ "UNKNOWN" ++ }; ++ ++ if (TREE_PUBLIC (cfun->decl)) ++ { ++ if (!(DECL_WEAK (cfun->decl))) ++ { ++ return function_bind[0]; ++ } ++ else ++ { ++ return function_bind[1]; ++ } ++ } ++ else ++ { ++ return function_bind[2]; ++ } ++ ++ return function_bind[3]; ++} ++ ++/* Dump the callee functions insn in bb by CALL_P (insn). */ ++ ++static void ++dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count) ++{ ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (insn && CALL_P (insn)) ++ { ++ tree callee = get_call_fndecl (insn); ++ ++ if (callee) ++ { ++ fprintf (asm_out_file, "\t.string \"%x\"\n", ++ INSN_ADDRESSES (INSN_UID (insn))); ++ ++ fprintf (asm_out_file, "\t.string \"%s%s\"\n", ++ ASM_FDO_CALLEE_FLAG, ++ alias_local_functions (get_fnname_from_decl (callee))); ++ ++ fprintf (asm_out_file, ++ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", ++ call_count); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "call: %x --> %s\n", ++ INSN_ADDRESSES (INSN_UID (insn)), ++ alias_local_functions ++ (get_fnname_from_decl (callee))); ++ } ++ } ++ } ++ } ++} ++ ++/* Dump the edge info into asm. */ ++ ++static void ++dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count) ++{ ++ edge e; ++ edge_iterator ei; ++ gcov_type edge_total_count = 0; ++ ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ gcov_type edge_count = get_fdo_count (e->count ()); ++ edge_total_count += edge_count; ++ ++ int edge_start_addr = get_bb_end_addr (e->src); ++ int edge_end_addr = get_bb_start_addr (e->dest); ++ ++ if (edge_start_addr == -1 || edge_end_addr == -1) ++ { ++ continue; ++ } ++ ++ /* This is a reserved assert for the original design. If this ++ assert is found, use the address of the previous instruction ++ as edge_start_addr. */ ++ gcc_assert (edge_start_addr != edge_end_addr); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "edge: %x --> %x = (%ld)\n", ++ edge_start_addr, edge_end_addr, edge_count); ++ } ++ ++ if (edge_count > 0) ++ { ++ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr); ++ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr); ++ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", ++ edge_count); ++ } ++ } ++ ++ gcov_type call_count = MAX (edge_total_count, bb_count); ++ if (call_count > 0) ++ { ++ dump_direct_callee_info_to_asm (bb, call_count); ++ } ++} ++ ++/* Dump the bb info into asm. */ ++ ++static void ++dump_bb_info_to_asm (basic_block bb, gcov_type bb_count) ++{ ++ int bb_start_addr = get_bb_start_addr (bb); ++ if (bb_start_addr != -1) ++ { ++ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr); ++ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n", ++ bb_count); ++ } ++} ++ ++/* Dump the function info into asm. */ ++ ++static void ++dump_function_info_to_asm (const char *fnname) ++{ ++ fprintf (asm_out_file, "\t.string \"%s%s\"\n", ++ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname)); ++ fprintf (asm_out_file, "\t.string \"%s%d\"\n", ++ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ()); ++ fprintf (asm_out_file, "\t.string \"%s%s\"\n", ++ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ()); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "\n FUNC_NAME: %s\n", ++ alias_local_functions (fnname)); ++ fprintf (dump_file, " file: %s\n", ++ dump_base_name); ++ fprintf (dump_file, " profile_status: %s\n", ++ get_function_profile_status ()); ++ fprintf (dump_file, " size: %x\n", ++ get_function_end_addr ()); ++ fprintf (dump_file, " function_bind: %s\n", ++ simple_get_function_bind ()); ++ } ++} ++ ++/* Dump function profile info form AutoFDO or PGO to asm. */ ++ ++static void ++dump_fdo_info_to_asm (const char *fnname) ++{ ++ basic_block bb; ++ ++ dump_function_info_to_asm (fnname); ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ gcov_type bb_count = get_fdo_count (bb->count); ++ if (bb_count == 0) ++ { ++ continue; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n", ++ get_bb_start_addr (bb), get_bb_end_addr (bb), ++ bb_count, get_fdo_count_quality (bb->count)); ++ } ++ ++ if (flag_profile_use) ++ { ++ dump_edge_jump_info_to_asm (bb, bb_count); ++ } ++ else if (flag_auto_profile) ++ { ++ dump_bb_info_to_asm (bb, bb_count); ++ } ++ } ++} ++ ++/* When -fauto-bolt option is turned on, the .text.fdo. section ++ will be generated in the *.s file if there is feedback information ++ from PGO or AutoFDO. This section will parserd in BOLT-plugin. */ ++ ++static void ++dump_profile_to_elf_sections () ++{ ++ if (!flag_function_sections) ++ { ++ error ("-fauto-bolt should work with -ffunction-sections"); ++ return; ++ } ++ if (!flag_ipa_ra) ++ { ++ error ("-fauto-bolt should work with -fipa-ra"); ++ return; ++ } ++ if (flag_align_jumps) ++ { ++ error ("-fauto-bolt is not supported with -falign-jumps"); ++ return; ++ } ++ if (flag_align_labels) ++ { ++ error ("-fauto-bolt is not supported with -falign-labels"); ++ return; ++ } ++ if (flag_align_loops) ++ { ++ error ("-fauto-bolt is not supported with -falign-loops"); ++ return; ++ } ++ ++ /* Return if no feedback data. */ ++ if (!flag_profile_use && !flag_auto_profile) ++ { ++ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile"); ++ return; ++ } ++ ++ /* Avoid empty functions. */ ++ if (TREE_CODE (cfun->decl) != FUNCTION_DECL) ++ { ++ return; ++ } ++ int flags = SECTION_DEBUG | SECTION_EXCLUDE; ++ const char *fnname = get_fnname_from_decl (current_function_decl); ++ char *profile_fnname = NULL; ++ ++ asprintf (&profile_fnname,"%s%s", ASM_FDO_SECTION_PREFIX, fnname); ++ switch_to_section (get_section (profile_fnname, flags , NULL)); ++ dump_fdo_info_to_asm (fnname); ++ ++ if (profile_fnname) ++ { ++ free (profile_fnname); ++ profile_fnname = NULL; ++ } ++} ++ + /* Turn the RTL into assembly. */ + static unsigned int + rest_of_handle_final (void) +@@ -4707,6 +5101,12 @@ rest_of_handle_final (void) + targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0), + decl_fini_priority_lookup + (current_function_decl)); ++ ++ if (flag_auto_bolt) ++ { ++ dump_profile_to_elf_sections (); ++ } ++ + return 0; + } + +diff --git a/gcc/opts.c b/gcc/opts.c +index f49f5ee58..0b389ae1d 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, + if (opts->x_flag_vtable_verify && opts->x_flag_lto) + sorry ("vtable verification is not supported with LTO"); + ++ /* Currently -fauto-bolt is not supported for LTO. */ ++ if (opts->x_flag_auto_bolt && opts->x_flag_lto) ++ sorry ("%<-fauto-bolt%> is not supported with LTO"); ++ + /* Control IPA optimizations based on different -flive-patching level. */ + if (opts->x_flag_live_patching) + control_options_for_live_patching (opts, opts_set, +@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, + = (opts->x_flag_unroll_loops + || opts->x_flag_peel_loops + || opts->x_optimize >= 3); ++ ++ if (opts->x_flag_auto_bolt) ++ { ++ /* Record the function section to facilitate the feedback ++ data storage. */ ++ if (!opts->x_flag_function_sections) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should work with %<-ffunction-sections%>," ++ " enabling %<-ffunction-sections%>"); ++ opts->x_flag_function_sections = true; ++ } ++ ++ /* Cancel the internal alignment of the function. The binary ++ optimizer bolt will cancel the internal alignment optimization ++ of the function, so the alignment is meaningless at this time, ++ and if not, it will bring trouble to the calculation of the ++ offset address of the instruction. */ ++ if (opts->x_flag_align_jumps) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-jumps%>," ++ " disabling %<-falign-jumps%>"); ++ opts->x_flag_align_jumps = false; ++ } ++ ++ if (opts->x_flag_align_labels) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-labels%>," ++ " disabling %<-falign-labels%>"); ++ opts->x_flag_align_labels = false; ++ } ++ ++ if (opts->x_flag_align_loops) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should not work with %<-falign-loops%>," ++ " disabling %<-falign-loops%>"); ++ opts->x_flag_align_loops = false; ++ } ++ ++ /* When parsing instructions in RTL phase, we need to know ++ the call information of instructions to avoid being optimized. */ ++ if (!opts->x_flag_ipa_ra) ++ { ++ inform (loc, ++ "%<-fauto-bolt%> should work with %<-fipa-ra%>," ++ " enabling %<-fipa-ra%>"); ++ opts->x_flag_ipa_ra = true; ++ } ++ } + } + + #define LEFT_COLUMN 27 +@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts, + check_alignment_argument (loc, arg, "functions"); + break; + ++ case OPT_fauto_bolt_: ++ case OPT_fauto_bolt: ++ /* Deferred. */ ++ break; ++ + default: + /* If the flag was handled in a standard way, assume the lack of + processing here is intentional. */ +-- +2.27.0 + |