summaryrefslogtreecommitdiff
path: root/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
diff options
context:
space:
mode:
Diffstat (limited to '0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch')
-rw-r--r--0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch548
1 files changed, 548 insertions, 0 deletions
diff --git a/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch b/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
new file mode 100644
index 0000000..42053fc
--- /dev/null
+++ b/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
@@ -0,0 +1,548 @@
+From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001
+From: liyancheng <412998149@qq.com>
+Date: Mon, 14 Feb 2022 14:34:41 +0800
+Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF
+ segment 1/3
+
+Add flag -fauto-bolt to save the feedback count info from PGO or
+AutoFDO to segment .text.fdo. The bolt plugin will read and parse
+it into the profile of llvm-bolt.
+---
+ gcc/common.opt | 8 +
+ gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++
+ gcc/opts.c | 61 ++++++++
+ 3 files changed, 469 insertions(+)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 9488bd90f..5eaa667b3 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -2403,6 +2403,14 @@ freorder-functions
+ Common Report Var(flag_reorder_functions) Optimization
+ Reorder functions to improve code placement.
+
++fauto-bolt
++Common Report Var(flag_auto_bolt)
++Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
++
++fauto-bolt=
++Common Joined RejectNegative
++Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
++
+ frerun-cse-after-loop
+ Common Report Var(flag_rerun_cse_after_loop) Optimization
+ Add a common subexpression elimination pass after loop optimizations.
+diff --git a/gcc/final.c b/gcc/final.c
+index a3601964a..b9affd3a7 100644
+--- a/gcc/final.c
++++ b/gcc/final.c
+@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "rtl-iter.h"
+ #include "print-rtl.h"
+ #include "function-abi.h"
++#include "insn-codes.h"
+
+ #ifdef XCOFF_DEBUGGING_INFO
+ #include "xcoffout.h" /* Needed for external data declarations. */
+@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx)
+ }
+ #endif
+
++
++/* Prefix of the per-function section that receives the feedback
++   data; the function name is appended to it.  */
++#define ASM_FDO_SECTION_PREFIX ".text.fdo."
++
++/* Tags written in front of the name, the size and the bind type of
++   the function whose profile is being dumped.  */
++#define ASM_FDO_CALLER_FLAG ".fdo.caller "
++#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
++#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
++
++/* Tag written in front of the name of a directly called function.  */
++#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
++
++/* Return the relative offset address of the first instruction in BB
++   that is not a NOP, or -1 if BB contains no such instruction.  */
++
++static int
++get_bb_start_addr (basic_block bb)
++{
++  rtx_insn *cur;
++
++  FOR_BB_INSNS (bb, cur)
++    {
++      /* Skip anything that is not a real instruction.  */
++      if (!INSN_P (cur))
++        continue;
++
++      /* llvm-bolt attributes a NOP to the previous BB, so a NOP
++         cannot serve as the start of this one.  */
++      if (recog_memoized (cur) == CODE_FOR_nop)
++        continue;
++
++      return INSN_ADDRESSES (INSN_UID (cur));
++    }
++
++  /* BB holds no instruction other than NOPs.  */
++  return -1;
++}
++
++/* Return the relative offset address of the last instruction of BB,
++   or -1 when BB has no instruction, when that instruction is a call,
++   or when BB has neither exactly one successor nor two successors
++   reached through a conditional jump.  */
++
++static int
++get_bb_end_addr (basic_block bb)
++{
++  rtx_insn *last;
++  int succ_count = EDGE_COUNT (bb->succs);
++
++  FOR_BB_INSNS_REVERSE (bb, last)
++    {
++      if (!INSN_P (last))
++        continue;
++
++      /* LAST is now the final instruction of BB; the walk never
++         goes further back than this.  */
++
++      /* The jump target of a call is not in this function, so
++         it is excluded.  */
++      if (CALL_P (last))
++        return -1;
++
++      bool one_succ = (succ_count == 1);
++      bool cond_jump = (succ_count == 2 && any_condjump_p (last));
++      return (one_succ || cond_jump)
++             ? INSN_ADDRESSES (INSN_UID (last)) : -1;
++    }
++
++  return -1;
++}
++
++/* Return the address of the last instruction of cfun, or -1 if none
++   is found.  The head of the insn chain itself is not examined.  */
++
++static int
++get_function_end_addr ()
++{
++  rtx_insn *cur = get_last_insn ();
++
++  /* Walk backwards until a real instruction shows up.  */
++  while (cur != get_insns ())
++    {
++      if (INSN_P (cur))
++        return INSN_ADDRESSES (INSN_UID (cur));
++      cur = PREV_INSN (cur);
++    }
++  return -1;
++}
++
++/* Return the name of cfun's profile status as a string.  */
++
++static const char *
++get_function_profile_status ()
++{
++  /* Indexed by the value of profile_status_for_fn.  */
++  static const char *const status_names[] = {
++    "PROFILE_ABSENT",
++    "PROFILE_GUESSED",
++    "PROFILE_READ",
++    "PROFILE_LAST"  /* Last value, used by profile streaming.  */
++  };
++  return status_names[profile_status_for_fn (cfun)];
++}
++
++/* Return COUNT as a gcov_type if its quality is at least GUESSED,
++   otherwise return 0.  */
++
++inline static gcov_type
++get_fdo_count (profile_count count)
++{
++  return (count.quality () < GUESSED) ? 0 : count.to_gcov_type ();
++}
++
++/* Return the name of COUNT's profile quality as a string.  */
++
++static const char *
++get_fdo_count_quality (profile_count count)
++{
++  /* Indexed by the value returned from COUNT's quality ().  */
++  static const char *const quality_names[] = {
++    "UNINITIALIZED_PROFILE",
++    "GUESSED_LOCAL",
++    "GUESSED_GLOBAL0",
++    "GUESSED_GLOBAL0_ADJUSTED",
++    "GUESSED",
++    "AFDO",
++    "ADJUSTED",
++    "PRECISE"
++  };
++  return quality_names[count.quality ()];
++}
++
++/* Return the name under which FNNAME is recorded in the profile.
++   FNNAME is returned unchanged when cfun is public; otherwise the
++   result is "FNNAME/<basename of dump_base_name>".  The suffixed
++   string lives in a buffer owned by this function and stays valid
++   only until the next call that builds a suffixed name, so callers
++   must not free it and must use it before calling again.
++   NOTE(review): the visibility test always looks at cfun->decl, even
++   when FNNAME names a callee -- confirm this is intended.  */
++
++static const char *
++alias_local_functions (const char *fnname)
++{
++  /* Last suffixed name handed out.  It is released once the next one
++     has been built, so the concat result is no longer leaked on
++     every call.  */
++  static char *local_alias;
++
++  if (TREE_PUBLIC (cfun->decl))
++    return fnname;
++
++  /* Build the new name before releasing the old one, in case FNNAME
++     points into the previous result.  */
++  char *fresh = concat (fnname, "/", lbasename (dump_base_name), NULL);
++  free (local_alias);
++  local_alias = fresh;
++  return local_alias;
++}
++
++/* Return the bind type of cfun as a string: "GLOBAL" for a public,
++   non-weak function, "WEAK" for a public weak function and "LOCAL"
++   for a function that is not public.  */
++
++static const char *
++simple_get_function_bind ()
++{
++  tree fndecl = cfun->decl;
++
++  /* Anything that is not public is reported as local.  */
++  if (!TREE_PUBLIC (fndecl))
++    return "LOCAL";
++
++  return DECL_WEAK (fndecl) ? "WEAK" : "GLOBAL";
++}
++
++/* For every call insn in BB whose callee declaration is known, write
++   three strings to the asm file: the address of the call, the callee
++   name tagged with ASM_FDO_CALLEE_FLAG, and CALL_COUNT.  The call is
++   also logged to the dump file when one is open.  */
++
++static void
++dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
++{
++  rtx_insn *cur;
++
++  FOR_BB_INSNS (bb, cur)
++    {
++      if (!cur || !CALL_P (cur))
++        continue;
++
++      /* Calls without a known function declaration are not
++         recorded.  */
++      tree fndecl = get_call_fndecl (cur);
++      if (!fndecl)
++        continue;
++
++      int call_addr = INSN_ADDRESSES (INSN_UID (cur));
++      const char *callee_name
++        = alias_local_functions (get_fnname_from_decl (fndecl));
++
++      fprintf (asm_out_file, "\t.string \"%x\"\n", call_addr);
++      fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++               ASM_FDO_CALLEE_FLAG, callee_name);
++      fprintf (asm_out_file,
++               "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++               call_count);
++
++      if (dump_file)
++        fprintf (dump_file, "call: %x --> %s\n", call_addr, callee_name);
++    }
++}
++
++/* Write the outgoing edges of BB to the asm file.  For each successor
++   edge with a positive count and usable addresses on both ends,
++   three strings are written: the address of the last instruction of
++   the source block, the address of the first instruction of the
++   destination block, and the edge count.  Afterwards the direct
++   calls of BB are written with a count of
++   MAX (sum of the edge counts, BB_COUNT) when that value is
++   positive.  */
++
++static void
++dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
++{
++  edge e;
++  edge_iterator ei;
++  gcov_type edge_total_count = 0;
++
++  FOR_EACH_EDGE (e, ei, bb->succs)
++    {
++      gcov_type edge_count = get_fdo_count (e->count ());
++      edge_total_count += edge_count;
++
++      int edge_start_addr = get_bb_end_addr (e->src);
++      int edge_end_addr = get_bb_start_addr (e->dest);
++
++      /* Edges without a usable address on either end still count
++         towards the total above but are not written out.  */
++      if (edge_start_addr == -1 || edge_end_addr == -1)
++        continue;
++
++      /* An edge is expected to connect two distinct addresses.
++         Should this assert ever trigger, the address of the previous
++         instruction would have to be used as edge_start_addr.  */
++      gcc_assert (edge_start_addr != edge_end_addr);
++
++      /* gcov_type is printed with HOST_WIDE_INT_PRINT_DEC, as in the
++         asm output below, instead of a host-dependent "%ld".  */
++      if (dump_file)
++        fprintf (dump_file,
++                 "edge: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ")\n",
++                 edge_start_addr, edge_end_addr, edge_count);
++
++      if (edge_count > 0)
++        {
++          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
++          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
++          fprintf (asm_out_file,
++                   "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++                   edge_count);
++        }
++    }
++
++  gcov_type call_count = MAX (edge_total_count, bb_count);
++  if (call_count > 0)
++    dump_direct_callee_info_to_asm (bb, call_count);
++}
++
++/* Write BB to the asm file as two strings: the address of its first
++   instruction and BB_COUNT.  Nothing is written when BB has no
++   start address.  */
++
++static void
++dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
++{
++  int start_addr = get_bb_start_addr (bb);
++
++  if (start_addr == -1)
++    return;
++
++  fprintf (asm_out_file, "\t.string \"%x\"\n", start_addr);
++  fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
++           bb_count);
++}
++
++/* Write the header of cfun's profile to the asm file: the (possibly
++   aliased) name derived from FNNAME, the end address of the function
++   as its size, and its bind type.  The same values, together with
++   the dump base name and the profile status, are written to the dump
++   file when one is open.  */
++
++static void
++dump_function_info_to_asm (const char *fnname)
++{
++  const char *profile_name = alias_local_functions (fnname);
++  const char *bind = simple_get_function_bind ();
++  int end_addr = get_function_end_addr ();
++
++  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++           ASM_FDO_CALLER_FLAG, profile_name);
++  fprintf (asm_out_file, "\t.string \"%s%d\"\n",
++           ASM_FDO_CALLER_SIZE_FLAG, end_addr);
++  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
++           ASM_FDO_CALLER_BIND_FLAG, bind);
++
++  if (!dump_file)
++    return;
++
++  fprintf (dump_file, "\n FUNC_NAME: %s\n", profile_name);
++  fprintf (dump_file, " file: %s\n", dump_base_name);
++  fprintf (dump_file, " profile_status: %s\n",
++           get_function_profile_status ());
++  fprintf (dump_file, " size: %x\n", end_addr);
++  fprintf (dump_file, " function_bind: %s\n", bind);
++}
++
++/* Dump the profile information of cfun, coming from AutoFDO or PGO,
++   to the asm file.  The function header is written first.  Then,
++   for every basic block with a non-zero count, edge and call records
++   are written when -fprofile-use is active, or a basic block record
++   when only -fauto-profile is active.  FNNAME is the name of the
++   function.  */
++
++static void
++dump_fdo_info_to_asm (const char *fnname)
++{
++  basic_block bb;
++
++  dump_function_info_to_asm (fnname);
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      gcov_type bb_count = get_fdo_count (bb->count);
++      if (bb_count == 0)
++        continue;
++
++      /* gcov_type is printed with HOST_WIDE_INT_PRINT_DEC, like the
++         counts written to the asm file, rather than with a
++         host-dependent "%ld".  */
++      if (dump_file)
++        fprintf (dump_file,
++                 "BB: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ") [%s]\n",
++                 get_bb_start_addr (bb), get_bb_end_addr (bb),
++                 bb_count, get_fdo_count_quality (bb->count));
++
++      if (flag_profile_use)
++        dump_edge_jump_info_to_asm (bb, bb_count);
++      else if (flag_auto_profile)
++        dump_bb_info_to_asm (bb, bb_count);
++    }
++}
++
++/* When the -fauto-bolt option is turned on, switch to a section named
++   ASM_FDO_SECTION_PREFIX followed by the function name and write the
++   feedback information of cfun from PGO or AutoFDO into it.  This
++   section will be parsed by the BOLT plugin.  An error is reported,
++   and nothing is written, when a required option is missing or an
++   unsupported one is enabled.  */
++
++static void
++dump_profile_to_elf_sections ()
++{
++  if (!flag_function_sections)
++    {
++      error ("-fauto-bolt should work with -ffunction-sections");
++      return;
++    }
++  if (!flag_ipa_ra)
++    {
++      error ("-fauto-bolt should work with -fipa-ra");
++      return;
++    }
++  if (flag_align_jumps)
++    {
++      error ("-fauto-bolt is not supported with -falign-jumps");
++      return;
++    }
++  if (flag_align_labels)
++    {
++      error ("-fauto-bolt is not supported with -falign-labels");
++      return;
++    }
++  if (flag_align_loops)
++    {
++      error ("-fauto-bolt is not supported with -falign-loops");
++      return;
++    }
++
++  /* Feedback data from -fprofile-use or -fauto-profile is required.  */
++  if (!flag_profile_use && !flag_auto_profile)
++    {
++      error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
++      return;
++    }
++
++  /* Only function declarations are handled.  */
++  if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
++    return;
++
++  int flags = SECTION_DEBUG | SECTION_EXCLUDE;
++  const char *fnname = get_fnname_from_decl (current_function_decl);
++
++  /* Build the section name with concat rather than asprintf: the
++     asprintf result was never checked, so a failed allocation would
++     have handed an indeterminate pointer to get_section.  */
++  char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
++
++  switch_to_section (get_section (profile_fnname, flags, NULL));
++  dump_fdo_info_to_asm (fnname);
++
++  free (profile_fnname);
++}
++
+ /* Turn the RTL into assembly. */
+ static unsigned int
+ rest_of_handle_final (void)
+@@ -4707,6 +5101,12 @@ rest_of_handle_final (void)
+ targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
+ decl_fini_priority_lookup
+ (current_function_decl));
++
++ if (flag_auto_bolt)
++ {
++ dump_profile_to_elf_sections ();
++ }
++
+ return 0;
+ }
+
+diff --git a/gcc/opts.c b/gcc/opts.c
+index f49f5ee58..0b389ae1d 100644
+--- a/gcc/opts.c
++++ b/gcc/opts.c
+@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
+ if (opts->x_flag_vtable_verify && opts->x_flag_lto)
+ sorry ("vtable verification is not supported with LTO");
+
++ /* Currently -fauto-bolt is not supported for LTO. */
++ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
++ sorry ("%<-fauto-bolt%> is not supported with LTO");
++
+ /* Control IPA optimizations based on different -flive-patching level. */
+ if (opts->x_flag_live_patching)
+ control_options_for_live_patching (opts, opts_set,
+@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
+ = (opts->x_flag_unroll_loops
+ || opts->x_flag_peel_loops
+ || opts->x_optimize >= 3);
++
++ if (opts->x_flag_auto_bolt)
++ {
++ /* Record the function section to facilitate the feedback
++ data storage. */
++ if (!opts->x_flag_function_sections)
++ {
++ inform (loc,
++ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
++ " enabling %<-ffunction-sections%>");
++ opts->x_flag_function_sections = true;
++ }
++
++ /* Cancel the internal alignment of the function. The binary
++ optimizer bolt will cancel the internal alignment optimization
++ of the function, so the alignment is meaningless at this time,
++ and if not, it will bring trouble to the calculation of the
++ offset address of the instruction. */
++ if (opts->x_flag_align_jumps)
++ {
++ inform (loc,
++ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
++ " disabling %<-falign-jumps%>");
++ opts->x_flag_align_jumps = false;
++ }
++
++ if (opts->x_flag_align_labels)
++ {
++ inform (loc,
++ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
++ " disabling %<-falign-labels%>");
++ opts->x_flag_align_labels = false;
++ }
++
++ if (opts->x_flag_align_loops)
++ {
++ inform (loc,
++ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
++ " disabling %<-falign-loops%>");
++ opts->x_flag_align_loops = false;
++ }
++
++ /* When parsing instructions in RTL phase, we need to know
++ the call information of instructions to avoid being optimized. */
++ if (!opts->x_flag_ipa_ra)
++ {
++ inform (loc,
++ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
++ " enabling %<-fipa-ra%>");
++ opts->x_flag_ipa_ra = true;
++ }
++ }
+ }
+
+ #define LEFT_COLUMN 27
+@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts,
+ check_alignment_argument (loc, arg, "functions");
+ break;
+
++ case OPT_fauto_bolt_:
++ case OPT_fauto_bolt:
++ /* Deferred. */
++ break;
++
+ default:
+ /* If the flag was handled in a standard way, assume the lack of
+ processing here is intentional. */
+--
+2.27.0
+