diff options
Diffstat (limited to 'gdb-rhbz-2232086-reduce-size-of-gdb-index.patch')
-rw-r--r-- | gdb-rhbz-2232086-reduce-size-of-gdb-index.patch | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch b/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch new file mode 100644 index 0000000..9eaf615 --- /dev/null +++ b/gdb-rhbz-2232086-reduce-size-of-gdb-index.patch @@ -0,0 +1,222 @@ +From FEDORA_PATCHES Mon Sep 17 00:00:00 2001 +From: Andrew Burgess <aburgess@redhat.com> +Date: Fri, 24 Nov 2023 11:50:35 +0000 +Subject: gdb-rhbz-2232086-reduce-size-of-gdb-index.patch + +;; Back-port upstream commit aa19bc1d259 as part of a fix for +;; non-deterministic gdb-index generation (RH BZ 2232086). + +gdb: reduce size of generated gdb-index file + +I noticed in passing that out algorithm for generating the gdb-index +file is incorrect. When building the hash table in add_index_entry we +count every incoming entry rehash when the number of entries gets too +large. However, some of the incoming entries will be duplicates, +which don't actually result in new items being added to the hash +table. + +As a result, we grow the gdb-index hash table far too often. + +With an unmodified GDB, generating a gdb-index for GDB, I see a file +size of 90M, with a hash usage (in the generated index file) of just +2.6%. + +With a patched GDB, generating a gdb-index for the _same_ GDB binary, +I now see a gdb-index file size of 30M, with a hash usage of 41.9%. + +This is a 67% reduction in gdb-index file size. + +Obviously, not every gdb-index file is going to see such big savings, +however, the larger a program, and the more symbols that are +duplicated between compilation units, the more GDB would over count, +and so, over-grow the index. + +The gdb-index hash table we create has a minimum size of 1024, and +then we grow the hash when it is 75% full, doubling the hash table at +that time. Given this, then we expect that either: + + a. The hash table is size 1024, and less than 75% full, or + b. The hash table is between 37.5% and 75% full. + +I've include a test that checks some of these constraints -- I've not +bothered to check the upper limit, and over full hash table isn't +really a problem here, but if the fill percentage is less than 37.5% +then this indicates that we've done something wrong (obviously, I also +check for the 1024 minimum size). + +Approved-By: Tom Tromey <tom@tromey.com> + +diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c +--- a/gdb/dwarf2/index-write.c ++++ b/gdb/dwarf2/index-write.c +@@ -254,20 +254,29 @@ add_index_entry (struct mapped_symtab *symtab, const char *name, + int is_static, gdb_index_symbol_kind kind, + offset_type cu_index) + { +- offset_type cu_index_and_attrs; ++ symtab_index_entry *slot = &find_slot (symtab, name); ++ if (slot->name == NULL) ++ { ++ /* This is a new element in the hash table. */ ++ ++symtab->n_elements; + +- ++symtab->n_elements; +- if (4 * symtab->n_elements / 3 >= symtab->data.size ()) +- hash_expand (symtab); ++ /* We might need to grow the hash table. */ ++ if (4 * symtab->n_elements / 3 >= symtab->data.size ()) ++ { ++ hash_expand (symtab); + +- symtab_index_entry &slot = find_slot (symtab, name); +- if (slot.name == NULL) +- { +- slot.name = name; ++ /* This element will have a different slot in the new table. */ ++ slot = &find_slot (symtab, name); ++ ++ /* But it should still be a new element in the hash table. */ ++ gdb_assert (slot->name == nullptr); ++ } ++ ++ slot->name = name; + /* index_offset is set later. */ + } + +- cu_index_and_attrs = 0; ++ offset_type cu_index_and_attrs = 0; + DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index); + DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static); + DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind); +@@ -279,7 +288,7 @@ add_index_entry (struct mapped_symtab *symtab, const char *name, + the last entry pushed), but a symbol could have multiple kinds in one CU. + To keep things simple we don't worry about the duplication here and + sort and uniquify the list after we've processed all symbols. */ +- slot.cu_indices.push_back (cu_index_and_attrs); ++ slot->cu_indices.push_back (cu_index_and_attrs); + } + + /* See symtab_index_entry. */ +diff --git a/gdb/testsuite/gdb.gdb/index-file.exp b/gdb/testsuite/gdb.gdb/index-file.exp +new file mode 100644 +--- /dev/null ++++ b/gdb/testsuite/gdb.gdb/index-file.exp +@@ -0,0 +1,118 @@ ++# Copyright 2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# Load the GDB executable, and then 'save gdb-index', and make some ++# checks of the generated index file. ++ ++load_lib selftest-support.exp ++ ++# Can't save an index with readnow. ++if {[readnow]} { ++ untested "cannot create an index when readnow is in use" ++ return -1 ++} ++ ++# A multiplier used to ensure slow tasks are less likely to timeout. ++set timeout_factor 20 ++ ++set filename [selftest_prepare] ++if { $filename eq "" } { ++ unsupported "${gdb_test_file_name}.exp" ++ return -1 ++} ++ ++with_timeout_factor $timeout_factor { ++ # Start GDB, load FILENAME. ++ clean_restart $filename ++} ++ ++# Generate an index file. ++set dir1 [standard_output_file "index_1"] ++remote_exec host "mkdir -p ${dir1}" ++with_timeout_factor $timeout_factor { ++ gdb_test_no_output "save gdb-index $dir1" \ ++ "create gdb-index file" ++} ++ ++# Close GDB. ++gdb_exit ++ ++# Validate that the index-file FILENAME has made efficient use of its ++# symbol hash table. Calculate the number of symbols in the hash ++# table and the total hash table size. The hash table starts with ++# 1024 entries, and then doubles each time it is filled to 75%. At ++# 75% filled, doubling the size takes it to 37.5% filled. ++# ++# Thus, the hash table is correctly filled if: ++# 1. Its size is 1024 (i.e. it has not yet had its first doubling), or ++# 2. Its filled percentage is over 37% ++# ++# We could check that it is not over filled, but I don't as that's not ++# really an issue. But we did once have a bug where the table was ++# doubled incorrectly, in which case we'd see a filled percentage of ++# around 2% in some cases, which is a huge waste of disk space. ++proc check_symbol_table_usage { filename } { ++ # Open the file in binary mode and read-only mode. ++ set fp [open $filename rb] ++ ++ # Configure the channel to use binary translation. ++ fconfigure $fp -translation binary ++ ++ # Read the first 8 bytes of the file, which contain the header of ++ # the index section. ++ set header [read $fp [expr 7 * 4]] ++ ++ # Scan the header to get the version, the CU list offset, and the ++ # types CU list offset. ++ binary scan $header iiiiii version \ ++ _ _ _ symbol_table_offset shortcut_offset ++ ++ # The length of the symbol hash table (in entries). ++ set len [expr ($shortcut_offset - $symbol_table_offset) / 8] ++ ++ # Now walk the hash table and count how many entries are in use. ++ set offset $symbol_table_offset ++ set count 0 ++ while { $offset < $shortcut_offset } { ++ seek $fp $offset ++ set entry [read $fp 8] ++ binary scan $entry ii name_ptr flags ++ if { $name_ptr != 0 } { ++ incr count ++ } ++ ++ incr offset 8 ++ } ++ ++ # Close the file. ++ close $fp ++ ++ # Calculate how full the cache is. ++ set pct [expr (100 * double($count)) / $len] ++ ++ # Write our results out to the gdb.log. ++ verbose -log "Hash table size: $len" ++ verbose -log "Hash table entries: $count" ++ verbose -log "Percentage usage: $pct%" ++ ++ # The minimum fill percentage is actually 37.5%, but we give TCL a ++ # little flexibility in case the FP maths give a result a little ++ # off. ++ gdb_assert { $len == 1024 || $pct > 37 } \ ++ "symbol hash table usage" ++} ++ ++set index_filename_base [file tail $filename] ++check_symbol_table_usage "$dir1/${index_filename_base}.gdb-index" |