diff options
Diffstat (limited to '0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch')
-rw-r--r-- | 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch | 467 |
1 file changed, 467 insertions, 0 deletions
diff --git a/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch new file mode 100644 index 0000000..fb3f7a8 --- /dev/null +++ b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch @@ -0,0 +1,467 @@ +From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:55 +0100 +Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch + strings + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d + +aarch64-common.cc has two arrays, one maintaining the original +definition order and one sorted by population count. Sorting +by population count was a way of ensuring topological ordering, +taking advantage of the fact that the entries are partially +ordered by the subset relation. However, the sorting is not +needed now that the .def file is forced to have topological +order from the outset. + +Other changes are: + +(1) The population count used: + + uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; + uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; + int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); + int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); + + where I think the & was supposed to be |. This meant that the + counts would always be 1 in practice, since flag_canonical is + a single bit. This led us to printing +nofp+nosimd even though + GCC "knows" (and GAS agrees) that +nofp disables simd. + +(2) The .arch output code converts +aes+sha2 to +crypto. I think + the main reason for doing this is to support assemblers that + predate the individual per-feature crypto flags. It therefore + seems more natural to treat it as a special case, rather than + as an instance of a general pattern. Hopefully we won't do + something similar in future! 
+ + (There is already special handling of CRC, for different reasons.) + +(3) Previously, if the /proc/cpuinfo code saw a feature like sve, + it would assume the presence of all the features that sve + depends on. It would be possible to keep that behaviour + if necessary, but it was simpler to assume the presence of + fp16 (say) only when fphp is present. There's an argument + that that's more conservatively correct too. + +gcc/ + * common/config/aarch64/aarch64-common.cc + (TARGET_OPTION_INIT_STRUCT): Delete. + (aarch64_option_extension): Remove is_synthetic. + (all_extensions): Update accordingly. + (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete. + (aarch64_option_init_struct, aarch64_contains_opt): Delete. + (aarch64_get_extension_string_for_isa_flags): Rewrite to use + all_extensions instead of all_extensions_by_on. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve. + * gcc.target/aarch64/cpunative/info_9: Likewise svesm4. + * gcc.target/aarch64/cpunative/info_15: Likewise. + * gcc.target/aarch64/cpunative/info_16: Likewise sve2. + * gcc.target/aarch64/cpunative/info_17: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp + rather than +nofp+nosimd. + * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise. + * gcc.target/aarch64/target_attr_15.c: Likewise. 
+--- + gcc/common/config/aarch64/aarch64-common.cc | 244 ++++-------------- + .../gcc.target/aarch64/cpunative/info_15 | 2 +- + .../gcc.target/aarch64/cpunative/info_16 | 2 +- + .../gcc.target/aarch64/cpunative/info_17 | 2 +- + .../gcc.target/aarch64/cpunative/info_8 | 2 +- + .../gcc.target/aarch64/cpunative/info_9 | 2 +- + .../aarch64/cpunative/native_cpu_10.c | 2 +- + .../aarch64/cpunative/native_cpu_2.c | 2 +- + .../gcc.target/aarch64/target_attr_15.c | 2 +- + 9 files changed, 55 insertions(+), 205 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 74729bb30..057dc094d 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -42,8 +42,6 @@ + + #undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table +-#undef TARGET_OPTION_INIT_STRUCT +-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct + + #define INVALID_IMP ((unsigned) -1) + +@@ -209,7 +207,6 @@ struct aarch64_option_extension + const uint64_t flag_canonical; + const uint64_t flags_on; + const uint64_t flags_off; +- const bool is_synthetic; + }; + + /* ISA extensions in AArch64. */ +@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] = + {NAME, AARCH64_FL_##IDENT, \ + feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ + feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, ++ & ~AARCH64_FL_##IDENT}, + #include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} +-}; +- +-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of +- bits and extension turned on. Cached for efficiency. 
*/ +-static struct aarch64_option_extension all_extensions_by_on[] = +-{ +-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ +- {NAME, AARCH64_FL_##IDENT, \ +- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ +- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ +- & ~AARCH64_FL_##IDENT, \ +- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, +-#include "config/aarch64/aarch64-option-extensions.def" +- {NULL, 0, 0, 0, false} ++ {NULL, 0, 0, 0} + }; + + struct processor_name_to_arch +@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates) + candidates->safe_push (opt->name); + } + +-/* Comparer to sort aarch64's feature extensions by population count. Largest +- first. */ +- +-typedef const struct aarch64_option_extension opt_ext; +- +-int opt_ext_cmp (const void* a, const void* b) +-{ +- opt_ext *opt_a = (opt_ext *)a; +- opt_ext *opt_b = (opt_ext *)b; +- +- /* We consider the total set of bits an options turns on to be the union of +- the singleton set containing the option itself and the set of options it +- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and +- FL_SIMD. As such the set of bits represented by this option is +- {FL_DOTPROD, FL_SIMD}. */ +- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; +- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; +- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); +- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); +- int order = popcnt_b - popcnt_a; +- +- /* If they have the same amount of bits set, give it a more +- deterministic ordering by using the value of the bits themselves. */ +- if (order != 0) +- return order; +- +- if (total_flags_a != total_flags_b) +- return total_flags_a < total_flags_b ? 1 : -1; +- +- return 0; +-} +- +-/* Implement TARGET_OPTION_INIT_STRUCT. 
*/ +- +-static void +-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) +-{ +- /* Sort the extensions based on how many bits they set, order the larger +- counts first. We sort the list because this makes processing the +- feature bits O(n) instead of O(n^2). While n is small, the function +- to calculate the feature strings is called on every options push, +- pop and attribute change (arm_neon headers, lto etc all cause this to +- happen quite frequently). It is a trade-off between time and space and +- so time won. */ +- int n_extensions +- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension); +- qsort (&all_extensions_by_on, n_extensions, +- sizeof (struct aarch64_option_extension), opt_ext_cmp); +-} +- +-/* Checks to see if enough bits from the option OPT are enabled in +- ISA_FLAG_BITS to be able to replace the individual options with the +- canonicalized version of the option. This is done based on two rules: +- +- 1) Synthetic groups, such as +crypto we only care about the bits that are +- turned on. e.g. +aes+sha2 can be replaced with +crypto. +- +- 2) Options that themselves have a bit, such as +rdma, in this case, all the +- feature bits they turn on must be available and the bit for the option +- itself must be. In this case it's effectively a reduction rather than a +- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would +- need +rdma+fp+simd which is reduced down to +rdma. +-*/ +- +-static bool +-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) +-{ +- uint64_t flags_check +- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical; +- +- return (isa_flag_bits & flags_check) == flags_check; +-} +- + /* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS + gives the default set of flags which are implied by whatever -march + we'd put out. 
Our job is to figure out the minimal set of "+" and +@@ -436,118 +345,59 @@ std::string + aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, + uint64_t default_arch_flags) + { +- const struct aarch64_option_extension *opt = NULL; + std::string outstr = ""; + +- uint64_t isa_flag_bits = isa_flags; +- +- /* Pass one: Minimize the search space by reducing the set of options +- to the smallest set that still turns on the same features as before in +- conjunction with the bits that are turned on by default for the selected +- architecture. */ +- for (opt = all_extensions_by_on; opt->name != NULL; opt++) ++ aarch64_feature_flags current_flags = default_arch_flags; ++ ++ /* As a special case, do not assume that the assembler will enable CRC ++ even if it is the default for the architecture. This is required ++ because some CPUs had an incorrect specification in older assemblers: ++ even though CRC should be the default for these cases the -mcpu ++ values would not turn it on. ++ ++ However, assemblers with Armv8-R AArch64 support should not have this ++ issue, so we don't need this fix when targeting Armv8-R. */ ++ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R) ++ ? AARCH64_FL_CRC : 0); ++ ++ /* Add the features in isa_flags & ~current_flags using the smallest ++ possible number of extensions. We can do this by iterating over the ++ array in reverse order, since the array is sorted topologically. ++ But in order to make the output more readable, it seems better ++ to add the strings in definition order. */ ++ aarch64_feature_flags added = 0; ++ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; ) + { +- /* If the bit is on by default, then all the options it turns on are also +- on by default due to the transitive dependencies. +- +- If the option is enabled explicitly in the set then we need to emit +- an option for it. 
Since this list is sorted by extensions setting the +- largest number of featers first, we can be sure that nothing else will +- ever need to set the bits we already set. Consider the following +- situation: +- +- Feat1 = A + B + C +- Feat2 = A + B +- Feat3 = A + D +- Feat4 = B + C +- Feat5 = C +- +- The following results are expected: +- +- A + C = A + Feat5 +- B + C = Feat4 +- Feat4 + A = Feat1 +- Feat2 + Feat5 = Feat1 +- Feat1 + C = Feat1 +- Feat3 + Feat4 = Feat1 + D +- +- This search assumes that all invidual feature bits are use visible, +- in other words the user must be able to do +A, +B, +C and +D. */ +- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt)) +- { +- /* We remove all the dependent bits, to prevent them from being turned +- on twice. This only works because we assume that all there are +- individual options to set all bits standalone. */ +- +- /* PR target/94396. +- +- For flags which would already imply a bit that's on by default (e.g +- fp16fml which implies +fp,+fp16) we must emit the flags that are not +- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So +- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit +- +fp16. But if +fp16fml is used in an architecture where it is +- completely optional we only have to emit the canonical flag. */ +- uint64_t toggle_bits = opt->flags_on & default_arch_flags; +- /* Now check to see if the canonical flag is on by default. If it +- is not then enabling it will enable all bits in flags_on. */ +- if ((opt->flag_canonical & default_arch_flags) == 0) +- toggle_bits = opt->flags_on; +- +- isa_flag_bits &= ~toggle_bits; +- isa_flag_bits |= opt->flag_canonical; +- } +- } ++ auto &opt = all_extensions[i]; + +- /* By toggling bits on and off, we may have set bits on that are already +- enabled by default. So we mask the default set out so we don't emit an +- option for them. 
Instead of checking for this each time during Pass One +- we just mask all default bits away at the end. */ +- isa_flag_bits &= ~default_arch_flags; +- +- /* We now have the smallest set of features we need to process. A subsequent +- linear scan of the bits in isa_flag_bits will allow us to print the ext +- names. However as a special case if CRC was enabled before, always print +- it. This is required because some CPUs have an incorrect specification +- in older assemblers. Even though CRC should be the default for these +- cases the -mcpu values won't turn it on. +- +- Note that assemblers with Armv8-R AArch64 support should not have this +- issue, so we don't need this fix when targeting Armv8-R. */ +- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) +- isa_flag_bits |= AARCH64_ISA_CRC; +- +- /* Pass Two: +- Print the option names that we're sure we must turn on. These are only +- optional extension names. Mandatory ones have already been removed and +- ones we explicitly want off have been too. */ +- for (opt = all_extensions_by_on; opt->name != NULL; opt++) +- { +- if (isa_flag_bits & opt->flag_canonical) +- { +- outstr += "+"; +- outstr += opt->name; +- } +- } ++ /* As a special case, emit +crypto rather than +aes+sha2, ++ in order to support assemblers that predate the separate ++ per-feature crypto flags. */ ++ auto flags = opt.flag_canonical; ++ if (flags == AARCH64_FL_CRYPTO) ++ flags = AARCH64_FL_AES | AARCH64_FL_SHA2; + +- /* Pass Three: +- Print out a +no for any mandatory extension that we are +- turning off. By this point aarch64_parse_extension would have ensured +- that any optional extensions are turned off. The only things left are +- things that can't be turned off usually, e.g. something that is on by +- default because it's mandatory and we want it off. For turning off bits +- we don't guarantee the smallest set of flags, but instead just emit all +- options the user has specified. 
+- +- The assembler requires all +<opts> to be printed before +no<opts>. */ +- for (opt = all_extensions_by_on; opt->name != NULL; opt++) +- { +- if ((~isa_flags) & opt->flag_canonical +- && !((~default_arch_flags) & opt->flag_canonical)) ++ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags) + { +- outstr += "+no"; +- outstr += opt->name; ++ current_flags |= opt.flag_canonical | opt.flags_on; ++ added |= opt.flag_canonical; + } + } ++ for (auto &opt : all_extensions) ++ if (added & opt.flag_canonical) ++ { ++ outstr += "+"; ++ outstr += opt.name; ++ } ++ ++ /* Remove the features in current_flags & ~isa_flags. */ ++ for (auto &opt : all_extensions) ++ if (opt.flag_canonical & current_flags & ~isa_flags) ++ { ++ current_flags &= ~(opt.flag_canonical | opt.flags_off); ++ outstr += "+no"; ++ outstr += opt.name; ++ } + + return outstr; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 +index bc6453945..6b425ea20 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp ++Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4 + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 +index 2c04ff19c..26f01c496 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 ++Features : 
fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp + CPU implementer : 0xfe + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 +index 2c04ff19c..26f01c496 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 ++Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp + CPU implementer : 0xfe + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 +index d6d9d03a2..76da16c57 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : asimd sve fp ++Features : asimd sve fp fphp asimdhp + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 +index c9aa4a9a0..14703dd1d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : asimd fp svesm4 ++Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4 + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c +index 6a753965c..ddb06b822 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch 
armv8-a\+nofp\+nosimd} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ + + /* Test one with no entry in feature list. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c +index aad71f434..edbdb5626 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ + + /* Test one where asimd is provided byt no fp. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c +index 108b372e4..069a00108 100644 +--- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c ++++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c +@@ -10,4 +10,4 @@ foo (int a) + return a + 1; + } + +-/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */ ++/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */ +-- +2.33.0 + |