summary | refs | log | tree | commit | diff
path: root/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
diff options
context:
space:
mode:
author CoprDistGit <infra@openeuler.org> 2025-02-28 10:03:49 +0000
committer CoprDistGit <infra@openeuler.org> 2025-02-28 10:03:49 +0000
commit 73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree 8e28b63e478c43c252f18b49836dff7313affe54 /0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
parent 49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gccopeneuler24.03_LTS_SP1
Diffstat (limited to '0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch')
-rw-r--r-- 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch | 467
1 files changed, 467 insertions, 0 deletions
diff --git a/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
new file mode 100644
index 0000000..fb3f7a8
--- /dev/null
+++ b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
@@ -0,0 +1,467 @@
+From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch
+ strings
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d
+
+aarch64-common.cc has two arrays, one maintaining the original
+definition order and one sorted by population count. Sorting
+by population count was a way of ensuring topological ordering,
+taking advantage of the fact that the entries are partially
+ordered by the subset relation. However, the sorting is not
+needed now that the .def file is forced to have topological
+order from the outset.
+
+Other changes are:
+
+(1) The population count used:
+
+ uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
+ uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
+ int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
+ int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
+
+ where I think the & was supposed to be |. This meant that the
+ counts would always be 1 in practice, since flag_canonical is
+ a single bit. This led us to printing +nofp+nosimd even though
+ GCC "knows" (and GAS agrees) that +nofp disables simd.
+
+(2) The .arch output code converts +aes+sha2 to +crypto. I think
+ the main reason for doing this is to support assemblers that
+ predate the individual per-feature crypto flags. It therefore
+ seems more natural to treat it as a special case, rather than
+ as an instance of a general pattern. Hopefully we won't do
+ something similar in future!
+
+ (There is already special handling of CRC, for different reasons.)
+
+(3) Previously, if the /proc/cpuinfo code saw a feature like sve,
+ it would assume the presence of all the features that sve
+ depends on. It would be possible to keep that behaviour
+ if necessary, but it was simpler to assume the presence of
+ fp16 (say) only when fphp is present. There's an argument
+ that that's more conservatively correct too.
+
+gcc/
+ * common/config/aarch64/aarch64-common.cc
+ (TARGET_OPTION_INIT_STRUCT): Delete.
+ (aarch64_option_extension): Remove is_synthetic_flag.
+ (all_extensions): Update accordingly.
+ (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete.
+ (aarch64_option_init_struct, aarch64_contains_opt): Delete.
+ (aarch64_get_extension_string_for_isa_flags): Rewrite to use
+ all_extensions instead of all_extensions_by_on.
+
+gcc/testsuite/
+ * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve.
+ * gcc.target/aarch64/cpunative/info_9: Likewise svesm4.
+ * gcc.target/aarch64/cpunative/info_15: Likewise.
+ * gcc.target/aarch64/cpunative/info_16: Likewise sve2.
+ * gcc.target/aarch64/cpunative/info_17: Likewise.
+ * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp
+ rather than +nofp+nosimd.
+ * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise.
+ * gcc.target/aarch64/target_attr_15.c: Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 244 ++++--------------
+ .../gcc.target/aarch64/cpunative/info_15 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_16 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_17 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_8 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_9 | 2 +-
+ .../aarch64/cpunative/native_cpu_10.c | 2 +-
+ .../aarch64/cpunative/native_cpu_2.c | 2 +-
+ .../gcc.target/aarch64/target_attr_15.c | 2 +-
+ 9 files changed, 55 insertions(+), 205 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 74729bb30..057dc094d 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -42,8 +42,6 @@
+
+ #undef TARGET_OPTION_OPTIMIZATION_TABLE
+ #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table
+-#undef TARGET_OPTION_INIT_STRUCT
+-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct
+
+ #define INVALID_IMP ((unsigned) -1)
+
+@@ -209,7 +207,6 @@ struct aarch64_option_extension
+ const uint64_t flag_canonical;
+ const uint64_t flags_on;
+ const uint64_t flags_off;
+- const bool is_synthetic;
+ };
+
+ /* ISA extensions in AArch64. */
+@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] =
+ {NAME, AARCH64_FL_##IDENT, \
+ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+- & ~AARCH64_FL_##IDENT, \
+- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
++ & ~AARCH64_FL_##IDENT},
+ #include "config/aarch64/aarch64-option-extensions.def"
+- {NULL, 0, 0, 0, false}
+-};
+-
+-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of
+- bits and extension turned on. Cached for efficiency. */
+-static struct aarch64_option_extension all_extensions_by_on[] =
+-{
+-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+- {NAME, AARCH64_FL_##IDENT, \
+- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+- & ~AARCH64_FL_##IDENT, \
+- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
+-#include "config/aarch64/aarch64-option-extensions.def"
+- {NULL, 0, 0, 0, false}
++ {NULL, 0, 0, 0}
+ };
+
+ struct processor_name_to_arch
+@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
+ candidates->safe_push (opt->name);
+ }
+
+-/* Comparer to sort aarch64's feature extensions by population count. Largest
+- first. */
+-
+-typedef const struct aarch64_option_extension opt_ext;
+-
+-int opt_ext_cmp (const void* a, const void* b)
+-{
+- opt_ext *opt_a = (opt_ext *)a;
+- opt_ext *opt_b = (opt_ext *)b;
+-
+- /* We consider the total set of bits an options turns on to be the union of
+- the singleton set containing the option itself and the set of options it
+- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and
+- FL_SIMD. As such the set of bits represented by this option is
+- {FL_DOTPROD, FL_SIMD}. */
+- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
+- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
+- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
+- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
+- int order = popcnt_b - popcnt_a;
+-
+- /* If they have the same amount of bits set, give it a more
+- deterministic ordering by using the value of the bits themselves. */
+- if (order != 0)
+- return order;
+-
+- if (total_flags_a != total_flags_b)
+- return total_flags_a < total_flags_b ? 1 : -1;
+-
+- return 0;
+-}
+-
+-/* Implement TARGET_OPTION_INIT_STRUCT. */
+-
+-static void
+-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED)
+-{
+- /* Sort the extensions based on how many bits they set, order the larger
+- counts first. We sort the list because this makes processing the
+- feature bits O(n) instead of O(n^2). While n is small, the function
+- to calculate the feature strings is called on every options push,
+- pop and attribute change (arm_neon headers, lto etc all cause this to
+- happen quite frequently). It is a trade-off between time and space and
+- so time won. */
+- int n_extensions
+- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension);
+- qsort (&all_extensions_by_on, n_extensions,
+- sizeof (struct aarch64_option_extension), opt_ext_cmp);
+-}
+-
+-/* Checks to see if enough bits from the option OPT are enabled in
+- ISA_FLAG_BITS to be able to replace the individual options with the
+- canonicalized version of the option. This is done based on two rules:
+-
+- 1) Synthetic groups, such as +crypto we only care about the bits that are
+- turned on. e.g. +aes+sha2 can be replaced with +crypto.
+-
+- 2) Options that themselves have a bit, such as +rdma, in this case, all the
+- feature bits they turn on must be available and the bit for the option
+- itself must be. In this case it's effectively a reduction rather than a
+- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would
+- need +rdma+fp+simd which is reduced down to +rdma.
+-*/
+-
+-static bool
+-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt)
+-{
+- uint64_t flags_check
+- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
+-
+- return (isa_flag_bits & flags_check) == flags_check;
+-}
+-
+ /* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS
+ gives the default set of flags which are implied by whatever -march
+ we'd put out. Our job is to figure out the minimal set of "+" and
+@@ -436,118 +345,59 @@ std::string
+ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
+ uint64_t default_arch_flags)
+ {
+- const struct aarch64_option_extension *opt = NULL;
+ std::string outstr = "";
+
+- uint64_t isa_flag_bits = isa_flags;
+-
+- /* Pass one: Minimize the search space by reducing the set of options
+- to the smallest set that still turns on the same features as before in
+- conjunction with the bits that are turned on by default for the selected
+- architecture. */
+- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
++ aarch64_feature_flags current_flags = default_arch_flags;
++
++ /* As a special case, do not assume that the assembler will enable CRC
++ even if it is the default for the architecture. This is required
++ because some CPUs had an incorrect specification in older assemblers:
++ even though CRC should be the default for these cases the -mcpu
++ values would not turn it on.
++
++ However, assemblers with Armv8-R AArch64 support should not have this
++ issue, so we don't need this fix when targeting Armv8-R. */
++ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R)
++ ? AARCH64_FL_CRC : 0);
++
++ /* Add the features in isa_flags & ~current_flags using the smallest
++ possible number of extensions. We can do this by iterating over the
++ array in reverse order, since the array is sorted topologically.
++ But in order to make the output more readable, it seems better
++ to add the strings in definition order. */
++ aarch64_feature_flags added = 0;
++ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
+ {
+- /* If the bit is on by default, then all the options it turns on are also
+- on by default due to the transitive dependencies.
+-
+- If the option is enabled explicitly in the set then we need to emit
+- an option for it. Since this list is sorted by extensions setting the
+- largest number of featers first, we can be sure that nothing else will
+- ever need to set the bits we already set. Consider the following
+- situation:
+-
+- Feat1 = A + B + C
+- Feat2 = A + B
+- Feat3 = A + D
+- Feat4 = B + C
+- Feat5 = C
+-
+- The following results are expected:
+-
+- A + C = A + Feat5
+- B + C = Feat4
+- Feat4 + A = Feat1
+- Feat2 + Feat5 = Feat1
+- Feat1 + C = Feat1
+- Feat3 + Feat4 = Feat1 + D
+-
+- This search assumes that all invidual feature bits are use visible,
+- in other words the user must be able to do +A, +B, +C and +D. */
+- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt))
+- {
+- /* We remove all the dependent bits, to prevent them from being turned
+- on twice. This only works because we assume that all there are
+- individual options to set all bits standalone. */
+-
+- /* PR target/94396.
+-
+- For flags which would already imply a bit that's on by default (e.g
+- fp16fml which implies +fp,+fp16) we must emit the flags that are not
+- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So
+- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit
+- +fp16. But if +fp16fml is used in an architecture where it is
+- completely optional we only have to emit the canonical flag. */
+- uint64_t toggle_bits = opt->flags_on & default_arch_flags;
+- /* Now check to see if the canonical flag is on by default. If it
+- is not then enabling it will enable all bits in flags_on. */
+- if ((opt->flag_canonical & default_arch_flags) == 0)
+- toggle_bits = opt->flags_on;
+-
+- isa_flag_bits &= ~toggle_bits;
+- isa_flag_bits |= opt->flag_canonical;
+- }
+- }
++ auto &opt = all_extensions[i];
+
+- /* By toggling bits on and off, we may have set bits on that are already
+- enabled by default. So we mask the default set out so we don't emit an
+- option for them. Instead of checking for this each time during Pass One
+- we just mask all default bits away at the end. */
+- isa_flag_bits &= ~default_arch_flags;
+-
+- /* We now have the smallest set of features we need to process. A subsequent
+- linear scan of the bits in isa_flag_bits will allow us to print the ext
+- names. However as a special case if CRC was enabled before, always print
+- it. This is required because some CPUs have an incorrect specification
+- in older assemblers. Even though CRC should be the default for these
+- cases the -mcpu values won't turn it on.
+-
+- Note that assemblers with Armv8-R AArch64 support should not have this
+- issue, so we don't need this fix when targeting Armv8-R. */
+- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
+- isa_flag_bits |= AARCH64_ISA_CRC;
+-
+- /* Pass Two:
+- Print the option names that we're sure we must turn on. These are only
+- optional extension names. Mandatory ones have already been removed and
+- ones we explicitly want off have been too. */
+- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
+- {
+- if (isa_flag_bits & opt->flag_canonical)
+- {
+- outstr += "+";
+- outstr += opt->name;
+- }
+- }
++ /* As a special case, emit +crypto rather than +aes+sha2,
++ in order to support assemblers that predate the separate
++ per-feature crypto flags. */
++ auto flags = opt.flag_canonical;
++ if (flags == AARCH64_FL_CRYPTO)
++ flags = AARCH64_FL_AES | AARCH64_FL_SHA2;
+
+- /* Pass Three:
+- Print out a +no for any mandatory extension that we are
+- turning off. By this point aarch64_parse_extension would have ensured
+- that any optional extensions are turned off. The only things left are
+- things that can't be turned off usually, e.g. something that is on by
+- default because it's mandatory and we want it off. For turning off bits
+- we don't guarantee the smallest set of flags, but instead just emit all
+- options the user has specified.
+-
+- The assembler requires all +<opts> to be printed before +no<opts>. */
+- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
+- {
+- if ((~isa_flags) & opt->flag_canonical
+- && !((~default_arch_flags) & opt->flag_canonical))
++ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
+ {
+- outstr += "+no";
+- outstr += opt->name;
++ current_flags |= opt.flag_canonical | opt.flags_on;
++ added |= opt.flag_canonical;
+ }
+ }
++ for (auto &opt : all_extensions)
++ if (added & opt.flag_canonical)
++ {
++ outstr += "+";
++ outstr += opt.name;
++ }
++
++ /* Remove the features in current_flags & ~isa_flags. */
++ for (auto &opt : all_extensions)
++ if (opt.flag_canonical & current_flags & ~isa_flags)
++ {
++ current_flags &= ~(opt.flag_canonical | opt.flags_off);
++ outstr += "+no";
++ outstr += opt.name;
++ }
+
+ return outstr;
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
+index bc6453945..6b425ea20 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
+@@ -1,6 +1,6 @@
+ processor : 0
+ BogoMIPS : 100.00
+-Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp
++Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4
+ CPU implementer : 0x41
+ CPU architecture: 8
+ CPU variant : 0x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
+index 2c04ff19c..26f01c496 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
+@@ -1,6 +1,6 @@
+ processor : 0
+ BogoMIPS : 100.00
+-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
++Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
+ CPU implementer : 0xfe
+ CPU architecture: 8
+ CPU variant : 0x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
+index 2c04ff19c..26f01c496 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
+@@ -1,6 +1,6 @@
+ processor : 0
+ BogoMIPS : 100.00
+-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
++Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
+ CPU implementer : 0xfe
+ CPU architecture: 8
+ CPU variant : 0x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
+index d6d9d03a2..76da16c57 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
+@@ -1,6 +1,6 @@
+ processor : 0
+ BogoMIPS : 100.00
+-Features : asimd sve fp
++Features : asimd sve fp fphp asimdhp
+ CPU implementer : 0x41
+ CPU architecture: 8
+ CPU variant : 0x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
+index c9aa4a9a0..14703dd1d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
+@@ -1,6 +1,6 @@
+ processor : 0
+ BogoMIPS : 100.00
+-Features : asimd fp svesm4
++Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4
+ CPU implementer : 0x41
+ CPU architecture: 8
+ CPU variant : 0x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
+index 6a753965c..ddb06b822 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
+@@ -7,6 +7,6 @@ int main()
+ return 0;
+ }
+
+-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
+
+ /* Test one with no entry in feature list. */
+diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
+index aad71f434..edbdb5626 100644
+--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
+@@ -7,6 +7,6 @@ int main()
+ return 0;
+ }
+
+-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
+
+ /* Test one where asimd is provided byt no fp. */
+diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
+index 108b372e4..069a00108 100644
+--- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
++++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
+@@ -10,4 +10,4 @@ foo (int a)
+ return a + 1;
+ }
+
+-/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */
++/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */
+--
+2.33.0
+