author    | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000
committer | CoprDistGit <infra@openeuler.org> | 2025-02-28 10:03:49 +0000
commit    | 73127104a245052cd5cf29cdaaca3e5c32c70348 (patch)
tree      | 8e28b63e478c43c252f18b49836dff7313affe54 /0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
parent    | 49d3feaf4665cdb07576fc1a2382a4d82a612d35 (diff)
automatic import of gcc (openeuler24.03_LTS_SP1)
Diffstat (limited to '0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch')
-rw-r--r-- | 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch | 469
1 file changed, 469 insertions, 0 deletions
diff --git a/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch b/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
new file mode 100644
index 0000000..607c83c
--- /dev/null
+++ b/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
@@ -0,0 +1,469 @@
+From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001
+From: Christophe Lyon <christophe.lyon@arm.com>
+Date: Fri, 11 Mar 2022 16:21:02 +0000
+Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22
+
+This patch updates the aarch64 backend as needed to support DFP modes
+(SD, DD and TD).
+
+Changes v1->v2:
+
+* Drop support for DFP modes in
+  aarch64_gen_{load||store}[wb]_pair as these are only used in
+  prologue/epilogue where DFP modes are not used. Drop the
+  changes to the corresponding patterns in aarch64.md, and
+  useless GPF_PAIR iterator.
+
+* In aarch64_reinterpret_float_as_int, handle DDmode the same way
+  as DFmode (needed in case the representation of the
+  floating-point value can be loaded using mov/movk.
+
+* In aarch64_float_const_zero_rtx_p, reject constants with DFP
+  mode: when X is zero, the callers want to emit either '0' or
+  'zr' depending on the context, which is not the way 0.0 is
+  represented in DFP mode (in particular fmov d0, #0 is not right
+  for DFP).
+
+* In aarch64_legitimate_constant_p, accept DFP
+
+2022-03-31  Christophe Lyon  <christophe.lyon@arm.com>
+
+	gcc/
+	* config/aarch64/aarch64.cc
+	(aarch64_split_128bit_move): Handle DFP modes.
+	(aarch64_mode_valid_for_sched_fusion_p): Likewise.
+	(aarch64_classify_address): Likewise.
+	(aarch64_legitimize_address_displacement): Likewise.
+	(aarch64_reinterpret_float_as_int): Likewise.
+	(aarch64_float_const_zero_rtx_p): Likewise.
+	(aarch64_can_const_movi_rtx_p): Likewise.
+	(aarch64_anchor_offset): Likewise.
+	(aarch64_secondary_reload): Likewise.
+	(aarch64_rtx_costs): Likewise.
+	(aarch64_legitimate_constant_p): Likewise.
+	(aarch64_gimplify_va_arg_expr): Likewise.
+	(aapcs_vfp_sub_candidate): Likewise.
+	(aarch64_vfp_is_call_or_return_candidate): Likewise.
+	(aarch64_output_scalar_simd_mov_immediate): Likewise.
+	(aarch64_gen_adjusted_ldpstp): Likewise.
+	(aarch64_scalar_mode_supported_p): Accept DFP modes if enabled.
+	* config/aarch64/aarch64.md
+	(movsf_aarch64): Use SFD iterator and rename into
+	mov<mode>_aarch64.
+	(movdf_aarch64): Use DFD iterator and rename into
+	mov<mode>_aarch64.
+	(movtf_aarch64): Use TFD iterator and rename into
+	mov<mode>_aarch64.
+	(split pattern for move TF mode): Use TFD iterator.
+	* config/aarch64/iterators.md
+	(GPF_TF_F16_MOV): Add DFP modes.
+	(SFD, DFD, TFD): New iterators.
+	(GPF_TF): Add DFP modes.
+	(TX, DX, DX2): Likewise.
+---
+ gcc/config/aarch64/aarch64.cc   | 82 ++++++++++++++++++++++-----------
+ gcc/config/aarch64/aarch64.md   | 34 +++++++-------
+ gcc/config/aarch64/iterators.md | 24 +++++++---
+ 3 files changed, 89 insertions(+), 51 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 055b436b1..02210ed13 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
+ 
+   machine_mode mode = GET_MODE (dst);
+ 
+-  gcc_assert (mode == TImode || mode == TFmode);
++  gcc_assert (mode == TImode || mode == TFmode || mode == TDmode);
+   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
+   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
+ 
+@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
+ {
+   return mode == SImode || mode == DImode
+ 	 || mode == SFmode || mode == DFmode
++	 || mode == SDmode || mode == DDmode
+ 	 || (aarch64_vector_mode_supported_p (mode)
+ 	     && (known_eq (GET_MODE_SIZE (mode), 8)
+ 		 || (known_eq (GET_MODE_SIZE (mode), 16)
+@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
+     vec_flags &= ~VEC_PARTIAL;
+ 
+   /* On BE, we use load/store pair for all large int mode load/stores.
+-     TI/TFmode may also use a load/store pair.  */
++     TI/TF/TDmode may also use a load/store pair.  */
+   bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
+   bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
+ 			    || type == ADDR_QUERY_LDP_STP_N
+ 			    || mode == TImode
+ 			    || mode == TFmode
++			    || mode == TDmode
+ 			    || (BYTES_BIG_ENDIAN && advsimd_struct_p));
+   /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
+      corresponds to the actual size of the memory being loaded/stored and the
+@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 	  info->offset = op1;
+ 	  info->const_offset = offset;
+ 
+-	  /* TImode and TFmode values are allowed in both pairs of X
++	  /* TImode, TFmode and TDmode values are allowed in both pairs of X
+ 	     registers and individual Q registers.  The available
+ 	     address modes are:
+ 	     X,X: 7-bit signed scaled offset
+ 	     Q:   9-bit signed offset
+ 	     When performing the check for pairs of X registers i.e. LDP/STP
+ 	     pass down DImode since that is the natural size of the LDP/STP
+ 	     instruction memory accesses.  */
+-	  if (mode == TImode || mode == TFmode)
++	  if (mode == TImode || mode == TFmode || mode == TDmode)
+ 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
+ 		    && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
+ 			|| offset_12bit_unsigned_scaled_p (mode, offset)));
+@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 	  info->offset = XEXP (XEXP (x, 1), 1);
+ 	  info->const_offset = offset;
+ 
+-	  /* TImode and TFmode values are allowed in both pairs of X
++	  /* TImode, TFmode and TDmode values are allowed in both pairs of X
+ 	     registers and individual Q registers.  The available
+ 	     address modes are:
+ 	     X,X: 7-bit signed scaled offset
+ 	     Q:   9-bit signed offset
+ 	     We conservatively require an offset representable in either mode.
+ 	   */
+-	  if (mode == TImode || mode == TFmode)
++	  if (mode == TImode || mode == TFmode || mode == TDmode)
+ 	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
+ 		    && aarch64_offset_9bit_signed_unscaled_p (mode, offset));
+ 
+@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
+      offset.  Use 4KB range for 1- and 2-byte accesses and a 16KB
+      range otherwise to increase opportunities for sharing the base
+      address of different sizes.  Unaligned accesses use the signed
+-     9-bit range, TImode/TFmode use the intersection of signed
++     9-bit range, TImode/TFmode/TDmode use the intersection of signed
+      scaled 7-bit and signed 9-bit offset.  */
+-  if (mode == TImode || mode == TFmode)
++  if (mode == TImode || mode == TFmode || mode == TDmode)
+     second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
+   else if ((const_offset & (size - 1)) != 0)
+     second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
+@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
+ 			  CONST_DOUBLE_REAL_VALUE (value),
+ 			  REAL_MODE_FORMAT (mode));
+ 
+-      if (mode == DFmode)
++      if (mode == DFmode || mode == DDmode)
+ 	{
+ 	  int order = BYTES_BIG_ENDIAN ? 1 : 0;
+ 	  ival = zext_hwi (res[order], 32);
+@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x)
+   return false;
+ }
+ 
+-/* Return TRUE if rtx X is immediate constant 0.0 */
++/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal
++   Floating Point).  */
+ bool
+ aarch64_float_const_zero_rtx_p (rtx x)
+ {
+-  if (GET_MODE (x) == VOIDmode)
++  /* 0.0 in Decimal Floating Point cannot be represented by #0 or
++     zr as our callers expect, so no need to check the actual
++     value if X is of Decimal Floating Point type.  */
++  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT)
+     return false;
+ 
+   if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
+@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
+   else
+     return false;
+ 
+-  /* use a 64 bit mode for everything except for DI/DF mode, where we use
++  /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
+      a 128 bit vector mode.  */
+   int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
+ 
+@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
+   if (IN_RANGE (offset, -256, 0))
+     return 0;
+ 
+-  if (mode == TImode || mode == TFmode)
++  if (mode == TImode || mode == TFmode || mode == TDmode)
+     return (offset + 0x100) & ~0x1ff;
+ 
+   /* Use 12-bit offset by access size.  */
+@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+ 
+   /* Without the TARGET_SIMD instructions we cannot move a Q register
+      to a Q register directly.  We need a scratch.  */
+-  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
++  if (REG_P (x)
++      && (mode == TFmode || mode == TImode || mode == TDmode)
++      && mode == GET_MODE (x)
+       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+       && reg_class_subset_p (rclass, FP_REGS))
+     {
+@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+       return NO_REGS;
+     }
+ 
+-  /* A TFmode or TImode memory access should be handled via an FP_REGS
++  /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS
+      because AArch64 has richer addressing modes for LDR/STR instructions
+      than LDP/STP instructions.  */
+   if (TARGET_FLOAT && rclass == GENERAL_REGS
+       && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
+     return FP_REGS;
+ 
+-  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
++  if (rclass == FP_REGS
++      && (mode == TImode || mode == TFmode || mode == TDmode)
++      && CONSTANT_P(x))
+       return GENERAL_REGS;
+ 
+   return NO_REGS;
+@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+ 	    *cost += extra_cost->ldst.storev;
+ 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
+ 	    *cost += extra_cost->ldst.store;
+-	  else if (mode == SFmode)
++	  else if (mode == SFmode || mode == SDmode)
+ 	    *cost += extra_cost->ldst.storef;
+-	  else if (mode == DFmode)
++	  else if (mode == DFmode || mode == DDmode)
+ 	    *cost += extra_cost->ldst.stored;
+ 
+ 	  *cost +=
+@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+ 	  /* mov[df,sf]_aarch64.  */
+ 	  if (aarch64_float_const_representable_p (x))
+ 	    /* FMOV (scalar immediate).  */
+-	    *cost += extra_cost->fp[mode == DFmode].fpconst;
++	    *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
+ 	  else if (!aarch64_float_const_zero_rtx_p (x))
+ 	    {
+ 	      /* This will be a load from memory.  */
+-	      if (mode == DFmode)
++	      if (mode == DFmode || mode == DDmode)
+ 		*cost += extra_cost->ldst.loadd;
+ 	      else
+ 		*cost += extra_cost->ldst.loadf;
+@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+ 	    *cost += extra_cost->ldst.loadv;
+ 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
+ 	    *cost += extra_cost->ldst.load;
+-	  else if (mode == SFmode)
++	  else if (mode == SFmode || mode == SDmode)
+ 	    *cost += extra_cost->ldst.loadf;
+-	  else if (mode == DFmode)
++	  else if (mode == DFmode || mode == DDmode)
+ 	    *cost += extra_cost->ldst.loadd;
+ 
+ 	  *cost +=
+@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
+ {
+   /* Support CSE and rematerialization of common constants.  */
+   if (CONST_INT_P (x)
+-      || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT))
++      || CONST_DOUBLE_P (x))
+     return true;
+ 
+   /* Only accept variable-length vector constants if they can be
+@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ 	  field_t = long_double_type_node;
+ 	  field_ptr_t = long_double_ptr_type_node;
+ 	  break;
++	case SDmode:
++	  field_t = dfloat32_type_node;
++	  field_ptr_t = build_pointer_type (dfloat32_type_node);
++	  break;
++	case DDmode:
++	  field_t = dfloat64_type_node;
++	  field_ptr_t = build_pointer_type (dfloat64_type_node);
++	  break;
++	case TDmode:
++	  field_t = dfloat128_type_node;
++	  field_ptr_t = build_pointer_type (dfloat128_type_node);
++	  break;
+ 	case E_HFmode:
+ 	  field_t = aarch64_fp16_type_node;
+ 	  field_ptr_t = aarch64_fp16_ptr_type_node;
+@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
+     case REAL_TYPE:
+       mode = TYPE_MODE (type);
+       if (mode != DFmode && mode != SFmode
+-	  && mode != TFmode && mode != HFmode)
++	  && mode != TFmode && mode != HFmode
++	  && mode != SDmode && mode != DDmode && mode != TDmode)
+ 	return -1;
+ 
+       if (*modep == VOIDmode)
+@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
+   machine_mode new_mode = VOIDmode;
+   bool composite_p = aarch64_composite_type_p (type, mode);
+ 
+-  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
++  if ((!composite_p
++       && (GET_MODE_CLASS (mode) == MODE_FLOAT
++	   || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT))
+       || aarch64_short_vector_p (type, mode))
+     {
+       *count = 1;
+@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
+     }
+ 
+   machine_mode vmode;
+-  /* use a 64 bit mode for everything except for DI/DF mode, where we use
++  /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
+      a 128 bit vector mode.  */
+   int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+ 
+@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
+     base_off = (off_val_1 + off_val_3) / 2;
+   else
+     /* However, due to issues with negative LDP/STP offset generation for
+-       larger modes, for DF, DI and vector modes. we must not use negative
++       larger modes, for DF, DD, DI and vector modes. we must not use negative
+        addresses smaller than 9 signed unadjusted bits can store.  This
+        provides the most range in this case.  */
+     base_off = off_val_1;
+@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
+ static bool
+ aarch64_scalar_mode_supported_p (scalar_mode mode)
+ {
++  if (DECIMAL_FLOAT_MODE_P (mode))
++    return default_decimal_float_supported_p ();
++
+   return (mode == HFmode
+ 	  ? true
+ 	  : default_scalar_mode_supported_p (mode));
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index a78476c8a..8757a962f 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1476,11 +1476,11 @@
+    (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
+ )
+ 
+-(define_insn "*movsf_aarch64"
+-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+-	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
+-  "TARGET_FLOAT && (register_operand (operands[0], SFmode)
+-    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
++(define_insn "*mov<mode>_aarch64"
++  [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
++	(match_operand:SFD 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
++  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
++    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+   "@
+    movi\\t%0.2s, #0
+    fmov\\t%s0, %w1
+@@ -1500,11 +1500,11 @@
+    (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
+ )
+ 
+-(define_insn "*movdf_aarch64"
+-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+-	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+-  "TARGET_FLOAT && (register_operand (operands[0], DFmode)
+-    || aarch64_reg_or_fp_zero (operands[1], DFmode))"
++(define_insn "*mov<mode>_aarch64"
++  [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
++	(match_operand:DFD 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
++  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
++    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+   "@
+    movi\\t%d0, #0
+    fmov\\t%d0, %x1
+@@ -1545,13 +1545,13 @@
+   }
+ )
+ 
+-(define_insn "*movtf_aarch64"
+-  [(set (match_operand:TF 0
+ 	 "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
+-	(match_operand:TF 1
+ 	 "general_operand"      " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
+-  "TARGET_FLOAT && (register_operand (operands[0], TFmode)
+-    || aarch64_reg_or_fp_zero (operands[1], TFmode))"
++(define_insn "*mov<mode>_aarch64"
++  [(set (match_operand:TFD 0
+ 	 "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
++	(match_operand:TFD 1
+ 	 "general_operand"      " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
++  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
++    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+   "@
+    mov\\t%0.16b, %1.16b
+    #
+@@ -1571,8 +1571,8 @@
+ )
+ 
+ (define_split
+-   [(set (match_operand:TF 0 "register_operand" "")
+-	 (match_operand:TF 1 "nonmemory_operand" ""))]
++   [(set (match_operand:TFD 0 "register_operand" "")
++	 (match_operand:TFD 1 "nonmemory_operand" ""))]
+   "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
+   [(const_int 0)]
+   {
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index 967e6b0b1..d0cd1b788 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -67,14 +67,24 @@
+ (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
+ 
+ ;; Iterator for all scalar floating point modes suitable for moving, including
+-;; special BF type (HF, SF, DF, TF and BF)
+-(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
++;; special BF type and decimal floating point types (HF, SF, DF, TF, BF,
++;; SD, DD and TD)
++(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF SD DD TD])
++
++;; Iterator for scalar 32bit fp modes (SF, SD)
++(define_mode_iterator SFD [SD SF])
++
++;; Iterator for scalar 64bit fp modes (DF, DD)
++(define_mode_iterator DFD [DD DF])
++
++;; Iterator for scalar 128bit fp modes (TF, TD)
++(define_mode_iterator TFD [TD TF])
+ 
+ ;; Double vector modes.
+ (define_mode_iterator VDF [V2SF V4HF])
+ 
+-;; Iterator for all scalar floating point modes (SF, DF and TF)
+-(define_mode_iterator GPF_TF [SF DF TF])
++;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD)
++(define_mode_iterator GPF_TF [SF DF TF SD DD TD])
+ 
+ ;; Integer Advanced SIMD modes.
+ (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
+@@ -301,7 +311,7 @@
+ ;; 2 and 4 lane SI modes.
+ (define_mode_iterator VS [V2SI V4SI])
+ 
+-(define_mode_iterator TX [TI TF])
++(define_mode_iterator TX [TI TF TD])
+ 
+ ;; Advanced SIMD opaque structure modes.
+ (define_mode_iterator VSTRUCT [OI CI XI])
+@@ -403,10 +413,10 @@
+ 		      V4x8HF V4x4SF V4x2DF V4x8BF])
+ 
+ ;; Double scalar modes
+-(define_mode_iterator DX [DI DF])
++(define_mode_iterator DX [DI DF DD])
+ 
+ ;; Duplicate of the above
+-(define_mode_iterator DX2 [DI DF])
++(define_mode_iterator DX2 [DI DF DD])
+ 
+ ;; Single scalar modes
+ (define_mode_iterator SX [SI SF])
+-- 
+2.33.0
+
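
For readers unfamiliar with decimal floating point on AArch64, the small C example below illustrates what the backend changes in this patch enable. It is an editorial sketch, not part of the imported patch: it assumes an aarch64 GCC configured with decimal floating-point support (typically --enable-decimal-float with the BID/libbid setup used on Linux), and the file name dfp-demo.c and the compile flags are only examples.

/* Illustrative sketch (not part of the patch): exercises the scalar DFP
   modes (SD/DD/TD) that the backend changes wire up.
   Assumed build command: gcc -O2 -S dfp-demo.c  */

_Decimal32  s = 1.5DF;   /* SDmode: 32-bit decimal float   */
_Decimal64  d = 2.5DD;   /* DDmode: 64-bit decimal float   */
_Decimal128 t = 3.5DL;   /* TDmode: 128-bit decimal float  */

/* A plain copy: moves of these values go through the renamed
   mov<mode>_aarch64 patterns driven by the SFD/DFD/TFD iterators,
   rather than the previous SF/DF/TF-only patterns.  */
_Decimal64
copy_dd (_Decimal64 x)
{
  return x;
}

/* Decimal zero: aarch64_float_const_zero_rtx_p now rejects DFP
   constants, because decimal 0.0 is not the all-zero bit pattern that
   "fmov d0, #0" or the zero register would produce, so the constant is
   materialized like any other DFP value.  */
_Decimal64
zero_dd (void)
{
  return 0.0DD;
}

Arithmetic on these types is still performed by libgcc's software decimal routines; the patch above essentially teaches the aarch64 backend how to hold, move, and pass DFP values in registers and memory.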