summary | refs | log | tree | commit | diff
path: root/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
blob: 434c669691c5d5caebdaa8ce886ebb61cdfdac00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Thu, 15 Jul 2021 09:04:55 +0800
Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when
 expanding a copy [PR95254]

Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8

In rtl expand, if we have a copy that matches one of the following patterns:
  (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
  (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
  (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
  (set (subreg:M1 (reg:M2 ...)) (constant C))
where mode M1 is equal in size to M2, try to detect whether the mode change
involves an implicit round trip through memory.  If so, see if we can avoid
that by removing the subregs and doing the move in mode M2 instead.

diff --git a/gcc/expr.c b/gcc/expr.c
index 991b26f3341..d66fdd4e93d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y)
   gcc_assert (mode != BLKmode
 	      && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
 
+  /* If we have a copy that looks like one of the following patterns:
+       (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
+       (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
+       (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
+       (set (subreg:M1 (reg:M2 ...)) (constant C))
+     where mode M1 is equal in size to M2, try to detect whether the
+     mode change involves an implicit round trip through memory.
+     If so, see if we can avoid that by removing the subregs and
+     doing the move in mode M2 instead.  */
+
+  rtx x_inner = NULL_RTX;
+  rtx y_inner = NULL_RTX;
+
+#define CANDIDATE_SUBREG_P(subreg) \
+  (REG_P (SUBREG_REG (subreg)) \
+   && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
+               GET_MODE_SIZE (GET_MODE (subreg))) \
+   && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
+      != CODE_FOR_nothing)
+
+#define CANDIDATE_MEM_P(innermode, mem) \
+  (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
+   && !push_operand ((mem), GET_MODE (mem))                              \
+   /* Not a candidate if innermode requires too much alignment.  */      \
+   && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode)                 \
+       || targetm.slow_unaligned_access (GET_MODE (mem),                 \
+                                        MEM_ALIGN (mem))                 \
+       || !targetm.slow_unaligned_access ((innermode),                   \
+                                         MEM_ALIGN (mem))))
+
+  if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
+    x_inner = SUBREG_REG (x);
+
+  if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
+    y_inner = SUBREG_REG (y);
+
+  if (x_inner != NULL_RTX
+      && y_inner != NULL_RTX
+      && GET_MODE (x_inner) == GET_MODE (y_inner)
+      && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
+    {
+      x = x_inner;
+      y = y_inner;
+      mode = GET_MODE (x_inner);
+    }
+  else if (x_inner != NULL_RTX
+          && MEM_P (y)
+          && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
+    {
+      x = x_inner;
+      y = adjust_address (y, GET_MODE (x_inner), 0);
+      mode = GET_MODE (x_inner);
+    }
+  else if (y_inner != NULL_RTX
+          && MEM_P (x)
+          && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
+    {
+      x = adjust_address (x, GET_MODE (y_inner), 0);
+      y = y_inner;
+      mode = GET_MODE (y_inner);
+    }
+  else if (x_inner != NULL_RTX
+          && CONSTANT_P (y)
+          && !targetm.can_change_mode_class (GET_MODE (x_inner),
+                                             mode, ALL_REGS)
+          && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
+    {
+      x = x_inner;
+      y = y_inner;
+      mode = GET_MODE (x_inner);
+    }
+
   if (CONSTANT_P (y))
     {
       if (optimize
diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
new file mode 100644
index 00000000000..10bfc868197
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
+
+typedef short __attribute__((vector_size (8))) v4hi;
+
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
+
+short b[4];
+
+void pass_v4hi (v4hi v)
+{
+    int i;
+    u4hi u;
+    u.v = v;
+    for (i = 0; i < 4; i++)
+      b[i] = u.a[i];
+};
+
+/* { dg-final { scan-assembler-not "ptrue" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
index 518071bdd86..398cdba5d5f 100644
--- a/gcc/testsuite/gcc.target/i386/pr67609.c
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -msse2" } */
 /* { dg-require-effective-target lp64 } */
-/* { dg-final { scan-assembler "movdqa" } } */
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
 
 #include <emmintrin.h>
 __m128d reg;
-- 
2.21.0.windows.1