source: icGREP/icgrep-devel/llvm-3.8.0.src/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll @ 5027

Last change on this file since 5027 was 5027, checked in by cameron, 3 years ago

Upgrade to llvm 3.8

File size: 13.0 KB
Line 
1; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
2; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
3; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
4; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
5
6; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
7; our test strategy is to:
8;   * Force the pass to always perform register swapping even if the dest register is of the
9;     correct color already (-aarch64-a57-fp-load-balancing-force-all).
10;   * Force the pass to ignore all hints it obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
11;     and run it twice, once where it always hints even (=1) and once where it always hints odd (=2).
12;
13; We then use regex magic to check that in the two cases the register allocation is
14; different; this is what gives us the testing coverage and distinguishes cases where
15; the pass has done some work versus accidental regalloc.
16
; Module-level target settings. Only the architecture is fixed here; the CPU
; (cortex-a57 or cortex-a53) is chosen per RUN line via -mcpu.
17target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
18target triple = "aarch64"
19
20; Non-overlapping groups - shouldn't need any changing at all.
21
22; CHECK-LABEL: f1:
23; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
24; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
25; CHECK: fmadd [[x]]
26; CHECK: fmsub [[x]]
27; CHECK: fmadd [[x]]
28; CHECK: str [[x]]
29
; f1: evaluates two accumulation chains one after the other. The first chain
; uses p[0..4] and is stored to q[0] before p[5..7] are loaded for the second
; chain, which is stored to q[1].
30define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
31entry:
; Inputs of the first chain: %0..%4 = p[0]..p[4].
32  %0 = load double, double* %p, align 8
33  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
34  %1 = load double, double* %arrayidx1, align 8
35  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
36  %2 = load double, double* %arrayidx2, align 8
37  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
38  %3 = load double, double* %arrayidx3, align 8
39  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
40  %4 = load double, double* %arrayidx4, align 8
; Chain 1: %add -> %add6 -> %sub -> %add9. Every step combines the running
; value with a fresh product (three additions, one subtraction).
41  %mul = fmul fast double %0, %1
42  %add = fadd fast double %mul, %4
43  %mul5 = fmul fast double %1, %2
44  %add6 = fadd fast double %mul5, %add
45  %mul7 = fmul fast double %1, %3
46  %sub = fsub fast double %add6, %mul7
47  %mul8 = fmul fast double %2, %3
48  %add9 = fadd fast double %mul8, %sub
; Chain 1 ends here: its result goes to q[0].
49  store double %add9, double* %q, align 8
; Inputs of the second chain: %5..%7 = p[5]..p[7] (%0 is reused as well).
50  %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
51  %5 = load double, double* %arrayidx11, align 8
52  %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
53  %6 = load double, double* %arrayidx12, align 8
54  %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
55  %7 = load double, double* %arrayidx13, align 8
; Chain 2: %mul15 -> %add17 -> %add19, stored to q[1].
56  %mul15 = fmul fast double %6, %7
57  %mul16 = fmul fast double %0, %5
58  %add17 = fadd fast double %mul16, %mul15
59  %mul18 = fmul fast double %5, %6
60  %add19 = fadd fast double %mul18, %add17
61  %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
62  store double %add19, double* %arrayidx20, align 8
63  ret void
64}
65
66; Overlapping groups - coloring needed.
67
68; CHECK-LABEL: f2:
69; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
70; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
71; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
72; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
73; CHECK: fmadd [[x]]
74; CHECK: fmadd [[y]]
75; CHECK: fmsub [[x]]
76; CHECK: fmadd [[y]]
77; CHECK: fmadd [[x]]
78; CHECK-A57: stp [[x]], [[y]]
79; CHECK-A53-DAG: str [[x]]
80; CHECK-A53-DAG: str [[y]]
81
; f2: loads p[0..7] up front and then interleaves two accumulation chains.
; Neither chain consumes a result of the other; they only share loaded inputs.
; The results are stored to q[0] and q[1] at the end.
82define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
83entry:
; %0..%7 = p[0]..p[7].
84  %0 = load double, double* %p, align 8
85  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
86  %1 = load double, double* %arrayidx1, align 8
87  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
88  %2 = load double, double* %arrayidx2, align 8
89  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
90  %3 = load double, double* %arrayidx3, align 8
91  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
92  %4 = load double, double* %arrayidx4, align 8
93  %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
94  %5 = load double, double* %arrayidx5, align 8
95  %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
96  %6 = load double, double* %arrayidx6, align 8
97  %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
98  %7 = load double, double* %arrayidx7, align 8
; First chain:  %add  -> %add10 -> %sub   -> %add17 (stored to q[0]).
; Second chain: %mul8 -> %add12 -> %add15           (stored to q[1]).
; The steps of the two chains alternate in the instruction stream below.
99  %mul = fmul fast double %0, %1
100  %add = fadd fast double %mul, %7
101  %mul8 = fmul fast double %5, %6
102  %mul9 = fmul fast double %1, %2
103  %add10 = fadd fast double %mul9, %add
104  %mul11 = fmul fast double %3, %4
105  %add12 = fadd fast double %mul11, %mul8
106  %mul13 = fmul fast double %1, %3
107  %sub = fsub fast double %add10, %mul13
108  %mul14 = fmul fast double %4, %5
109  %add15 = fadd fast double %mul14, %add12
110  %mul16 = fmul fast double %2, %3
111  %add17 = fadd fast double %mul16, %sub
; Both chain results are stored back to back: q[0] then q[1].
112  store double %add17, double* %q, align 8
113  %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
114  store double %add15, double* %arrayidx19, align 8
115  ret void
116}
117
118; Dest register is live on block exit - fixup needed.
119
120; CHECK-LABEL: f3:
121; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
122; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
123; CHECK: fmadd [[x]]
124; CHECK: fmsub [[x]]
125; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
126; CHECK: str [[y]]
127
; f3: builds the same four-step chain as the first half of f1, but the result
; %add9 is not stored in the entry block. It is stored in %if.end, which is
; reached either directly or through %if.then (a call to @g), so the chain's
; final value has to survive past the end of the block that computes it.
128define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
129entry:
; %0..%4 = p[0]..p[4].
130  %0 = load double, double* %p, align 8
131  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
132  %1 = load double, double* %arrayidx1, align 8
133  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
134  %2 = load double, double* %arrayidx2, align 8
135  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
136  %3 = load double, double* %arrayidx3, align 8
137  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
138  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
139  %mul = fmul fast double %0, %1
140  %add = fadd fast double %mul, %4
141  %mul5 = fmul fast double %1, %2
142  %add6 = fadd fast double %mul5, %add
143  %mul7 = fmul fast double %1, %3
144  %sub = fsub fast double %add6, %mul7
145  %mul8 = fmul fast double %2, %3
146  %add9 = fadd fast double %mul8, %sub
; Branch on p[3] == 0.0 (ordered compare); only the equal case calls @g.
147  %cmp = fcmp oeq double %3, 0.000000e+00
148  br i1 %cmp, label %if.then, label %if.end
149
150if.then:                                          ; preds = %entry
151  tail call void bitcast (void (...)* @g to void ()*)() #2
152  br label %if.end
153
154if.end:                                           ; preds = %if.then, %entry
; The chain result is consumed here, on both paths out of %entry.
155  store double %add9, double* %q, align 8
156  ret void
157}
158
; External function called from %if.then in f3 and f5. It is declared variadic
; and called through a bitcast to void ().
159declare void @g(...) #1
160
161; Single precision version of f2.
162
163; CHECK-LABEL: f4:
164; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
165; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
166; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
167; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
168; CHECK: fmadd [[x]]
169; CHECK: fmadd [[y]]
170; CHECK: fmsub [[x]]
171; CHECK: fmadd [[y]]
172; CHECK: fmadd [[x]]
173; CHECK-A57: stp [[x]], [[y]]
174; CHECK-A53-DAG: str [[x]]
175; CHECK-A53-DAG: str [[y]]
176
; f4: same instruction sequence as f2 with float in place of double (and
; 4-byte instead of 8-byte load/store alignment). Two interleaved chains are
; computed from p[0..7] and stored to q[0] and q[1].
177define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
178entry:
; %0..%7 = p[0]..p[7].
179  %0 = load float, float* %p, align 4
180  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
181  %1 = load float, float* %arrayidx1, align 4
182  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
183  %2 = load float, float* %arrayidx2, align 4
184  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
185  %3 = load float, float* %arrayidx3, align 4
186  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
187  %4 = load float, float* %arrayidx4, align 4
188  %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
189  %5 = load float, float* %arrayidx5, align 4
190  %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
191  %6 = load float, float* %arrayidx6, align 4
192  %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
193  %7 = load float, float* %arrayidx7, align 4
; First chain:  %add  -> %add10 -> %sub   -> %add17 (stored to q[0]).
; Second chain: %mul8 -> %add12 -> %add15           (stored to q[1]).
194  %mul = fmul fast float %0, %1
195  %add = fadd fast float %mul, %7
196  %mul8 = fmul fast float %5, %6
197  %mul9 = fmul fast float %1, %2
198  %add10 = fadd fast float %mul9, %add
199  %mul11 = fmul fast float %3, %4
200  %add12 = fadd fast float %mul11, %mul8
201  %mul13 = fmul fast float %1, %3
202  %sub = fsub fast float %add10, %mul13
203  %mul14 = fmul fast float %4, %5
204  %add15 = fadd fast float %mul14, %add12
205  %mul16 = fmul fast float %2, %3
206  %add17 = fadd fast float %mul16, %sub
; Both chain results are stored back to back: q[0] then q[1].
207  store float %add17, float* %q, align 4
208  %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
209  store float %add15, float* %arrayidx19, align 4
210  ret void
211}
212
213; Single precision version of f3
214
215; CHECK-LABEL: f5:
216; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
217; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
218; CHECK: fmadd [[x]]
219; CHECK: fmsub [[x]]
220; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
221; CHECK: str [[y]]
222
; f5: same instruction sequence as f3 with float in place of double. A single
; chain is computed in %entry and its result %add9 is stored only in %if.end,
; after an optional call to @g.
223define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
224entry:
; %0..%4 = p[0]..p[4].
225  %0 = load float, float* %p, align 4
226  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
227  %1 = load float, float* %arrayidx1, align 4
228  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
229  %2 = load float, float* %arrayidx2, align 4
230  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
231  %3 = load float, float* %arrayidx3, align 4
232  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
233  %4 = load float, float* %arrayidx4, align 4
; Chain: %add -> %add6 -> %sub -> %add9.
234  %mul = fmul fast float %0, %1
235  %add = fadd fast float %mul, %4
236  %mul5 = fmul fast float %1, %2
237  %add6 = fadd fast float %mul5, %add
238  %mul7 = fmul fast float %1, %3
239  %sub = fsub fast float %add6, %mul7
240  %mul8 = fmul fast float %2, %3
241  %add9 = fadd fast float %mul8, %sub
; Branch on p[3] == 0.0 (ordered compare); only the equal case calls @g.
242  %cmp = fcmp oeq float %3, 0.000000e+00
243  br i1 %cmp, label %if.then, label %if.end
244
245if.then:                                          ; preds = %entry
246  tail call void bitcast (void (...)* @g to void ()*)() #2
247  br label %if.end
248
249if.end:                                           ; preds = %if.then, %entry
; The chain result is consumed here, on both paths out of %entry.
250  store float %add9, float* %q, align 4
251  ret void
252}
253
254; Test that regmask clobbering stops a chain sequence.
255
256; CHECK-LABEL: f6:
257; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
258; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
259; CHECK: fmadd [[x]]
260; CHECK: fmsub [[x]]
261; CHECK: fmadd d0, {{.*}}, [[x]]
262; CHECK: bl hh
263; CHECK: str d0
264
; f6: computes one chain from p[0..4] and passes its result to @hh. The value
; written to q is @hh's return value, not the chain result itself.
265define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
266entry:
; %0..%4 = p[0]..p[4].
267  %0 = load double, double* %p, align 8
268  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
269  %1 = load double, double* %arrayidx1, align 8
270  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
271  %2 = load double, double* %arrayidx2, align 8
272  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
273  %3 = load double, double* %arrayidx3, align 8
274  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
275  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
276  %mul = fmul fast double %0, %1
277  %add = fadd fast double %mul, %4
278  %mul5 = fmul fast double %1, %2
279  %add6 = fadd fast double %mul5, %add
280  %mul7 = fmul fast double %1, %3
281  %sub = fsub fast double %add6, %mul7
282  %mul8 = fmul fast double %2, %3
283  %add9 = fadd fast double %mul8, %sub
; The chain result is the sole argument of the call; the call's result is
; what gets stored.
284  %call = tail call double @hh(double %add9) #2
285  store double %call, double* %q, align 8
286  ret void
287}
288
; External function called by f6; takes and returns a double.
289declare double @hh(double) #1
290
291; Check that we correctly deal with repeated operands.
292; The following testcase creates:
293;   %D1<def> = FADDDrr %D0<kill>, %D0
294; We'll get a crash if we naively look at the first operand, remove it
295; from the substitution list then look at the second operand.
296
; Anchor the two checks below to f7's own output, as every other function in
; this file does; without the label they may match text outside f7.
; CHECK-LABEL: f7:
297; CHECK: fmadd [[x:d[0-9]+]]
298; CHECK: fadd d1, [[x]], [[x]]
299
; f7: computes one chain from p[0..4], then adds the chain result to itself
; and passes that sum as the second argument of @hhh. The self-addition is the
; "repeated operand" case this test exists for.
300define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
301entry:
; %0..%4 = p[0]..p[4].
302  %0 = load double, double* %p, align 8
303  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
304  %1 = load double, double* %arrayidx1, align 8
305  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
306  %2 = load double, double* %arrayidx2, align 8
307  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
308  %3 = load double, double* %arrayidx3, align 8
309  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
310  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
311  %mul = fmul fast double %0, %1
312  %add = fadd fast double %mul, %4
313  %mul5 = fmul fast double %1, %2
314  %add6 = fadd fast double %mul5, %add
315  %mul7 = fmul fast double %1, %3
316  %sub = fsub fast double %add6, %mul7
317  %mul8 = fmul fast double %2, %3
318  %add9 = fadd fast double %mul8, %sub
; Both operands of this fadd are the chain result %add9.
319  %add10 = fadd fast double %add9, %add9
; %add10 is the second argument; presumably this is why the CHECK line expects
; the fadd to write d1 directly.
320  call void @hhh(double 0.0, double %add10)
321  ret void
322}
323
; External function called by f7; unlike @g and @hh it carries no attribute group.
324declare void @hhh(double, double)
325
; Attribute groups referenced by number throughout this file:
;   #0 - attached to every function definition (f1-f7).
;   #1 - attached to the @g and @hh declarations; the same string attributes
;        as #0 but without nounwind.
;   #2 - attached to the call sites of @g and @hh.
326attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
327attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
328attributes #2 = { nounwind }
329
Note: See TracBrowser for help on using the repository browser.