source: icGREP/icgrep-devel/llvm-3.8.0.src/test/CodeGen/AArch64/arm64-vqadd.ll @ 5027

Last change on this file since 5027 was 5027, checked in by cameron, 3 years ago

Upgrade to llvm 3.8

File size: 12.3 KB
Line 
; Test driver line: this file is piped through llc (arm64 target, Apple-style
; NEON mnemonics) and the resulting assembly is handed to FileCheck.
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

; Loads two <8 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v8i8 intrinsic; the assembly must contain sqadd.8b.
define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd8b:
;CHECK: sqadd.8b
        %tmp1 = load <8 x i8>, <8 x i8>* %A
        %tmp2 = load <8 x i8>, <8 x i8>* %B
        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
        ret <8 x i8> %tmp3
}

; Loads two <4 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v4i16 intrinsic; the assembly must contain sqadd.4h.
define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd4h:
;CHECK: sqadd.4h
        %tmp1 = load <4 x i16>, <4 x i16>* %A
        %tmp2 = load <4 x i16>, <4 x i16>* %B
        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
        ret <4 x i16> %tmp3
}

; Loads two <2 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v2i32 intrinsic; the assembly must contain sqadd.2s.
define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd2s:
;CHECK: sqadd.2s
        %tmp1 = load <2 x i32>, <2 x i32>* %A
        %tmp2 = load <2 x i32>, <2 x i32>* %B
        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
        ret <2 x i32> %tmp3
}

; Loads two <8 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v8i8 intrinsic; the assembly must contain uqadd.8b.
define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd8b:
;CHECK: uqadd.8b
        %tmp1 = load <8 x i8>, <8 x i8>* %A
        %tmp2 = load <8 x i8>, <8 x i8>* %B
        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
        ret <8 x i8> %tmp3
}

; Loads two <4 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v4i16 intrinsic; the assembly must contain uqadd.4h.
define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd4h:
;CHECK: uqadd.4h
        %tmp1 = load <4 x i16>, <4 x i16>* %A
        %tmp2 = load <4 x i16>, <4 x i16>* %B
        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
        ret <4 x i16> %tmp3
}

; Loads two <2 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v2i32 intrinsic; the assembly must contain uqadd.2s.
define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd2s:
;CHECK: uqadd.2s
        %tmp1 = load <2 x i32>, <2 x i32>* %A
        %tmp2 = load <2 x i32>, <2 x i32>* %B
        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
        ret <2 x i32> %tmp3
}

; Loads two <16 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v16i8 intrinsic; the assembly must contain sqadd.16b.
define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd16b:
;CHECK: sqadd.16b
        %tmp1 = load <16 x i8>, <16 x i8>* %A
        %tmp2 = load <16 x i8>, <16 x i8>* %B
        %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
        ret <16 x i8> %tmp3
}

; Loads two <8 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v8i16 intrinsic; the assembly must contain sqadd.8h.
define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd8h:
;CHECK: sqadd.8h
        %tmp1 = load <8 x i16>, <8 x i16>* %A
        %tmp2 = load <8 x i16>, <8 x i16>* %B
        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
        ret <8 x i16> %tmp3
}

; Loads two <4 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v4i32 intrinsic; the assembly must contain sqadd.4s.
define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd4s:
;CHECK: sqadd.4s
        %tmp1 = load <4 x i32>, <4 x i32>* %A
        %tmp2 = load <4 x i32>, <4 x i32>* %B
        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
        ret <4 x i32> %tmp3
}

; Loads two <2 x i64> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.sqadd.v2i64 intrinsic; the assembly must contain sqadd.2d.
define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqadd2d:
;CHECK: sqadd.2d
        %tmp1 = load <2 x i64>, <2 x i64>* %A
        %tmp2 = load <2 x i64>, <2 x i64>* %B
        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
        ret <2 x i64> %tmp3
}

; Loads two <16 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v16i8 intrinsic; the assembly must contain uqadd.16b.
define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd16b:
;CHECK: uqadd.16b
        %tmp1 = load <16 x i8>, <16 x i8>* %A
        %tmp2 = load <16 x i8>, <16 x i8>* %B
        %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
        ret <16 x i8> %tmp3
}

; Loads two <8 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v8i16 intrinsic; the assembly must contain uqadd.8h.
define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd8h:
;CHECK: uqadd.8h
        %tmp1 = load <8 x i16>, <8 x i16>* %A
        %tmp2 = load <8 x i16>, <8 x i16>* %B
        %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
        ret <8 x i16> %tmp3
}

; Loads two <4 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v4i32 intrinsic; the assembly must contain uqadd.4s.
define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd4s:
;CHECK: uqadd.4s
        %tmp1 = load <4 x i32>, <4 x i32>* %A
        %tmp2 = load <4 x i32>, <4 x i32>* %B
        %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
        ret <4 x i32> %tmp3
}

; Loads two <2 x i64> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.uqadd.v2i64 intrinsic; the assembly must contain uqadd.2d.
define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqadd2d:
;CHECK: uqadd.2d
        %tmp1 = load <2 x i64>, <2 x i64>* %A
        %tmp2 = load <2 x i64>, <2 x i64>* %B
        %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
        ret <2 x i64> %tmp3
}

; Intrinsic declarations for the sqadd/uqadd tests above. Per the Arm ISA,
; SQADD is a signed saturating add and UQADD an unsigned saturating add.
; NOTE(review): the two v1i64 variants are declared but no function visible in
; this file calls them.

; sqadd on 64-bit-wide vectors.
declare <8 x i8>  @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; uqadd on 64-bit-wide vectors.
declare <8 x i8>  @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; sqadd on 128-bit-wide vectors.
declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; uqadd on 128-bit-wide vectors.
declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; Loads two <8 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v8i8 intrinsic; the assembly must contain usqadd.8b.
define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd8b:
;CHECK: usqadd.8b
        %tmp1 = load <8 x i8>, <8 x i8>* %A
        %tmp2 = load <8 x i8>, <8 x i8>* %B
        %tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
        ret <8 x i8> %tmp3
}

; Loads two <4 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v4i16 intrinsic; the assembly must contain usqadd.4h.
define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd4h:
;CHECK: usqadd.4h
        %tmp1 = load <4 x i16>, <4 x i16>* %A
        %tmp2 = load <4 x i16>, <4 x i16>* %B
        %tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
        ret <4 x i16> %tmp3
}

; Loads two <2 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v2i32 intrinsic; the assembly must contain usqadd.2s.
define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd2s:
;CHECK: usqadd.2s
        %tmp1 = load <2 x i32>, <2 x i32>* %A
        %tmp2 = load <2 x i32>, <2 x i32>* %B
        %tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
        ret <2 x i32> %tmp3
}

; Loads two <16 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v16i8 intrinsic; the assembly must contain usqadd.16b.
define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd16b:
;CHECK: usqadd.16b
        %tmp1 = load <16 x i8>, <16 x i8>* %A
        %tmp2 = load <16 x i8>, <16 x i8>* %B
        %tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
        ret <16 x i8> %tmp3
}

; Loads two <8 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v8i16 intrinsic; the assembly must contain usqadd.8h.
define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd8h:
;CHECK: usqadd.8h
        %tmp1 = load <8 x i16>, <8 x i16>* %A
        %tmp2 = load <8 x i16>, <8 x i16>* %B
        %tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
        ret <8 x i16> %tmp3
}

; Loads two <4 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v4i32 intrinsic; the assembly must contain usqadd.4s.
define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd4s:
;CHECK: usqadd.4s
        %tmp1 = load <4 x i32>, <4 x i32>* %A
        %tmp2 = load <4 x i32>, <4 x i32>* %B
        %tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
        ret <4 x i32> %tmp3
}

; Loads two <2 x i64> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.usqadd.v2i64 intrinsic; the assembly must contain usqadd.2d.
define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: usqadd2d:
;CHECK: usqadd.2d
        %tmp1 = load <2 x i64>, <2 x i64>* %A
        %tmp2 = load <2 x i64>, <2 x i64>* %B
        %tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
        ret <2 x i64> %tmp3
}

; Scalar i64 form: passes %l and %r straight to llvm.aarch64.neon.usqadd.i64.
; The assembly must contain a usqadd whose two operands match d<N> registers.
define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: usqadd_d:
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
  ret i64 %sum
}

; Scalar i32 form: passes %l and %r straight to llvm.aarch64.neon.usqadd.i32.
; The assembly must contain a usqadd whose two operands match s<N> registers.
define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: usqadd_s:
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
  %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
  ret i32 %sum
}

; Intrinsic declarations for the usqadd tests above. Per the Arm ISA, USQADD is
; an unsigned saturating accumulate of a signed value.
; NOTE(review): the v1i64 variant is declared but no function visible in this
; file calls it (usqadd_d uses the scalar i64 form).

; 64-bit-wide vectors plus the scalar i64/i32 forms.
declare <8 x i8>  @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone

; 128-bit-wide vectors.
declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; Loads two <8 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v8i8 intrinsic; the assembly must contain suqadd.8b.
define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd8b:
;CHECK: suqadd.8b
        %tmp1 = load <8 x i8>, <8 x i8>* %A
        %tmp2 = load <8 x i8>, <8 x i8>* %B
        %tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
        ret <8 x i8> %tmp3
}

; Loads two <4 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v4i16 intrinsic; the assembly must contain suqadd.4h.
define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd4h:
;CHECK: suqadd.4h
        %tmp1 = load <4 x i16>, <4 x i16>* %A
        %tmp2 = load <4 x i16>, <4 x i16>* %B
        %tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
        ret <4 x i16> %tmp3
}

; Loads two <2 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v2i32 intrinsic; the assembly must contain suqadd.2s.
define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd2s:
;CHECK: suqadd.2s
        %tmp1 = load <2 x i32>, <2 x i32>* %A
        %tmp2 = load <2 x i32>, <2 x i32>* %B
        %tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
        ret <2 x i32> %tmp3
}

; Loads two <16 x i8> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v16i8 intrinsic; the assembly must contain suqadd.16b.
define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd16b:
;CHECK: suqadd.16b
        %tmp1 = load <16 x i8>, <16 x i8>* %A
        %tmp2 = load <16 x i8>, <16 x i8>* %B
        %tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
        ret <16 x i8> %tmp3
}

; Loads two <8 x i16> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v8i16 intrinsic; the assembly must contain suqadd.8h.
define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd8h:
;CHECK: suqadd.8h
        %tmp1 = load <8 x i16>, <8 x i16>* %A
        %tmp2 = load <8 x i16>, <8 x i16>* %B
        %tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
        ret <8 x i16> %tmp3
}

; Loads two <4 x i32> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v4i32 intrinsic; the assembly must contain suqadd.4s.
define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd4s:
;CHECK: suqadd.4s
        %tmp1 = load <4 x i32>, <4 x i32>* %A
        %tmp2 = load <4 x i32>, <4 x i32>* %B
        %tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
        ret <4 x i32> %tmp3
}

; Loads two <2 x i64> vectors from %A and %B and combines them with the
; llvm.aarch64.neon.suqadd.v2i64 intrinsic; the assembly must contain suqadd.2d.
define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: suqadd2d:
;CHECK: suqadd.2d
        %tmp1 = load <2 x i64>, <2 x i64>* %A
        %tmp2 = load <2 x i64>, <2 x i64>* %B
        %tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
        ret <2 x i64> %tmp3
}

; Single-element vector form: passes the <1 x i64> arguments %l and %r straight
; to llvm.aarch64.neon.suqadd.v1i64. The assembly must contain a suqadd whose
; two operands match d<N> registers.
define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
; CHECK-LABEL: suqadd_1d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
  ret <1 x i64> %sum
}

; Scalar i64 form: passes %l and %r straight to llvm.aarch64.neon.suqadd.i64.
; The assembly must contain a suqadd whose two operands match d<N> registers.
define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: suqadd_d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
  ret i64 %sum
}

; Scalar i32 form: passes %l and %r straight to llvm.aarch64.neon.suqadd.i32.
; The assembly must contain a suqadd whose two operands match s<N> registers.
define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: suqadd_s:
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
  %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
  ret i32 %sum
}

; Intrinsic declarations for the suqadd tests above. Per the Arm ISA, SUQADD is
; a signed saturating accumulate of an unsigned value.

; 64-bit-wide vectors plus the scalar i64/i32 forms.
declare <8 x i8>  @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone

; 128-bit-wide vectors.
declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
Note: See TracBrowser for help on using the repository browser.