source: icGREP/icgrep-devel/llvm-3.8.0.src/test/CodeGen/AMDGPU/store.ll @ 5027

Last change on this file since 5027 was 5027, checked in by cameron, 3 years ago

Upgrade to llvm 3.8

File size: 10.3 KB
Line 
1; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
5
6;===------------------------------------------------------------------------===;
7; Global Address Space
8;===------------------------------------------------------------------------===;
9; FUNC-LABEL: {{^}}store_i1:
10; EG: MEM_RAT MSKOR
11; SI: buffer_store_byte
; Test input: stores the constant i1 value `true` through %out, a pointer
; into address space 1 (the global address space section of this file).
12define void @store_i1(i1 addrspace(1)* %out) {
13entry:
14  store i1 true, i1 addrspace(1)* %out
15  ret void
16}
17
18; i8 store
19; FUNC-LABEL: {{^}}store_i8:
20; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
21
22; IG 0: Get the byte index and truncate the value
23; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
24; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
25; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
26; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
27
28
29; IG 1: Truncate the calculated shift amount for the mask
30
31; IG 2: Shift the value and the mask
32; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
33; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
34; EG-NEXT: 255
35; IG 3: Initialize the Y and Z channels to zero
36;       XXX: An optimal scheduler should merge this into one of the previous IGs.
37; EG: MOV T[[RW_GPR]].Y, 0.0
38; EG: MOV * T[[RW_GPR]].Z, 0.0
39
40; SI: buffer_store_byte
41
; Test input: stores the i8 argument %in, unmodified, through the
; addrspace(1) pointer %out.
42define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
43entry:
44  store i8 %in, i8 addrspace(1)* %out
45  ret void
46}
47
48; i16 store
49; FUNC-LABEL: {{^}}store_i16:
50; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
51
52; IG 0: Get the byte index and truncate the value
53
54
55; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
56; EG-NEXT: 3(4.203895e-45),
57
58; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
59; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
60
61; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
62; IG 1: Truncate the calculated shift amount for the mask
63
64; IG 2: Shift the value and the mask
65; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
66; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
67; EG-NEXT: 65535
68; IG 3: Initialize the Y and Z channels to zero
69;       XXX: An optimal scheduler should merge this into one of the previous IGs.
70; EG: MOV T[[RW_GPR]].Y, 0.0
71; EG: MOV * T[[RW_GPR]].Z, 0.0
72
73; SI: buffer_store_short
; Test input: stores the i16 argument %in, unmodified, through the
; addrspace(1) pointer %out.
74define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
75entry:
76  store i16 %in, i16 addrspace(1)* %out
77  ret void
78}
79
80; FUNC-LABEL: {{^}}store_v2i8:
81; EG: MEM_RAT MSKOR
82; EG-NOT: MEM_RAT MSKOR
83
84; SI: buffer_store_byte
85; SI: buffer_store_byte
; Test input: truncates each i32 lane of %in to i8 and stores the resulting
; <2 x i8> vector through the addrspace(1) pointer %out.
86define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
87entry:
88  %0 = trunc <2 x i32> %in to <2 x i8>
89  store <2 x i8> %0, <2 x i8> addrspace(1)* %out
90  ret void
91}
92
93
94; FUNC-LABEL: {{^}}store_v2i16:
95; EG: MEM_RAT_CACHELESS STORE_RAW
96
97; CM: MEM_RAT_CACHELESS STORE_DWORD
98
99; SI: buffer_store_short
100; SI: buffer_store_short
; Test input: truncates each i32 lane of %in to i16 and stores the resulting
; <2 x i16> vector through the addrspace(1) pointer %out.
101define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
102entry:
103  %0 = trunc <2 x i32> %in to <2 x i16>
104  store <2 x i16> %0, <2 x i16> addrspace(1)* %out
105  ret void
106}
107
108; FUNC-LABEL: {{^}}store_v4i8:
109; EG: MEM_RAT_CACHELESS STORE_RAW
110
111; CM: MEM_RAT_CACHELESS STORE_DWORD
112
113; SI: buffer_store_byte
114; SI: buffer_store_byte
115; SI: buffer_store_byte
116; SI: buffer_store_byte
; Test input: truncates each i32 lane of %in to i8 and stores the resulting
; <4 x i8> vector through the addrspace(1) pointer %out.
117define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
118entry:
119  %0 = trunc <4 x i32> %in to <4 x i8>
120  store <4 x i8> %0, <4 x i8> addrspace(1)* %out
121  ret void
122}
123
124; floating-point store
125; FUNC-LABEL: {{^}}store_f32:
126; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
127
128; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
129
130; SI: buffer_store_dword
131
; Test input: stores the float argument %in, unmodified, through the
; addrspace(1) pointer %out.
132define void @store_f32(float addrspace(1)* %out, float %in) {
133  store float %in, float addrspace(1)* %out
134  ret void
135}
136
137; FUNC-LABEL: {{^}}store_v4i16:
138; EG: MEM_RAT MSKOR
139; EG: MEM_RAT MSKOR
140; EG: MEM_RAT MSKOR
141; EG: MEM_RAT MSKOR
142; EG-NOT: MEM_RAT MSKOR
143
144; SI: buffer_store_short
145; SI: buffer_store_short
146; SI: buffer_store_short
147; SI: buffer_store_short
148; SI-NOT: buffer_store_byte
; Test input: truncates each i32 lane of %in to i16 and stores the resulting
; <4 x i16> vector through the addrspace(1) pointer %out.
149define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
150entry:
151  %0 = trunc <4 x i32> %in to <4 x i16>
152  store <4 x i16> %0, <4 x i16> addrspace(1)* %out
153  ret void
154}
155
156; vec2 floating-point stores
157; FUNC-LABEL: {{^}}store_v2f32:
158; EG: MEM_RAT_CACHELESS STORE_RAW
159
160; CM: MEM_RAT_CACHELESS STORE_DWORD
161
162; SI: buffer_store_dwordx2
163
; Test input: builds a <2 x float> from the scalars %a (lane 0) and %b
; (lane 1) with two insertelement instructions, starting from an all-zero
; vector, then stores it through the addrspace(1) pointer %out.
164define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
165entry:
166  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
167  %1 = insertelement <2 x float> %0, float %b, i32 1
168  store <2 x float> %1, <2 x float> addrspace(1)* %out
169  ret void
170}
171
172; FUNC-LABEL: {{^}}store_v4i32:
173; EG: MEM_RAT_CACHELESS STORE_RAW
174; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
175
176; CM: MEM_RAT_CACHELESS STORE_DWORD
177; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
178
179; SI: buffer_store_dwordx4
; Test input: stores the <4 x i32> argument %in, unmodified, through the
; addrspace(1) pointer %out. No explicit alignment is given on the store.
180define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
181entry:
182  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
183  ret void
184}
185
186; FUNC-LABEL: {{^}}store_i64_i8:
187; EG: MEM_RAT MSKOR
188; SI: buffer_store_byte
; Test input: truncates the i64 argument %in to i8 and stores the result
; through the addrspace(1) pointer %out.
189define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
190entry:
191  %0 = trunc i64 %in to i8
192  store i8 %0, i8 addrspace(1)* %out
193  ret void
194}
195
196; FUNC-LABEL: {{^}}store_i64_i16:
197; EG: MEM_RAT MSKOR
198; SI: buffer_store_short
; Test input: truncates the i64 argument %in to i16 and stores the result
; through the addrspace(1) pointer %out.
199define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
200entry:
201  %0 = trunc i64 %in to i16
202  store i16 %0, i16 addrspace(1)* %out
203  ret void
204}
205
206;===------------------------------------------------------------------------===;
207; Local Address Space
208;===------------------------------------------------------------------------===;
209
210; FUNC-LABEL: {{^}}store_local_i1:
211; EG: LDS_BYTE_WRITE
212; SI: ds_write_b8
; Test input: stores the constant i1 value `true` through %out, a pointer
; into address space 3 (the local address space section of this file).
213define void @store_local_i1(i1 addrspace(3)* %out) {
214entry:
215  store i1 true, i1 addrspace(3)* %out
216  ret void
217}
218
219; FUNC-LABEL: {{^}}store_local_i8:
220; EG: LDS_BYTE_WRITE
221
222; SI: ds_write_b8
; Test input: stores the i8 argument %in, unmodified, through the
; addrspace(3) pointer %out.
223define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
224  store i8 %in, i8 addrspace(3)* %out
225  ret void
226}
227
228; FUNC-LABEL: {{^}}store_local_i16:
229; EG: LDS_SHORT_WRITE
230
231; SI: ds_write_b16
; Test input: stores the i16 argument %in, unmodified, through the
; addrspace(3) pointer %out.
232define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
233  store i16 %in, i16 addrspace(3)* %out
234  ret void
235}
236
237; FUNC-LABEL: {{^}}store_local_v2i16:
238; EG: LDS_WRITE
239
240; CM: LDS_WRITE
241
242; SI: ds_write_b16
243; SI: ds_write_b16
; Test input: stores the <2 x i16> argument %in, unmodified, through the
; addrspace(3) pointer %out.
244define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
245entry:
246  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
247  ret void
248}
249
250; FUNC-LABEL: {{^}}store_local_v4i8:
251; EG: LDS_WRITE
252
253; CM: LDS_WRITE
254
255; SI: ds_write_b8
256; SI: ds_write_b8
257; SI: ds_write_b8
258; SI: ds_write_b8
; Test input: stores the <4 x i8> argument %in, unmodified, through the
; addrspace(3) pointer %out.
259define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
260entry:
261  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
262  ret void
263}
264
265; FUNC-LABEL: {{^}}store_local_v2i32:
266; EG: LDS_WRITE
267; EG: LDS_WRITE
268
269; CM: LDS_WRITE
270; CM: LDS_WRITE
271
272; SI: ds_write_b64
; Test input: stores the <2 x i32> argument %in, unmodified, through the
; addrspace(3) pointer %out.
273define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
274entry:
275  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
276  ret void
277}
278
279; FUNC-LABEL: {{^}}store_local_v4i32:
280; EG: LDS_WRITE
281; EG: LDS_WRITE
282; EG: LDS_WRITE
283; EG: LDS_WRITE
284
285; CM: LDS_WRITE
286; CM: LDS_WRITE
287; CM: LDS_WRITE
288; CM: LDS_WRITE
289
290; SI: ds_write_b64
291; SI: ds_write_b64
; Test input: stores the <4 x i32> argument %in, unmodified, through the
; addrspace(3) pointer %out. The store carries no explicit `align`, unlike
; the otherwise identical @store_local_v4i32_align4.
292define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
293entry:
294  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
295  ret void
296}
297
298; FUNC-LABEL: {{^}}store_local_v4i32_align4:
299; EG: LDS_WRITE
300; EG: LDS_WRITE
301; EG: LDS_WRITE
302; EG: LDS_WRITE
303
304; CM: LDS_WRITE
305; CM: LDS_WRITE
306; CM: LDS_WRITE
307; CM: LDS_WRITE
308
309; SI: ds_write2_b32
310; SI: ds_write2_b32
; Test input: same <4 x i32> store to addrspace(3) as @store_local_v4i32,
; but with an explicit `align 4` on the store instruction.
311define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
312entry:
313  store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
314  ret void
315}
316
317; FUNC-LABEL: {{^}}store_local_i64_i8:
318; EG: LDS_BYTE_WRITE
319; SI: ds_write_b8
; Test input: truncates the i64 argument %in to i8 and stores the result
; through the addrspace(3) pointer %out.
320define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
321entry:
322  %0 = trunc i64 %in to i8
323  store i8 %0, i8 addrspace(3)* %out
324  ret void
325}
326
327; FUNC-LABEL: {{^}}store_local_i64_i16:
328; EG: LDS_SHORT_WRITE
329; SI: ds_write_b16
; Test input: truncates the i64 argument %in to i16 and stores the result
; through the addrspace(3) pointer %out.
330define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
331entry:
332  %0 = trunc i64 %in to i16
333  store i16 %0, i16 addrspace(3)* %out
334  ret void
335}
336
337; The stores in this function are combined by the optimizer to create a
338; 64-bit store with 32-bit alignment.  This is legal for SI and the legalizer
339; should not try to split the 64-bit store back into 2 32-bit stores.
340;
341; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
342; be two 32-bit stores.
343
344; FUNC-LABEL: {{^}}vecload2:
345; EG: MEM_RAT_CACHELESS STORE_RAW
346
347; CM: MEM_RAT_CACHELESS STORE_DWORD
348
349; SI: buffer_store_dwordx2
; Test input: loads i32 elements 0 and 1 from the addrspace(2) pointer %mem
; and stores them to elements 0 and 1 of the addrspace(1) pointer %out.
; Every load and store is marked `align 4`; both pointer arguments are
; `nocapture`, and the function uses attribute group #0.
350define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
351entry:
352  %0 = load i32, i32 addrspace(2)* %mem, align 4
353  %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
354  %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
355  store i32 %0, i32 addrspace(1)* %out, align 4
356  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
357  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
358  ret void
359}
360
; Attribute group #0 (referenced by @vecload2): `nounwind` plus a set of
; string attributes, each explicitly set to "false".
361attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
362
363; When i128 was a legal type, this program generated "cannot select" errors:
364
365; FUNC-LABEL: {{^}}"i128-const-store":
366; FIXME: We should be able to do this with one store instruction
367; EG: STORE_RAW
368; EG: STORE_RAW
369; EG: STORE_RAW
370; EG: STORE_RAW
371; CM: STORE_DWORD
372; CM: STORE_DWORD
373; CM: STORE_DWORD
374; CM: STORE_DWORD
375; SI: buffer_store_dwordx4
; Test input: writes the i32 constants 1, 1, 2, 2 to elements 0 through 3 of
; the addrspace(1) pointer %out, as four separate stores each marked
; `align 4`. Together the stores cover 16 contiguous bytes (128 bits).
376define void @i128-const-store(i32 addrspace(1)* %out) {
377entry:
378  store i32 1, i32 addrspace(1)* %out, align 4
379  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
380  store i32 1, i32 addrspace(1)* %arrayidx2, align 4
381  %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
382  store i32 2, i32 addrspace(1)* %arrayidx4, align 4
383  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
384  store i32 2, i32 addrspace(1)* %arrayidx6, align 4
385  ret void
386}
Note: See TracBrowser for help on using the repository browser.