source: icGREP/icgrep-devel/llvm-3.8.0.src/test/CodeGen/AMDGPU/load.ll @ 5027

Last change on this file since 5027 was 5027, checked in by cameron, 3 years ago

Upgrade to llvm 3.8

File size: 22.1 KB
Line 
1; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
2; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
3; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-NOHSA --check-prefix=FUNC %s
4; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefix=FUNC --check-prefix=CI-HSA --check-prefix=SI %s
5; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-NOHSA --check-prefix=FUNC %s
6
7;===------------------------------------------------------------------------===;
8; GLOBAL ADDRESS SPACE
9;===------------------------------------------------------------------------===;
10
; Zero-extending i8 load from the global address space (addrspace 1); the
; widened i32 result is written to %out.
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %byte = load i8, i8 addrspace(1)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
23
; Sign-extending i8 load from the global address space (addrspace 1).
; The R600 checks expect the register written by VTX_READ_8 to be fed into a
; BFE_INT that takes a literal operand, followed by the literal value 8.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %byte = load i8, i8 addrspace(1)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
37
; Zero-extending <2 x i8> load from global memory, widened to <2 x i32>.
; Every target is expected to issue one byte load per element.
; FUNC-LABEL: {{^}}load_v2i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
52
; Sign-extending <2 x i8> load from global memory, widened to <2 x i32>.
; On R600 each element is read with VTX_READ_8 and then passed through its own
; BFE_INT; the DAG checks allow those instructions in any order.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8

; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
72
; Zero-extending <4 x i8> load from global memory, widened to <4 x i32>.
; Four separate byte loads are expected on every target.
; FUNC-LABEL: {{^}}load_v4i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
93
; Sign-extending <4 x i8> load from global memory, widened to <4 x i32>.
; On R600 each of the four elements is read with VTX_READ_8 and then passed
; through its own BFE_INT; the DAG checks allow any instruction order.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
122
; Zero-extending i16 load from the global address space (addrspace 1); the
; widened i32 result is written to %out.
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
135
; Sign-extending i16 load from the global address space (addrspace 1).
; The R600 checks expect the register written by VTX_READ_16 to be fed into a
; BFE_INT that takes a literal operand, followed by the literal value 16.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
149
; Zero-extending <2 x i16> load from global memory, widened to <2 x i32>.
; One 16-bit load per element is expected on every target.
; FUNC-LABEL: {{^}}load_v2i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
164
; Sign-extending <2 x i16> load from global memory, widened to <2 x i32>.
; On R600 each element is read with VTX_READ_16 and then passed through its
; own BFE_INT; the DAG checks allow those instructions in any order.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
183
; Zero-extending <4 x i16> load from global memory, widened to <4 x i32>.
; Four separate 16-bit loads are expected on every target.
; FUNC-LABEL: {{^}}load_v4i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
204
; Sign-extending <4 x i16> load from global memory, widened to <4 x i32>.
; On R600 each of the four elements is read with VTX_READ_16 and then passed
; through its own BFE_INT; the DAG checks allow any instruction order.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
233
; Plain i32 load from the global address space (addrspace 1), copied to %out
; without any extension.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
; CI-HSA: flat_load_dword
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32, i32 addrspace(1)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
246
; Plain float load from the global address space (addrspace 1), copied to
; %out. The expected instructions are the same as for the i32 case.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
; CI-HSA: flat_load_dword
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %fval = load float, float addrspace(1)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
259
; <2 x float> load from the global address space; a single 64-bit load is
; expected on every target.
; FUNC-LABEL: {{^}}load_v2f32:
; R600: MEM_RAT
; R600: VTX_READ_64
; SI-NOHSA: buffer_load_dwordx2
; CI-HSA: flat_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %pair = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}
272
; i64 load from the global address space; a single 64-bit load is expected on
; every target.
; FUNC-LABEL: {{^}}load_i64:
; R600: VTX_READ_64
; SI-NOHSA: buffer_load_dwordx2
; CI-HSA: flat_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %quad = load i64, i64 addrspace(1)* %in
  store i64 %quad, i64 addrspace(1)* %out
  ret void
}
283
; i32 load from global memory, sign-extended to i64 before the store.
; The R600 checks expect an ASHR with a literal operand followed by the
; literal 31; the GCN targets are only checked for a single dword load.
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: MEM_RAT
; R600: MEM_RAT
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
; R600: 31
; SI-NOHSA: buffer_load_dword
; CI-HSA: flat_load_dword

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32, i32 addrspace(1)* %in
  %wide = sext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
299
; i32 load from global memory, zero-extended to i64 before the store.
; The GCN checks mirror load_i64_sext, which performs the identical i32 load
; from the same address space.
; FUNC-LABEL: {{^}}load_i64_zext:
; R600: MEM_RAT
; R600: MEM_RAT
; SI-NOHSA: buffer_load_dword
; CI-HSA: flat_load_dword
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32, i32 addrspace(1)* %in
  %wide = zext i32 %word to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
310
; <8 x i32> load from global memory; every target is expected to split it
; into two 128-bit loads.
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %vec = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %vec, <8 x i32> addrspace(1)* %out
  ret void
}
325
; <16 x i32> load from global memory; every target is expected to split it
; into four 128-bit loads.
; FUNC-LABEL: {{^}}load_v16i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %vec = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %vec, <16 x i32> addrspace(1)* %out
  ret void
}
346
347;===------------------------------------------------------------------------===;
348; CONSTANT ADDRESS SPACE
349;===------------------------------------------------------------------------===;
350
; Sign-extending i8 load from the constant address space (addrspace 2); the
; i32 result is stored to global memory.
; The R600 checks expect the register written by VTX_READ_8 to be fed into a
; BFE_INT that takes a literal operand, followed by the literal value 8.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI-NOHSA: buffer_load_sbyte v{{[0-9]+}},
; CI-HSA: flat_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %byte = load i8, i8 addrspace(2)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
365
; Zero-extending i8 load from the constant address space (addrspace 2),
; reading directly through %in with no offset applied.
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %byte = load i8, i8 addrspace(2)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
378
; Zero-extending i8 load from the constant address space (addrspace 2), taken
; one byte past %in so the address is not the pointer argument itself.
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %addr = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %byte = load i8, i8 addrspace(2)* %addr
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
392
; Sign-extending i16 load from the constant address space (addrspace 2); the
; i32 result is stored to global memory.
; The R600 checks expect the register written by VTX_READ_16 to be fed into a
; BFE_INT that takes a literal operand, followed by the literal value 16.
; Register patterns accept multi-digit register numbers and match the '.'
; separator literally.
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %half = load i16, i16 addrspace(2)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
407
; Zero-extending i16 load from the constant address space (addrspace 2),
; reading directly through %in with no offset applied.
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %half = load i16, i16 addrspace(2)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
420
; Zero-extending i16 load from the constant address space (addrspace 2), taken
; one i16 element past %in.
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %addr = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %half = load i16, i16 addrspace(2)* %addr
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
434
; i32 load from the constant address space (addrspace 2), copied to global
; memory. The SI check expects a scalar s_load_dword rather than a vector load.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %word = load i32, i32 addrspace(2)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
446
; float load from the constant address space (addrspace 2), copied to global
; memory. The SI check expects a scalar s_load_dword, as in the i32 case.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %fval = load float, float addrspace(2)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
457
458;===------------------------------------------------------------------------===;
459; LOCAL ADDRESS SPACE
460;===------------------------------------------------------------------------===;
461
; Zero-extending i8 load from the local address space (addrspace 3); the i32
; result is stored to global memory.
; The SI checks require an m0 write ahead of the ds_read and forbid an
; s_wqm_b64 before it.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
474
; Sign-extending i8 load from the local address space (addrspace 3).
; R600 is expected to read an unsigned byte and then apply BFE_INT, while the
; SI check expects the signed ds_read_i8 form.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
488
; Zero-extending <2 x i8> load from the local address space, widened to
; <2 x i32>; one byte read per element is expected.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
503
; Sign-extending <2 x i8> load from the local address space, widened to
; <2 x i32>. R600 reads and BFE_INTs may appear in any order (DAG checks).
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
520
; Zero-extending <4 x i8> load from the local address space, widened to
; <4 x i32>; four byte reads are expected.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
539
; Sign-extending <4 x i8> load from the local address space, widened to
; <4 x i32>. R600 reads and BFE_INTs may appear in any order (DAG checks).
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
562
; Zero-extending i16 load from the local address space (addrspace 3); the i32
; result is stored to global memory.
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16, i16 addrspace(3)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
576
; Sign-extending i16 load from the local address space (addrspace 3).
; R600 is expected to read an unsigned short and then apply BFE_INT, while the
; SI check expects the signed ds_read_i16 form.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16, i16 addrspace(3)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
590
; Zero-extending <2 x i16> load from the local address space, widened to
; <2 x i32>; one 16-bit read per element is expected.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
605
; Sign-extending <2 x i16> load from the local address space, widened to
; <2 x i32>. R600 reads and BFE_INTs may appear in any order (DAG checks).
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
622
; Zero-extending <4 x i16> load from the local address space, widened to
; <4 x i32>; four 16-bit reads are expected.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
641
; Sign-extending <4 x i16> load from the local address space, widened to
; <4 x i32>. R600 reads and BFE_INTs may appear in any order (DAG checks).
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
664
; Plain i32 load from the local address space (addrspace 3), copied to global
; memory without any extension.
; FUNC-LABEL: {{^}}load_i32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %word = load i32, i32 addrspace(3)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}
677
; Plain float load from the local address space (addrspace 3), copied to
; global memory. Carries the same s_wqm_b64 exclusion as the integer local
; loads in this file so the m0 setup is checked uniformly.
; FUNC-LABEL: {{^}}load_f32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %fval = load float, float addrspace(3)* %in
  store float %fval, float addrspace(1)* %out
  ret void
}
689
; <2 x float> load from the local address space (addrspace 3). R600 is
; expected to use two LDS reads, while the SI check expects a single 64-bit
; ds_read_b64. Carries the same s_wqm_b64 exclusion as the integer local loads
; in this file so the m0 setup is checked uniformly.
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %pair = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %pair, <2 x float> addrspace(1)* %out
  ret void
}
702
; Loads both an i32 and a <2 x i32> through the same local base pointer.
; The scalar is read at %in; the vector is read two <2 x i32> elements past
; %in with only 4-byte alignment. The scalar is inserted into lane 0 of a zero
; vector and added to the loaded vector before the result is stored.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %elt = load i32, i32 addrspace(3)* %in
  %base.v2 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %pair.addr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %base.v2, i32 2
  %pair = load <2 x i32>, <2 x i32> addrspace(3)* %pair.addr, align 4
  %elt.v2 = insertelement <2 x i32> <i32 0, i32 0>, i32 %elt, i32 0
  %sum = add <2 x i32> %pair, %elt.v2
  store <2 x i32> %sum, <2 x i32> addrspace(1)* %out
  ret void
}
720
721
@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate: the address of element 1 of @lds must be formed from a
; register holding zero plus an instruction offset of 4.
; The destination register of the read is left unconstrained because it is
; chosen by register allocation and is not what this test is about.
; The %in argument is unused; the load goes through @lds and the result is
; stored one i32 past %out.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %lds.elt1 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %word = load i32, i32 addrspace(3)* %lds.elt1
  %out.elt1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %word, i32 addrspace(1)* %out.elt1
  ret void
}
Note: See TracBrowser for help on using the repository browser.