source: icGREP/icgrep-devel/llvm-3.6.1.src/test/CodeGen/R600/flat-address-space.ll @ 4664

Last change on this file since 4664 was 4664, checked in by cameron, 4 years ago

Upgrade LLVM to 3.6.1

File size: 6.7 KB
Line 
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
9
; A flat pointer reaching the store is a phi of a casted local (addrspace 3)
; and a casted global (addrspace 1) pointer, so the backend cannot specialize
; the access and must emit a flat_store.
; CHECK-LABEL: {{^}}branch_use_flat_i32:
; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
; CHECK: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store i32 %x, i32 addrspace(4)* %fptr, align 4
;  %val = load i32 addrspace(4)* %fptr, align 4
;  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
33


; These testcases might become useless when there are optimizations to
; remove generic pointers.
; A global pointer cast to flat must still be stored through a flat
; instruction: data and the 64-bit address are moved into VGPRs first.
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr, align 4
  ret void
}
49
; 64-bit flat store selects the two-dword flat variant.
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  store i64 %x, i64 addrspace(4)* %fptr, align 8
  ret void
}
57
; 128-bit vector flat store selects the four-dword flat variant.
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
  ret void
}
65
; Truncating i32 -> i16 store through a flat pointer selects flat_store_short.
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %y = trunc i32 %x to i16
  store i16 %y, i16 addrspace(4)* %fptr, align 2
  ret void
}
74
; Truncating i32 -> i8 store through a flat pointer selects flat_store_byte.
; NOTE(review): align 2 on an i8 store looks like a copy-paste from the i16
; case, but it is legal IR and irrelevant to the instruction selected.
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %y = trunc i32 %x to i8
  store i8 %y, i8 addrspace(4)* %fptr, align 2
  ret void
}
83
84
85
; Flat load of an i32. The original line read "; CHECK-LABEL @load_flat_i32:"
; — without the colon after CHECK-LABEL, FileCheck ignores the directive, so
; the label was never actually checked. Fixed to the directive form used by
; the rest of this file.
; CHECK-LABEL: {{^}}load_flat_i32:
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  %fload = load i32 addrspace(4)* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}
94
; Flat load of an i64 (missing colon after CHECK-LABEL fixed so FileCheck
; actually enforces the label; see load_flat_i32).
; CHECK-LABEL: {{^}}load_flat_i64:
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  %fload = load i64 addrspace(4)* %fptr, align 4
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}
103
; Flat load of a 128-bit vector (missing colon after CHECK-LABEL fixed so
; FileCheck actually enforces the label; see load_flat_i32).
; CHECK-LABEL: {{^}}load_flat_v4i32:
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
112
; Sign-extending i8 flat load selects flat_load_sbyte (missing colon after
; CHECK-LABEL fixed so FileCheck actually enforces the label).
; CHECK-LABEL: {{^}}sextload_flat_i8:
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8 addrspace(4)* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
122
; Zero-extending i8 flat load selects flat_load_ubyte (missing colon after
; CHECK-LABEL fixed so FileCheck actually enforces the label).
; CHECK-LABEL: {{^}}zextload_flat_i8:
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8 addrspace(4)* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
132
; Sign-extending i16 flat load selects flat_load_sshort (missing colon after
; CHECK-LABEL fixed so FileCheck actually enforces the label).
; CHECK-LABEL: {{^}}sextload_flat_i16:
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16 addrspace(4)* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
142
; Zero-extending i16 flat load selects flat_load_ushort (missing colon after
; CHECK-LABEL fixed so FileCheck actually enforces the label).
; CHECK-LABEL: {{^}}zextload_flat_i16:
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16 addrspace(4)* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
152
153
154
; TODO: This should not be zero when registers are used for small
; scratch allocations again.

; A private (addrspace 0) alloca cast to flat forces a scratch-backed flat
; access, so the prologue must initialize the flat_scratch SGPR pair.
; Check for prologue initializing special SGPRs pointing to scratch.
; CHECK-LABEL: {{^}}store_flat_scratch:
; CHECK: s_movk_i32 flat_scratch_lo, 0
; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
; CHECK: flat_store_dword
; CHECK: s_barrier
; CHECK: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.r600.read.tidig.x() #3
  %pptr = getelementptr i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.AMDGPU.barrier.local() #1
  %reload = load i32 addrspace(4)* %fptr, align 4
  store i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}
178
declare void @llvm.AMDGPU.barrier.local() #1
declare i32 @llvm.r600.read.tidig.x() #3

attributes #0 = { nounwind }
attributes #1 = { nounwind noduplicate }
attributes #3 = { nounwind readnone }
Note: See TracBrowser for help on using the repository browser.