source: trunk/lib_ir/s2p.ll @ 3977

Last change on this file since 3977 was 3945, checked in by linmengl, 5 years ago

Separate s2p_ideal from s2p.ll, since it won't compile on SSE2 machines.

File size: 7.7 KB
Line 
; test_link: store the splat vector <x, x, x, x> through %p.
; Exists as a small externally callable symbol for checking that the build
; system compiles and links this IR module.
;   %p - destination vector; the store carries no explicit alignment.
;   %x - scalar copied into all four i32 lanes.
define void @test_link(<4 x i32> *%p, i32 %x) {
entry:
  ; Put %x in lane 0, then broadcast lane 0 with an all-zero shuffle mask.
  %lane0 = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer

  store <4 x i32> %splat, <4 x i32>* %p
  ret void
}
13
; packh_16: gather the odd-indexed bytes of both operands into one vector.
; Viewing each operand as <16 x i8>, result elements 0-7 are elements
; 1,3,...,15 of %b and result elements 8-15 are elements 1,3,...,15 of %a.
; NOTE(review): on a little-endian target these are the high bytes of each
; 16-bit field, which is presumably what "packh" refers to -- confirm.
define <4 x i32> @packh_16(<4 x i32> %a, <4 x i32> %b) alwaysinline {
entry:
  %b_bytes = bitcast <4 x i32> %b to <16 x i8>
  %a_bytes = bitcast <4 x i32> %a to <16 x i8>

  ; Mask indices 0-15 select from %b_bytes, 16-31 from %a_bytes.
  %odd_bytes = shufflevector <16 x i8> %b_bytes, <16 x i8> %a_bytes,
      <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
                  i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>

  %packed = bitcast <16 x i8> %odd_bytes to <4 x i32>
  ret <4 x i32> %packed
}
23
; packl_16: gather the even-indexed bytes of both operands into one vector.
; Viewing each operand as <16 x i8>, result elements 0-7 are elements
; 0,2,...,14 of %b and result elements 8-15 are elements 0,2,...,14 of %a.
; NOTE(review): on a little-endian target these are the low bytes of each
; 16-bit field, which is presumably what "packl" refers to -- confirm.
define <4 x i32> @packl_16(<4 x i32> %a, <4 x i32> %b) alwaysinline {
entry:
  %b_bytes = bitcast <4 x i32> %b to <16 x i8>
  %a_bytes = bitcast <4 x i32> %a to <16 x i8>

  ; Mask indices 0-15 select from %b_bytes, 16-31 from %a_bytes.
  %even_bytes = shufflevector <16 x i8> %b_bytes, <16 x i8> %a_bytes,
      <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
                  i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>

  %packed = bitcast <16 x i8> %even_bytes to <4 x i32>
  ret <4 x i32> %packed
}
33
; ifh_1: bitwise select.  For every bit position the result takes the bit
; from %b where %cond has a 1 and from %c where %cond has a 0, i.e.
; (%cond & %b) | (~%cond & %c).
define <4 x i32> @ifh_1(<4 x i32> %cond, <4 x i32> %b, <4 x i32> %c) alwaysinline {
entry:
  ; Bits contributed by %b (positions where %cond is set).
  %from_b = and <4 x i32> %cond, %b

  ; Bits contributed by %c (positions where %cond is clear); xor with
  ; all-ones is the bitwise complement.
  %cond_inv = xor <4 x i32> %cond, <i32 -1, i32 -1, i32 -1, i32 -1>
  %from_c = and <4 x i32> %cond_inv, %c

  %selected = or <4 x i32> %from_b, %from_c
  ret <4 x i32> %selected
}
44
; srli_16: logical right shift of %a treated as eight 16-bit lanes.
;   %a          - input, reinterpreted as <8 x i16>.
;   %shift_mask - per-lane shift counts; despite the name it is used as the
;                 shift amount, not as a bit mask.
; Returns the shifted lanes reinterpreted as <4 x i32>.  Per the LLVM
; language reference, a lane whose count is 16 or more has no defined result.
define <4 x i32> @srli_16(<4 x i32> %a, <8 x i16> %shift_mask) alwaysinline {
entry:
  %lanes = bitcast <4 x i32> %a to <8 x i16>
  %shifted = lshr <8 x i16> %lanes, %shift_mask
  %result = bitcast <8 x i16> %shifted to <4 x i32>
  ret <4 x i32> %result
}
52
; slli_16: left shift of %a treated as eight 16-bit lanes.
;   %a          - input, reinterpreted as <8 x i16>.
;   %shift_mask - per-lane shift counts; despite the name it is used as the
;                 shift amount, not as a bit mask.
; Returns the shifted lanes reinterpreted as <4 x i32>; bits shifted past
; bit 15 of a lane are dropped.  Per the LLVM language reference, a lane
; whose count is 16 or more has no defined result.
define <4 x i32> @slli_16(<4 x i32> %a, <8 x i16> %shift_mask) alwaysinline {
entry:
  %lanes = bitcast <4 x i32> %a to <8 x i16>
  %shifted = shl <8 x i16> %lanes, %shift_mask
  %result = bitcast <8 x i16> %shifted to <4 x i32>
  ret <4 x i32> %result
}
60
; s2p_step_ir: one pack-and-merge step over a pair of vectors.
;   %s0, %s1    - input vectors.
;   %hi_mask    - bit mask handed to @ifh_1 as the selector.
;   %shift_mask - per-16-bit-lane shift counts for @srli_16 / @slli_16.
;   %p0, %p1    - output locations; %p0 is written before %p1.
;
; With hi = packh_16(s0, s1) and lo = packl_16(s0, s1):
;   *p0 = ifh_1(hi_mask, hi,                  srli_16(lo, shift))
;   *p1 = ifh_1(hi_mask, slli_16(hi, shift),  lo)
define void @s2p_step_ir(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %hi_mask, <8 x i16> %shift_mask, <4 x i32>* %p0, <4 x i32>* %p1) alwaysinline {
entry:
  %hi = call <4 x i32> @packh_16(<4 x i32> %s0, <4 x i32> %s1)
  %lo = call <4 x i32> @packl_16(<4 x i32> %s0, <4 x i32> %s1)

  ; First output: masked bits come from %hi, the rest from %lo shifted right.
  %lo_shr = call <4 x i32> @srli_16(<4 x i32> %lo, <8 x i16> %shift_mask)
  %out0 = call <4 x i32> @ifh_1(<4 x i32> %hi_mask, <4 x i32> %hi, <4 x i32> %lo_shr)

  ; Second output: masked bits come from %hi shifted left, the rest from %lo.
  %hi_shl = call <4 x i32> @slli_16(<4 x i32> %hi, <8 x i16> %shift_mask)
  %out1 = call <4 x i32> @ifh_1(<4 x i32> %hi_mask, <4 x i32> %hi_shl, <4 x i32> %lo)

  store <4 x i32> %out0, <4 x i32>* %p0
  store <4 x i32> %out1, <4 x i32>* %p1
  ret void
}
76
; const16_1: return the constant vector with every 16-bit lane equal to 1.
; Passed as the shift count in the first stage of @s2p_bytepack_ir.
define <8 x i16> @const16_1() alwaysinline {
  ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
}
81
; const16_2: return the constant vector with every 16-bit lane equal to 2.
; Passed as the shift count in the second stage of @s2p_bytepack_ir.
define <8 x i16> @const16_2() alwaysinline {
  ret <8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
}
86
; const16_4: return the constant vector with every 16-bit lane equal to 4.
; Passed as the shift count in the third stage of @s2p_bytepack_ir.
define <8 x i16> @const16_4() alwaysinline {
  ret <8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
}
91
; himask_2: return the mask with the upper bit of every 2-bit field set.
; Each i32 lane is -1431655766, i.e. 0xAAAAAAAA (binary 1010...10).
define <4 x i32> @himask_2() alwaysinline {
  ret <4 x i32> <i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766>
}
96
; himask_4: return the mask with the upper two bits of every 4-bit field set.
; Each i32 lane is -858993460, i.e. 0xCCCCCCCC (binary 1100...1100).
define <4 x i32> @himask_4() alwaysinline {
  ret <4 x i32> <i32 -858993460, i32 -858993460, i32 -858993460, i32 -858993460>
}
101
; himask_8: return the mask with the upper four bits of every byte set.
; Each i32 lane is -252645136, i.e. 0xF0F0F0F0.
define <4 x i32> @himask_8() alwaysinline {
  ret <4 x i32> <i32 -252645136, i32 -252645136, i32 -252645136, i32 -252645136>
}
106
; s2p_bytepack_ir: run eight input vectors through three stages of
; @s2p_step_ir and store the eight results through %p0-%p7.
;
;   stage 1: mask @himask_2, shift @const16_1, on (s0,s1) (s2,s3) (s4,s5) (s6,s7)
;   stage 2: mask @himask_4, shift @const16_2, on pairs of stage-1 results
;   stage 3: mask @himask_8, shift @const16_4, on pairs of stage-2 results,
;            writing (p0,p4), (p1,p5), (p2,p6), (p3,p7) in that order
;
; @s2p_step_ir hands back its two results through pointers, so intermediate
; values are kept in 16-byte-aligned stack slots between stages.
; NOTE(review): the slot names abbreviate the original bit00224466-style
; names; presumably they record which bit positions of each source byte a
; vector holds at that point (serial-to-parallel transposition) -- confirm
; against the caller.
define void @s2p_bytepack_ir(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %s2, <4 x i32> %s3, <4 x i32> %s4, <4 x i32> %s5, <4 x i32> %s6, <4 x i32> %s7, <4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2, <4 x i32>* %p3, <4 x i32>* %p4, <4 x i32>* %p5, <4 x i32>* %p6, <4 x i32>* %p7) {
entry:
  ; Slots for stage-1 results.
  %bits0246_0 = alloca <4 x i32>, align 16
  %bits0246_1 = alloca <4 x i32>, align 16
  %bits0246_2 = alloca <4 x i32>, align 16
  %bits0246_3 = alloca <4 x i32>, align 16
  %bits1357_0 = alloca <4 x i32>, align 16
  %bits1357_1 = alloca <4 x i32>, align 16
  %bits1357_2 = alloca <4 x i32>, align 16
  %bits1357_3 = alloca <4 x i32>, align 16

  ; Slots for stage-2 results.
  %bits04_0 = alloca <4 x i32>, align 16
  %bits26_0 = alloca <4 x i32>, align 16
  %bits04_1 = alloca <4 x i32>, align 16
  %bits26_1 = alloca <4 x i32>, align 16
  %bits15_0 = alloca <4 x i32>, align 16
  %bits37_0 = alloca <4 x i32>, align 16
  %bits15_1 = alloca <4 x i32>, align 16
  %bits37_1 = alloca <4 x i32>, align 16

  ; ---- Stage 1: the helpers return fixed constants, so fetch them once.
  %mask2 = call <4 x i32> @himask_2()
  %shift1 = call <8 x i16> @const16_1()
  call void @s2p_step_ir(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %mask2, <8 x i16> %shift1, <4 x i32>* %bits0246_0, <4 x i32>* %bits1357_0)
  call void @s2p_step_ir(<4 x i32> %s2, <4 x i32> %s3, <4 x i32> %mask2, <8 x i16> %shift1, <4 x i32>* %bits0246_1, <4 x i32>* %bits1357_1)
  call void @s2p_step_ir(<4 x i32> %s4, <4 x i32> %s5, <4 x i32> %mask2, <8 x i16> %shift1, <4 x i32>* %bits0246_2, <4 x i32>* %bits1357_2)
  call void @s2p_step_ir(<4 x i32> %s6, <4 x i32> %s7, <4 x i32> %mask2, <8 x i16> %shift1, <4 x i32>* %bits0246_3, <4 x i32>* %bits1357_3)

  ; ---- Stage 2: combine neighbouring stage-1 results.
  %mask4 = call <4 x i32> @himask_4()
  %shift2 = call <8 x i16> @const16_2()

  %in0246_0 = load <4 x i32>* %bits0246_0, align 16
  %in0246_1 = load <4 x i32>* %bits0246_1, align 16
  call void @s2p_step_ir(<4 x i32> %in0246_0, <4 x i32> %in0246_1, <4 x i32> %mask4, <8 x i16> %shift2, <4 x i32>* %bits04_0, <4 x i32>* %bits26_0)

  %in0246_2 = load <4 x i32>* %bits0246_2, align 16
  %in0246_3 = load <4 x i32>* %bits0246_3, align 16
  call void @s2p_step_ir(<4 x i32> %in0246_2, <4 x i32> %in0246_3, <4 x i32> %mask4, <8 x i16> %shift2, <4 x i32>* %bits04_1, <4 x i32>* %bits26_1)

  %in1357_0 = load <4 x i32>* %bits1357_0, align 16
  %in1357_1 = load <4 x i32>* %bits1357_1, align 16
  call void @s2p_step_ir(<4 x i32> %in1357_0, <4 x i32> %in1357_1, <4 x i32> %mask4, <8 x i16> %shift2, <4 x i32>* %bits15_0, <4 x i32>* %bits37_0)

  %in1357_2 = load <4 x i32>* %bits1357_2, align 16
  %in1357_3 = load <4 x i32>* %bits1357_3, align 16
  call void @s2p_step_ir(<4 x i32> %in1357_2, <4 x i32> %in1357_3, <4 x i32> %mask4, <8 x i16> %shift2, <4 x i32>* %bits15_1, <4 x i32>* %bits37_1)

  ; ---- Stage 3: combine stage-2 results straight into the caller's outputs.
  %mask8 = call <4 x i32> @himask_8()
  %shift4 = call <8 x i16> @const16_4()

  %in04_0 = load <4 x i32>* %bits04_0, align 16
  %in04_1 = load <4 x i32>* %bits04_1, align 16
  call void @s2p_step_ir(<4 x i32> %in04_0, <4 x i32> %in04_1, <4 x i32> %mask8, <8 x i16> %shift4, <4 x i32>* %p0, <4 x i32>* %p4)

  %in15_0 = load <4 x i32>* %bits15_0, align 16
  %in15_1 = load <4 x i32>* %bits15_1, align 16
  call void @s2p_step_ir(<4 x i32> %in15_0, <4 x i32> %in15_1, <4 x i32> %mask8, <8 x i16> %shift4, <4 x i32>* %p1, <4 x i32>* %p5)

  %in26_0 = load <4 x i32>* %bits26_0, align 16
  %in26_1 = load <4 x i32>* %bits26_1, align 16
  call void @s2p_step_ir(<4 x i32> %in26_0, <4 x i32> %in26_1, <4 x i32> %mask8, <8 x i16> %shift4, <4 x i32>* %p2, <4 x i32>* %p6)

  %in37_0 = load <4 x i32>* %bits37_0, align 16
  %in37_1 = load <4 x i32>* %bits37_1, align 16
  call void @s2p_step_ir(<4 x i32> %in37_0, <4 x i32> %in37_1, <4 x i32> %mask8, <8 x i16> %shift4, <4 x i32>* %p3, <4 x i32>* %p7)

  ret void
}
182
Note: See TracBrowser for help on using the repository browser.