source: trunk/lib_ir/gen/llc_func.pytemplate.ll

Last change on this file was 4296, checked in by linmengl, 4 years ago

add tests for long shift right and dslli

File size: 5.2 KB
RevLine 
; v128i1 starts here
{% for fn in FunctionNamesI1 %}
; Lane-wise binary operation on two <128 x i1> vectors; the opcode and the
; emitted function name both come from the current FunctionNamesI1 entry.
define <128 x i1> @{{ fn.c }}(<128 x i1> %a, <128 x i1> %b) {
entry:
  %result = {{ fn.op }} <128 x i1> %a, %b
  ret <128 x i1> %result
}
{% endfor %}
9
; v64i2 starts here
{% for fn in FunctionNamesI2 %}
; Lane-wise binary operation on two <64 x i2> vectors.
; Comparison opcodes produce an i1 per lane, so their result is sign-extended
; back to i2 to keep the return type uniform across all generated functions.
define <64 x i2> @{{ fn.c }}(<64 x i2> %a, <64 x i2> %b) {
entry:
{% if "icmp" in fn.op %}
  %mask = {{ fn.op }} <64 x i2> %a, %b
  %result = sext <64 x i1> %mask to <64 x i2>
{% else %}
  %result = {{ fn.op }} <64 x i2> %a, %b
{% endif %}
  ret <64 x i2> %result
}
{% endfor %}
[3985]23
; Returns a copy of %a in which lane %idx holds the low 2 bits of %elt.
define <64 x i2> @insertelement_2(<64 x i2> %a, i8 %elt, i32 %idx) {
entry:
  ; The element arrives as a full byte; only its low 2 bits are kept.
  %narrow = trunc i8 %elt to i2
  %updated = insertelement <64 x i2> %a, i2 %narrow, i32 %idx
  ret <64 x i2> %updated
}
30
; Reads lane %idx of %a and returns it zero-extended to a byte.
define i8 @extractelement_2(<64 x i2> %a, i32 %idx) {
entry:
  %lane = extractelement <64 x i2> %a, i32 %idx
  %widened = zext i2 %lane to i8
  ret i8 %widened
}
[3996]37
; v32i4 starts here
{% for fn in FunctionNamesI4 %}
; Lane-wise binary operation on two <32 x i4> vectors.
; Comparison opcodes produce an i1 per lane, so their result is sign-extended
; back to i4 to keep the return type uniform across all generated functions.
define <32 x i4> @{{ fn.c }}(<32 x i4> %a, <32 x i4> %b) {
entry:
{% if "icmp" in fn.op %}
  %mask = {{ fn.op }} <32 x i4> %a, %b
  %result = sext <32 x i1> %mask to <32 x i4>
{% else %}
  %result = {{ fn.op }} <32 x i4> %a, %b
{% endif %}
  ret <32 x i4> %result
}
{% endfor %}
51
; Returns a copy of %a in which lane %idx holds the low 4 bits of %elt.
define <32 x i4> @insertelement_4(<32 x i4> %a, i8 %elt, i32 %idx) {
entry:
  ; The element arrives as a full byte; only its low 4 bits are kept.
  %narrow = trunc i8 %elt to i4
  %updated = insertelement <32 x i4> %a, i4 %narrow, i32 %idx
  ret <32 x i4> %updated
}
58
; Reads lane %idx of %a and returns it zero-extended to a byte.
define i8 @extractelement_4(<32 x i4> %a, i32 %idx) {
entry:
  %lane = extractelement <32 x i4> %a, i32 %idx
  %widened = zext i4 %lane to i8
  ret i8 %widened
}
65
; v16i8 starts here
{% for fn in FunctionNamesI8 %}
; Lane-wise binary operation on two <16 x i8> vectors.
; Comparison opcodes produce an i1 per lane, so their result is sign-extended
; back to i8 to keep the return type uniform across all generated functions.
define <16 x i8> @{{ fn.c }}(<16 x i8> %a, <16 x i8> %b) {
entry:
{% if "icmp" in fn.op %}
  %mask = {{ fn.op }} <16 x i8> %a, %b
  %result = sext <16 x i1> %mask to <16 x i8>
{% else %}
  %result = {{ fn.op }} <16 x i8> %a, %b
{% endif %}
  ret <16 x i8> %result
}
{% endfor %}
[4076]79
80
; long stream add/shift
; Unsigned 128-bit add returning {sum, overflow flag}.
declare {i128, i1} @llvm.uadd.with.overflow.i128(i128, i128)
83
; Adds %a and %b as unsigned 128-bit integers.
;   %P receives the 128-bit sum, stored as <2 x i64>.
;   %B receives the overflow flag widened to a byte (0 or 1).
define void @uadd_with_overflow_i128(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %P, i8* %B) {
entry:
  %lhs = bitcast <2 x i64> %a to i128
  %rhs = bitcast <2 x i64> %b to i128

  %pair = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %lhs, i128 %rhs)
  %total = extractvalue {i128, i1} %pair, 0
  %overflow = extractvalue {i128, i1} %pair, 1

  ; Sum is written first, then the flag (same store order as before).
  %total_vec = bitcast i128 %total to <2 x i64>
  store <2 x i64> %total_vec, <2 x i64>* %P

  %overflow_byte = zext i1 %overflow to i8
  store i8 %overflow_byte, i8* %B

  ret void
}
101
; 128-bit add with carry, built from two chained overflow-checked additions.
;   %sum       receives (a + b + carry_in) as <2 x i64>.
;   %carry_out receives a <2 x i64> whose lane 0 is 1 if either addition
;              overflowed and 0 otherwise; lane 1 is always 0.
; Note that %carry_in is added as a full 128-bit value, not just its low bit.
define void @add_with_carry_ir(<2 x i64> %a, <2 x i64> %b, <2 x i64> %carry_in, <2 x i64>* %carry_out, <2 x i64>* %sum) {
entry:
  %lhs = bitcast <2 x i64> %a to i128
  %rhs = bitcast <2 x i64> %b to i128
  %carry_val = bitcast <2 x i64> %carry_in to i128

  ; Step 1: a + b
  %partial = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %lhs, i128 %rhs)
  %partial_sum = extractvalue {i128, i1} %partial, 0
  %partial_ovf = extractvalue {i128, i1} %partial, 1

  ; Step 2: (a + b) + carry_in
  %final = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %partial_sum, i128 %carry_val)
  %final_sum = extractvalue {i128, i1} %final, 0
  %final_ovf = extractvalue {i128, i1} %final, 1

  ; A carry out of either step is a carry out of the whole addition.
  %any_ovf = or i1 %partial_ovf, %final_ovf
  %carry_word = zext i1 %any_ovf to i64
  %carry_vec = insertelement <2 x i64> zeroinitializer, i64 %carry_word, i32 0
  %sum_vec = bitcast i128 %final_sum to <2 x i64>

  store <2 x i64> %sum_vec, <2 x i64>* %sum
  store <2 x i64> %carry_vec, <2 x i64>* %carry_out

  ret void
}
126
; Unsigned 128-bit add with an explicit i1 carry-in, returning {sum, overflow flag}.
; NOTE(review): this does not look like a stock LLVM intrinsic; presumably it is
; provided by the toolchain this template targets. Confirm before reuse.
declare {i128, i1} @llvm.uadd.with.overflow.carryin.i128(i128, i128, i1)
128
; 128-bit add with carry using the single carry-in overflow intrinsic
; declared in this file.
;   %carry_in  only its lowest bit is used as the incoming carry.
;   %sum       receives the 128-bit sum as <2 x i64>.
;   %carry_out receives a <2 x i64> whose lane 0 is the overflow flag (0 or 1);
;              lane 1 is always 0.
define void @add_with_carry_ir_better(<2 x i64> %a, <2 x i64> %b, <2 x i64> %carry_in, <2 x i64>* %carry_out, <2 x i64>* %sum) {
entry:
  ; The label must name this function: the shorter "add_with_carry_ir" would
  ; match the sibling function of that name first.
  ; CHECK-LABEL: add_with_carry_ir_better
  %aa = bitcast <2 x i64> %a to i128
  %bb = bitcast <2 x i64> %b to i128
  %cc = bitcast <2 x i64> %carry_in to i128
  ; Keep only bit 0 of the carry vector as the carry-in flag.
  %cin = trunc i128 %cc to i1

  %res1 = call {i128, i1} @llvm.uadd.with.overflow.carryin.i128(i128 %aa, i128 %bb, i1 %cin)
  %sum1 = extractvalue {i128, i1} %res1, 0
  %obit = extractvalue {i128, i1} %res1, 1

  %ret_sum = bitcast i128 %sum1 to <2 x i64>
  %obit_64 = zext i1 %obit to i64
  %obit_2x64 = insertelement <2 x i64> zeroinitializer, i64 %obit_64, i32 0

  store <2 x i64> %ret_sum, <2 x i64>* %sum
  store <2 x i64> %obit_2x64, <2 x i64>* %carry_out

  ret void
}
150
; Reinterprets both <4 x i32> operands as single 128-bit integers and returns
; their sum (plain `add`, so it wraps on overflow) repacked as <4 x i32>.
define <4 x i32> @add_128(<4 x i32> %a, <4 x i32> %b) {
entry:
  %lhs = bitcast <4 x i32> %a to i128
  %rhs = bitcast <4 x i32> %b to i128
  %total = add i128 %lhs, %rhs
  %packed = bitcast i128 %total to <4 x i32>
  ret <4 x i32> %packed
}
159
; Shifts %a left as one 128-bit integer. The shift amount is the whole of %b
; reinterpreted as an i128, so the LLVM `shl` rule applies: an amount of 128
; or more yields poison.
define <4 x i32> @sll_128(<4 x i32> %a, <4 x i32> %b) {
entry:
  %value = bitcast <4 x i32> %a to i128
  %amount = bitcast <4 x i32> %b to i128
  %shifted = shl i128 %value, %amount
  %packed = bitcast i128 %shifted to <4 x i32>
  ret <4 x i32> %packed
}
168
{% for amount in LongShiftAmount128 %}
; Left shift of %a, viewed as one 128-bit integer, by a constant taken from
; LongShiftAmount128; one function is generated per constant.
define <4 x i32> @long_shift_left_{{ amount }}(<4 x i32> %a) {
entry:
  %value = bitcast <4 x i32> %a to i128
  %shifted = shl i128 %value, {{ amount }}
  %packed = bitcast i128 %shifted to <4 x i32>
  ret <4 x i32> %packed
}
{% endfor %}
178 
179 
{% for amount in LongShiftAmount128 %}
; Logical right shift of %a, viewed as one 128-bit integer, by a constant
; taken from LongShiftAmount128; one function is generated per constant.
define <4 x i32> @long_shift_right_{{ amount }}(<4 x i32> %a) {
entry:
  %value = bitcast <4 x i32> %a to i128
  %shifted = lshr i128 %value, {{ amount }}
  %packed = bitcast i128 %shifted to <4 x i32>
  ret <4 x i32> %packed
}
{% endfor %}
189
{% for sh in LongShiftAmount128 %}
; Combined shift by a constant sh taken from LongShiftAmount128:
;   result = (a << sh) | (b >> (128 - sh))
; with %a and %b each viewed as one 128-bit integer.
; LLVM shifts by an amount equal to the bit width are poison, so the two
; boundary amounts are emitted as their mathematical result instead of as
; a 128-bit shift.
define <4 x i32> @long_shift_dslli_{{sh}}(<4 x i32> %a, <4 x i32> %b) {
entry:
{% if sh == 0 %}
  ; sh = 0: nothing is shifted in from %b (b >> 128 contributes no bits).
  ret <4 x i32> %a
{% elif sh == 128 %}
  ; sh = 128: every bit of %a is shifted out and %b is taken unshifted.
  ret <4 x i32> %b
{% else %}
  %aa = bitcast <4 x i32> %a to i128
  %bb = bitcast <4 x i32> %b to i128

  ; High part comes from %a, the vacated low bits are filled from the top of %b.
  %c = shl i128 %aa, {{sh}}
  %d = lshr i128 %bb, {{128 - sh}}
  %e = or i128 %c, %d

  %cc = bitcast i128 %e to <4 x i32>
  ret <4 x i32> %cc
{% endif %}
}
{% endfor %}
Note: See TracBrowser for help on using the repository browser.