source: trunk/lib_ir/gen/llc_func.pytemplate.ll @ 4143

Last change on this file since 4143 was 4143, checked in by linmengl, 5 years ago

add test code for add_with_carry_ir, detect a bug on AVX2.

File size: 3.6 KB
Line 
; ---- <128 x i1> element-wise binary operations ----
; One function is generated per entry of FunctionNamesI1: `name.c` supplies
; the symbol name and `name.op` the IR operation applied to %a and %b.
{% for name in FunctionNamesI1 %}
define <128 x i1> @{{ name.c }}(<128 x i1> %a, <128 x i1> %b) {
body:
  %res = {{ name.op }} <128 x i1> %a, %b
  ret <128 x i1> %res
}
{% endfor %}
9
; ---- <64 x i2> element-wise binary operations ----
; One function is generated per entry of FunctionNamesI2. When the operation
; text contains "icmp" the raw result is treated as <64 x i1> and sign-extended,
; so every generated function returns <64 x i2>.
{% for name in FunctionNamesI2 %}
define <64 x i2> @{{ name.c }}(<64 x i2> %a, <64 x i2> %b) {
body:
  %res = {{ name.op }} <64 x i2> %a, %b
{% if "icmp" not in name.op %}
  ret <64 x i2> %res
{% else %}
  %mask = sext <64 x i1> %res to <64 x i2>
  ret <64 x i2> %mask
{% endif %}
}
{% endfor %}
23
; Returns %a with the element at index %idx replaced by %elt truncated to i2.
define <64 x i2> @insertelement_2(<64 x i2> %a, i8 %elt, i32 %idx) {
body:
  %narrow = trunc i8 %elt to i2
  %updated = insertelement <64 x i2> %a, i2 %narrow, i32 %idx
  ret <64 x i2> %updated
}
30
; Returns the element of %a at index %idx, zero-extended from i2 to i8.
define i8 @extractelement_2(<64 x i2> %a, i32 %idx) {
body:
  %lane = extractelement <64 x i2> %a, i32 %idx
  %widened = zext i2 %lane to i8
  ret i8 %widened
}
37
; ---- <32 x i4> element-wise binary operations ----
; One function is generated per entry of FunctionNamesI4. When the operation
; text contains "icmp" the raw result is treated as <32 x i1> and sign-extended,
; so every generated function returns <32 x i4>.
{% for name in FunctionNamesI4 %}
define <32 x i4> @{{ name.c }}(<32 x i4> %a, <32 x i4> %b) {
body:
  %res = {{ name.op }} <32 x i4> %a, %b
  {% if "icmp" not in name.op %}
  ret <32 x i4> %res
  {% else %}
  %mask = sext <32 x i1> %res to <32 x i4>
  ret <32 x i4> %mask
  {% endif %}
}
{% endfor %}
51
; Returns %a with the element at index %idx replaced by %elt truncated to i4.
define <32 x i4> @insertelement_4(<32 x i4> %a, i8 %elt, i32 %idx) {
body:
  %narrow = trunc i8 %elt to i4
  %updated = insertelement <32 x i4> %a, i4 %narrow, i32 %idx
  ret <32 x i4> %updated
}
58
; Returns the element of %a at index %idx, zero-extended from i4 to i8.
define i8 @extractelement_4(<32 x i4> %a, i32 %idx) {
body:
  %lane = extractelement <32 x i4> %a, i32 %idx
  %widened = zext i4 %lane to i8
  ret i8 %widened
}
65
; ---- <16 x i8> element-wise binary operations ----
; One function is generated per entry of FunctionNamesI8. When the operation
; text contains "icmp" the raw result is treated as <16 x i1> and sign-extended,
; so every generated function returns <16 x i8>.
{% for name in FunctionNamesI8 %}
define <16 x i8> @{{ name.c }}(<16 x i8> %a, <16 x i8> %b) {
body:
  %res = {{ name.op }} <16 x i8> %a, %b
  {% if "icmp" not in name.op %}
  ret <16 x i8> %res
  {% else %}
  %mask = sext <16 x i1> %res to <16 x i8>
  ret <16 x i8> %mask
  {% endif %}
}
{% endfor %}
79
80
; long stream add/shift
; Intrinsic: unsigned i128 addition returning {sum, overflow flag}.
declare {i128, i1} @llvm.uadd.with.overflow.i128(i128, i128)
83
; Adds %a and %b as unsigned 128-bit integers (each <2 x i64> is bitcast to
; a single i128). Stores the sum to %P as <2 x i64> and the overflow flag,
; zero-extended to i8, to %B.
define void @uadd_with_overflow_i128(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %P, i8* %B) {
body:
  %lhs = bitcast <2 x i64> %a to i128
  %rhs = bitcast <2 x i64> %b to i128
  %pair = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %lhs, i128 %rhs)

  ; Field 0: the sum, viewed again as a vector.
  %wide_sum = extractvalue {i128, i1} %pair, 0
  %sum_vec = bitcast i128 %wide_sum to <2 x i64>

  ; Field 1: the overflow flag, widened to a byte.
  %carry = extractvalue {i128, i1} %pair, 1
  %carry_byte = zext i1 %carry to i8

  store <2 x i64> %sum_vec, <2 x i64>* %P
  store i8 %carry_byte, i8* %B
  ret void
}
101
; Computes %a + %b + %carry_in, treating each <2 x i64> as one i128, using two
; chained overflow-checked additions. Stores the final sum to %sum, and stores
; to %carry_out a <2 x i64> whose element 0 is 1 if either addition overflowed
; (0 otherwise) and whose element 1 is 0.
define void @add_with_carry_ir(<2 x i64> %a, <2 x i64> %b, <2 x i64> %carry_in, <2 x i64>* %carry_out, <2 x i64>* %sum) {
body:
  %lhs = bitcast <2 x i64> %a to i128
  %rhs = bitcast <2 x i64> %b to i128
  %carry_wide = bitcast <2 x i64> %carry_in to i128

  ; Step 1: a + b
  %first = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %lhs, i128 %rhs)
  %partial = extractvalue {i128, i1} %first, 0
  %ovf_first = extractvalue {i128, i1} %first, 1

  ; Step 2: (a + b) + carry_in
  %second = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %partial, i128 %carry_wide)
  %total = extractvalue {i128, i1} %second, 0
  %ovf_second = extractvalue {i128, i1} %second, 1

  ; Combine the two overflow flags and place the result in element 0.
  %ovf_any = or i1 %ovf_first, %ovf_second
  %ovf_i64 = zext i1 %ovf_any to i64
  %carry_vec = insertelement <2 x i64> zeroinitializer, i64 %ovf_i64, i32 0
  %total_vec = bitcast i128 %total to <2 x i64>

  store <2 x i64> %total_vec, <2 x i64>* %sum
  store <2 x i64> %carry_vec, <2 x i64>* %carry_out
  ret void
}
126
; Adds %a and %b as single 128-bit integers (each <4 x i32> is bitcast to
; i128) and returns the sum as <4 x i32>.
define <4 x i32> @add_128(<4 x i32> %a, <4 x i32> %b) {
body:
  %lhs = bitcast <4 x i32> %a to i128
  %rhs = bitcast <4 x i32> %b to i128
  %total = add i128 %lhs, %rhs
  %total_vec = bitcast i128 %total to <4 x i32>
  ret <4 x i32> %total_vec
}
135
; Shifts %a left as a single 128-bit integer and returns the result as
; <4 x i32>. The shift amount is the whole of %b reinterpreted as an i128,
; not a single element of it.
; NOTE(review): `shl` does not define a result for amounts of 128 or more;
; confirm that callers keep the amount in range.
define <4 x i32> @sll_128(<4 x i32> %a, <4 x i32> %b) {
body:
  %value = bitcast <4 x i32> %a to i128
  %amount = bitcast <4 x i32> %b to i128
  %shifted = shl i128 %value, %amount
  %shifted_vec = bitcast i128 %shifted to <4 x i32>
  ret <4 x i32> %shifted_vec
}
144
Note: See TracBrowser for help on using the repository browser.