; Checks the X86 cost model's estimates for vector integer and floating-point
; arithmetic. The same IR is analyzed once per invocation below, and each
; invocation is verified against its own FileCheck prefix:
;   corei7-avx -> the default prefix, and a second time with the AVX prefix
;   core2      -> the SSE3 prefix
;   core-avx2  -> the AVX2 prefix
; The AVX-prefixed expectations on the shift tests are only read by an
; invocation that selects that prefix explicitly, hence the second
; corei7-avx line.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Vector integer add, verified by the default-prefix (corei7-avx) invocation.
; Expected cost: 1 for the 128-bit types, 4 for the 256-bit types and 8 for
; <8 x i64>. The scalar return is expected to be free.
define i32 @add(i32 %arg) {
  ; CHECK: cost of 1 {{.*}} add
  %v4i32 = add <4 x i32> undef, undef
  ; CHECK: cost of 4 {{.*}} add
  %v8i32 = add <8 x i32> undef, undef
  ; CHECK: cost of 1 {{.*}} add
  %v2i64 = add <2 x i64> undef, undef
  ; CHECK: cost of 4 {{.*}} add
  %v4i64 = add <4 x i64> undef, undef
  ; CHECK: cost of 8 {{.*}} add
  %v8i64 = add <8 x i64> undef, undef
  ; CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}


; Vector xor, verified by the default-prefix (corei7-avx) invocation. Every
; width tested here, 128-bit and 256-bit alike, is expected to cost 1, and the
; scalar return is expected to be free.
define i32 @xor(i32 %arg) {
  ; CHECK: cost of 1 {{.*}} xor
  %v4i32 = xor <4 x i32> undef, undef
  ; CHECK: cost of 1 {{.*}} xor
  %v8i32 = xor <8 x i32> undef, undef
  ; CHECK: cost of 1 {{.*}} xor
  %v2i64 = xor <2 x i64> undef, undef
  ; CHECK: cost of 1 {{.*}} xor
  %v4i64 = xor <4 x i64> undef, undef
  ; CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Vector integer multiply, verified by the default-prefix (corei7-avx)
; invocation. The bare check below anchors the cost expectations after the
; first "mul" in the analysis output.
; CHECK: mul
define void @mul() {
  ; <2 x i32> is expanded to <2 x i64>, so both types share one expectation.
  ; The <2 x i64> multiply is implemented with 3 PMULUDQ, 2 adds and 4 shifts,
  ; which gives the expected cost of 9.
  ; CHECK: cost of 9 {{.*}} mul
  %v2i32 = mul <2 x i32> undef, undef
  ; CHECK: cost of 9 {{.*}} mul
  %v2i64 = mul <2 x i64> undef, undef
  ; The <4 x i64> multiply is expected to cost 18, twice the <2 x i64> figure.
  ; CHECK: cost of 18 {{.*}} mul
  %v4i64 = mul <4 x i64> undef, undef
  ret void
}

; <4 x i32> multiply, verified only by the SSE3-prefix (core2) invocation.
; SSE3: sse3mull
define void @sse3mull() {
  ; SSE3: cost of 6 {{.*}} mul
  %v4i32 = mul <4 x i32> undef, undef
  ret void
  ; Closing anchor: the next function's name must still follow the cost line
  ; matched above, which keeps that match inside this function's output.
  ; SSE3: avx2mull
}

; <4 x i64> multiply, verified only by the AVX2-prefix (core-avx2) invocation.
; AVX2: avx2mull
define void @avx2mull() {
  ; AVX2: cost of 9 {{.*}} mul
  %v4i64 = mul <4 x i64> undef, undef
  ret void
  ; Closing anchor: the next function's name must still follow the cost line
  ; matched above, which keeps that match inside this function's output.
  ; AVX2: fmul
}

; Floating-point vector multiply, verified by the default-prefix (corei7-avx)
; invocation. Both the 128-bit and the 256-bit vector are expected to cost 2.
; CHECK: fmul
define i32 @fmul(i32 %arg) {
  ; CHECK: cost of 2 {{.*}} fmul
  %v4f32 = fmul <4 x float> undef, undef
  ; CHECK: cost of 2 {{.*}} fmul
  %v8f32 = fmul <8 x float> undef, undef
  ret i32 undef
}

; Shifts of 128-bit integer vectors. Every instruction carries one expectation
; per prefix: the AVX prefix expects 2 for each shift except the arithmetic
; right shift of <2 x i64> (6); the AVX2 prefix expects 1 except for that same
; shift (4).
; AVX: shift
; AVX2: shift
define void @shift() {
  ; Left shifts.
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %left.v4i32 = shl <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %left.v2i64 = shl <2 x i64> undef, undef

  ; Logical right shifts.
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %logical.v4i32 = lshr <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %logical.v2i64 = lshr <2 x i64> undef, undef

  ; Arithmetic right shifts; the i64 element case is the expensive one.
  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %arith.v4i32 = ashr <4 x i32> undef, undef
  ; AVX: cost of 6 {{.*}} ashr
  ; AVX2: cost of 4 {{.*}} ashr
  %arith.v2i64 = ashr <2 x i64> undef, undef

  ret void
}

; Shifts of 256-bit integer vectors. Every instruction carries one expectation
; per prefix: the AVX prefix expects 2 for each shift except the arithmetic
; right shift of <4 x i64> (12); the AVX2 prefix expects 1 except for that
; same shift (4).
; AVX: avx2shift
; AVX2: avx2shift
define void @avx2shift() {
  ; Left shifts.
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %left.v8i32 = shl <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %left.v4i64 = shl <4 x i64> undef, undef

  ; Logical right shifts.
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %logical.v8i32 = lshr <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %logical.v4i64 = lshr <4 x i64> undef, undef

  ; Arithmetic right shifts; the i64 element case is the expensive one.
  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %arith.v8i32 = ashr <8 x i32> undef, undef
  ; AVX: cost of 12 {{.*}} ashr
  ; AVX2: cost of 4 {{.*}} ashr
  %arith.v4i64 = ashr <4 x i64> undef, undef

  ret void
}
