; Cost-model regression test for integer/FP arithmetic on x86.
; Run once with default CHECK prefix (corei7-avx), and with SSE3 (core2)
; and AVX2 (core-avx2) prefixes for CPU-specific costs.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
; NOTE(review): several functions below carry checks under an "AVX" prefix,
; but no RUN line enables that prefix, so those checks are never verified --
; confirm whether a fourth RUN line with --check-prefix=AVX was intended
; before relying on the AVX-prefixed cost expectations.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
---|
; Vector integer add costs: 128-bit adds are single instructions; 256-bit
; and wider integer adds are split on AVX1 (no 256-bit integer ALU).
define i32 @add(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} add
  %A = add <4 x i32> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %B = add <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} add
  %C = add <2 x i64> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %D = add <4 x i64> undef, undef
  ;CHECK: cost of 8 {{.*}} add
  %E = add <8 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}
---|
22 | |
---|
23 | |
---|
; Vector xor costs: bitwise ops are cheap at every width (AVX has 256-bit
; logical ops), so every size is cost 1.
define i32 @xor(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} xor
  %A = xor <4 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %B = xor <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %C = xor <2 x i64> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %D = xor <4 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}
---|
36 | |
---|
; CHECK: mul
define void @mul() {
  ; A <2 x i32> gets expanded to a <2 x i64> vector.
  ; A <2 x i64> vector multiply is implemented using
  ; 3 PMULUDQ and 2 PADDS and 4 shifts.
  ;CHECK: cost of 9 {{.*}} mul
  %A0 = mul <2 x i32> undef, undef
  ;CHECK: cost of 9 {{.*}} mul
  %A1 = mul <2 x i64> undef, undef
  ;CHECK: cost of 18 {{.*}} mul
  %A2 = mul <4 x i64> undef, undef
  ret void
}
---|
50 | |
---|
; SSE3: sse3mull
define void @sse3mull() {
  ; SSE3: cost of 6 {{.*}} mul
  %A0 = mul <4 x i32> undef, undef
  ret void
  ; The check below bounds the match: the cost line must appear before the
  ; next function's label in the analysis output.
  ; SSE3: avx2mull
}
---|
58 | |
---|
; AVX2: avx2mull
define void @avx2mull() {
  ; AVX2: cost of 9 {{.*}} mul
  %A0 = mul <4 x i64> undef, undef
  ret void
  ; Bounding check: the cost line must appear before the next function label.
  ; AVX2: fmul
}
---|
66 | |
---|
; CHECK: fmul
define i32 @fmul(i32 %arg) {
  ; FP multiplies map to MULPS/VMULPS; both 128- and 256-bit cost 2 here.
  ;CHECK: cost of 2 {{.*}} fmul
  %A = fmul <4 x float> undef, undef
  ;CHECK: cost of 2 {{.*}} fmul
  %B = fmul <8 x float> undef, undef
  ret i32 undef
}
---|
75 | |
---|
; 128-bit variable vector shifts. AVX2 adds per-element variable shifts
; (VPSLLVD etc.), so costs drop to 1 except arithmetic i64 shifts, which
; have no native instruction. (See header note: the AVX prefix is not
; enabled by any RUN line.)
; AVX: shift
; AVX2: shift
define void @shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %C0 = ashr <4 x i32> undef, undef
  ; AVX: cost of 6 {{.*}} ashr
  ; AVX2: cost of 4 {{.*}} ashr
  %C1 = ashr <2 x i64> undef, undef

  ret void
}
---|
102 | |
---|
; 256-bit variable vector shifts; same pattern as @shift but at 256-bit
; width, where pre-AVX2 targets must split the vector. (See header note:
; the AVX prefix is not enabled by any RUN line.)
; AVX: avx2shift
; AVX2: avx2shift
define void @avx2shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %C0 = ashr <8 x i32> undef, undef
  ; AVX: cost of 12 {{.*}} ashr
  ; AVX2: cost of 4 {{.*}} ashr
  %C1 = ashr <4 x i64> undef, undef

  ret void
}
---|