1 | \section{SIMD Scalability}\label{sec:AVX2} |
---|
2 | |
---|
3 | |
---|
4 | |
---|
5 | |
---|
6 | \begin{figure} |
---|
7 | \begin{center} |
---|
8 | \begin{tikzpicture} |
---|
9 | \begin{axis}[ |
---|
10 | xtick=data, |
---|
11 | ylabel=AVX Speedup, |
---|
12 | xticklabels={@,Date,Email,URIorEmail,xquote}, |
---|
13 | tick label style={font=\tiny}, |
---|
14 | enlarge x limits=0.15, |
---|
15 | enlarge y limits={0.15, upper}, |
---|
16 | ymin=0, |
---|
17 | legend style={at={(0.5,-0.15)}, |
---|
18 | anchor=north,legend columns=-1}, |
---|
19 | ybar, |
---|
20 | bar width=7pt, |
---|
21 | ] |
---|
22 | \addplot[fill=black] |
---|
23 | file {data/avxcycles1.dat}; |
---|
24 | \addplot[fill=gray] |
---|
25 | file {data/avxcycles2.dat}; |
---|
26 | \addplot[fill=white] |
---|
27 | file {data/avxcycles3.dat}; |
---|
28 | |
---|
29 | \legend{Bitstreams,NRGrep,Grep} |
---|
30 | \end{axis} |
---|
31 | \end{tikzpicture} |
---|
32 | \end{center} |
---|
33 | \caption{AVX Speedup} |
---|
34 | \end{figure} |
---|
35 | |
---|
36 | \begin{figure} |
---|
37 | \begin{center} |
---|
38 | \begin{tikzpicture} |
---|
39 | \begin{axis}[ |
---|
40 | xtick=data, |
---|
41 | ylabel=AVX Instruction Reduction, |
---|
42 | xticklabels={@,Date,Email,URIorEmail,xquote}, |
---|
43 | tick label style={font=\tiny}, |
---|
44 | enlarge x limits=0.15, |
---|
45 | enlarge y limits={0.15, upper}, |
---|
46 | ymin=0, |
---|
47 | legend style={at={(0.5,-0.15)}, |
---|
48 | anchor=north,legend columns=-1}, |
---|
49 | ybar, |
---|
50 | bar width=7pt, |
---|
51 | ] |
---|
52 | \addplot[fill=black] |
---|
53 | file {data/avxinstructions1.dat}; |
---|
54 | \addplot[fill=gray] |
---|
55 | file {data/avxinstructions2.dat}; |
---|
56 | \addplot[fill=white] |
---|
57 | file {data/avxinstructions3.dat}; |
---|
58 | |
---|
59 | \legend{Bitstreams,NRGrep,Grep} |
---|
60 | \end{axis} |
---|
61 | \end{tikzpicture} |
---|
62 | \end{center} |
---|
63 | \caption{AVX Instruction Reduction} |
---|
64 | \end{figure} |
---|
65 | |
---|
66 | \begin{figure} |
---|
67 | \begin{center} |
---|
68 | \begin{tikzpicture} |
---|
69 | \begin{axis}[ |
---|
70 | xtick=data, |
---|
71 | ylabel=Change in Instructions per Cycle, |
---|
72 | xticklabels={@,Date,Email,URIorEmail,xquote}, |
---|
73 | tick label style={font=\tiny}, |
---|
74 | enlarge x limits=0.15, |
---|
75 | enlarge y limits={0.15, upper}, |
---|
76 | ymin=0, |
---|
77 | legend style={at={(0.5,-0.15)}, |
---|
78 | anchor=north,legend columns=-1}, |
---|
79 | ybar, |
---|
80 | bar width=7pt, |
---|
81 | ] |
---|
82 | \addplot[fill=black] |
---|
83 | file {data/avxipc1.dat}; |
---|
84 | \addplot[fill=gray] |
---|
85 | file {data/avxipc2.dat}; |
---|
86 | \addplot[fill=white] |
---|
87 | file {data/avxipc3.dat}; |
---|
88 | |
---|
89 | |
---|
90 | |
---|
91 | \legend{Bitstreams,NRGrep,Grep} |
---|
92 | \end{axis} |
---|
93 | \end{tikzpicture} |
---|
94 | \end{center} |
---|
95 | \caption{Change in Instructions Per Cycle With AVX} |
---|
96 | \end{figure} |
---|
97 | |
---|
98 | |
---|
99 | |
---|
100 | |
---|
101 | \subsection{AVX2 Stream Addition} |
---|
102 | \begin{figure*}[tbh] |
---|
103 | \begin{center} |
---|
104 | \begin{verbatim} |
---|
105 | void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { |
---|
106 | bitblock256_t all_ones = simd256<1>::constant<1>(); |
---|
107 | bitblock256_t gen = simd_and(x, y); |
---|
108 | bitblock256_t prop = simd_xor(x, y); |
---|
109 | bitblock256_t partial_sum = simd256<64>::add(x, y); |
---|
110 | bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum)); |
---|
111 | bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones); |
---|
112 | uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in); |
---|
113 | uint64_t bubble_mask = hsimd256<64>::signmask(bubble); |
---|
114 | uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; |
---|
115 | uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask); |
---|
116 | carry_out = convert(increments >> 4); |
---|
117 | uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; |
---|
118 | sum = simd256<64>::add(partial_sum, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread)))); |
---|
119 | } |
---|
120 | |
---|
121 | \end{verbatim} |
---|
122 | |
---|
123 | \end{center} |
---|
124 | \caption{AVX2 256-bit Addition} |
---|
125 | \label{fig:AVX2add} |
---|
126 | \end{figure*} |
---|
127 | |
---|