source: docs/Working/re/avx2.tex @ 3498

Last change on this file since 3498 was 3498, checked in by bhull, 6 years ago

Chart changes.

File size: 3.0 KB
RevLine 
\section{SIMD Scalability}\label{sec:AVX2}
2
3
4
5
% Grouped bar chart (pgfplots ybar) of AVX speedup.  Three series are read
% from data/avxcycles{1,2,3}.dat; xtick=data puts a tick at each coordinate
% of the first series, labelled in order from the xticklabels list.
\begin{figure}
% \centering instead of a center environment, so no extra vertical space
% is added inside the float.
\centering
\begin{tikzpicture}
\begin{axis}[
xtick=data,
ylabel=AVX Speedup,
xticklabels={@,Date,Email,URIorEmail,xquote},
tick label style={font=\tiny},
enlarge x limits=0.15,
enlarge y limits={0.15, upper},
ymin=0,
legend style={at={(0.5,-0.15)},
anchor=north,legend columns=-1},
ybar,
bar width=7pt,
]
\addplot[fill=black]
file {data/avxcycles1.dat};
\addplot[fill=gray]
file {data/avxcycles2.dat};
\addplot[fill=white]
file {data/avxcycles3.dat};

% NOTE(review): four legend entries but only three \addplot series above;
% confirm whether a fourth data series is missing or an entry should go.
\legend{Bitstreams,NRGrep,Grep,Annot}
\end{axis}
\end{tikzpicture}
\caption{AVX Speedup}
% Label placed after the caption so references resolve to the figure number.
\label{fig:AVXSpeedup}
\end{figure}
35
% Grouped bar chart (pgfplots ybar) of the instruction-count reduction with
% AVX.  Three series are read from data/avxinstructions{1,2,3}.dat;
% xtick=data puts a tick at each coordinate of the first series, labelled in
% order from the xticklabels list.
\begin{figure}
% \centering instead of a center environment, so no extra vertical space
% is added inside the float.
\centering
\begin{tikzpicture}
\begin{axis}[
xtick=data,
ylabel=AVX Instruction Reduction,
xticklabels={@,Date,Email,URIorEmail,xquote},
tick label style={font=\tiny},
enlarge x limits=0.15,
enlarge y limits={0.15, upper},
ymin=0,
legend style={at={(0.5,-0.15)},
anchor=north,legend columns=-1},
ybar,
bar width=7pt,
]
\addplot[fill=black]
file {data/avxinstructions1.dat};
\addplot[fill=gray]
file {data/avxinstructions2.dat};
\addplot[fill=white]
file {data/avxinstructions3.dat};

% NOTE(review): four legend entries but only three \addplot series above;
% confirm whether a fourth data series is missing or an entry should go.
\legend{Bitstreams,NRGrep,Grep,Annot}
\end{axis}
\end{tikzpicture}
\caption{Instruction Reduction}
% Label placed after the caption so references resolve to the figure number.
\label{fig:AVXInstructions}
\end{figure}
65
% Grouped bar chart (pgfplots ybar) of the change in instructions per cycle
% with AVX.  Three series are read from data/avxipc{1,2,3}.dat; xtick=data
% puts a tick at each coordinate of the first series, labelled in order from
% the xticklabels list.
\begin{figure}
% \centering instead of a center environment, so no extra vertical space
% is added inside the float.
\centering
\begin{tikzpicture}
\begin{axis}[
xtick=data,
ylabel=Change in Instructions per Cycle,
xticklabels={@,Date,Email,URIorEmail,xquote},
tick label style={font=\tiny},
enlarge x limits=0.15,
enlarge y limits={0.15, upper},
ymin=0,
legend style={at={(0.5,-0.15)},
anchor=north,legend columns=-1},
ybar,
bar width=7pt,
]
\addplot[fill=black]
file {data/avxipc1.dat};
\addplot[fill=gray]
file {data/avxipc2.dat};
\addplot[fill=white]
file {data/avxipc3.dat};

% NOTE(review): four legend entries but only three \addplot series above;
% confirm whether a fourth data series is missing or an entry should go.
\legend{Bitstreams,NRGrep,Grep,Annot}
\end{axis}
\end{tikzpicture}
\caption{Change in Instructions Per Cycle With AVX}
% Label placed after the caption so references resolve to the figure number.
\label{fig:AVXIPC}
\end{figure}
97
98
99
100
\subsection{AVX Stream Addition}
% Verbatim listing of the add_ci_co routine (256-bit addition with carry-in
% and carry-out).  figure* makes the float span both columns when the
% document is set in a two-column layout.
% The signature is broken over two lines because the original single line
% (about 110 monospace characters) is likely to overrun the text width.
\begin{figure*}[tbh]
\centering
\begin{verbatim}
void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in,
               carry_t & carry_out, bitblock256_t & sum) {
  bitblock256_t all_ones = simd256<1>::constant<1>();
  bitblock256_t gen = simd_and(x, y);
  bitblock256_t prop = simd_xor(x, y);
  bitblock256_t partial_sum = simd256<64>::add(x, y);
  bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
  bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
  uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
  uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
  uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
  uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
  carry_out = convert(increments >> 4);
  uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
  sum = simd256<64>::add(partial_sum, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread))));
}
\end{verbatim}
\caption{AVX2 256-bit Addition}
\label{fig:AVX2add}
\end{figure*}
127
Note: See TracBrowser for help on using the repository browser.