Ignore:
Timestamp:
Sep 13, 2013, 4:57:30 PM (5 years ago)
Author:
cameron
Message:

Long-stream addition; clean out old version.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • docs/Working/re/ppopp-re.tex

    r3472 r3473  
    409 409 \section{SIMD Scalability}\label{sec:AVX2}
    410 410 \subsection{AVX Stream Addition}
    411   We use MatchStar for carry propagation.
     411 \begin{figure*}[tbh]
     412\begin{center}
     413\begin{code}
     414static IDISA_ALWAYS_INLINE void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
     415  bitblock256_t all_ones = simd256<1>::constant<1>();
     416  bitblock256_t gen = simd_and(x, y);
     417  bitblock256_t prop = simd_xor(x, y);
     418  bitblock256_t partial_sum = simd256<64>::add(x, y);
     419  bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
     420  bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
     421  uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
     422  uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
     423  uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     424  uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
     425  carry_out = convert(increments >> 4);
     426  uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
     427  sum = simd256<64>::add(partial_sum, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread))));
     428}
     429
     430\end{code}
     431
     432\end{center}
     433\caption{Match Star}
     434\label{fig:matchstar1}
     435\end{figure*}
    412 436
    413 437 \section{GPU Implementation}\label{sec:GPU}
Note: See TracChangeset for help on using the changeset viewer.