Changeset 3473
 Timestamp:
 Sep 13, 2013, 4:57:30 PM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

docs/Working/re/ppoppre.tex
r3472 r3473 409 409 \section{SIMD Scalability}\label{sec:AVX2} 410 410 \subsection{AVX Stream Addition} 411 We use MatchStar for carry propagation. 411 \begin{figure*}[tbh] 412 \begin{center} 413 \begin{code} 414 static IDISA_ALWAYS_INLINE void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { 415 bitblock256_t all_ones = simd256<1>::constant<1>(); 416 bitblock256_t gen = simd_and(x, y); 417 bitblock256_t prop = simd_xor(x, y); 418 bitblock256_t partial_sum = simd256<64>::add(x, y); 419 bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum)); 420 bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones); 421 uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in); 422 uint64_t bubble_mask = hsimd256<64>::signmask(bubble); 423 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; 424 uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask); 425 carry_out = convert(increments >> 4); 426 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; 427 sum = simd256<64>::add(partial_sum, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread)))); 428 } 429 430 \end{code} 431 432 \end{center} 433 \caption{Match Star} 434 \label{fig:matchstar1} 435 \end{figure*} 412 436 413 437 \section{GPU Implementation}\label{sec:GPU}
Note: See TracChangeset
for help on using the changeset viewer.