Changeset 2199
 Timestamp:
 May 30, 2012, 5:41:02 PM (7 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/bitblock256.hpp
r2196 r2199 67 67 } 68 68 69 #ifndef ADCMAGIC 69 70 IDISA_ALWAYS_INLINE void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) 70 71 { … … 86 87 carry_out = _mm256_castps128_ps256((__m128) c2); 87 88 } 89 #endif 90 91 #ifdef ADCMAGIC 92 static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { 93 bitblock128_t all_ones = simd128<1>::constant<1>(); 94 //bitblock256_t gen = simd_and(x, y); 95 //bitblock256_t prop = simd_xor(x, y); 96 bitblock128_t x0 = avx_select_lo128(x); 97 bitblock128_t x1 = avx_select_hi128(x); 98 bitblock128_t y0 = avx_select_lo128(y); 99 bitblock128_t y1 = avx_select_hi128(y); 100 bitblock128_t sum0 = simd128<64>::add(x0, y0); 101 bitblock128_t sum1 = simd128<64>::add(x1, y1); 102 //bitblock256_t icarry = simd_or(gen, simd_andc(prop, avx_general_combine256(sum1, sum0))); 103 bitblock128_t icarry0 = simd_or(simd_and(x0, y0), simd_andc(simd_or(x0, y0), sum0)); 104 bitblock128_t icarry1 = simd_or(simd_and(x1, y1), simd_andc(simd_or(x1, y1), sum1)); 105 // A carry may bubble through a field if it is all ones. 
106 bitblock128_t bubble0 = simd128<64>::eq(sum0, all_ones); 107 bitblock128_t bubble1 = simd128<64>::eq(sum1, all_ones); 108 //bitblock256_t bubble = avx_general_combine256(bubble1, bubble0); 109 //uint64_t carry_mask = _mm256_movemask_pd((__m256d) icarry) * 2 + convert(carry_in); 110 uint64_t carry_mask = hsimd128<64>::signmask(icarry1) * 8 + hsimd128<64>::signmask(icarry0) * 2 + convert(carry_in); 111 //uint64_t bubble_mask = _mm256_movemask_pd((__m256d) bubble); 112 uint64_t bubble_mask = hsimd128<64>::signmask(bubble1) * 4 + hsimd128<64>::signmask(bubble0); 113 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; 114 uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask); 115 carry_out = convert(increments >> 4); 116 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; 117 bitblock128_t inc_32 = _mm_cvtepu16_epi32(_mm_cvtsi64_si128(spread)); 118 bitblock128_t inc_64_0 = esimd128<32>::mergel(simd128<1>::constant<0>(), inc_32); 119 bitblock128_t inc_64_1 = esimd128<32>::mergeh(simd128<1>::constant<0>(), inc_32); 120 sum = avx_general_combine256(simd128<64>::add(sum1, inc_64_1), simd128<64>::add(sum0, inc_64_0)); 121 } 122 #endif 123 124 125 126 88 127 89 128 IDISA_ALWAYS_INLINE void sbb128(bitblock128_t x, bitblock128_t y, bitblock128_t borrow_in, bitblock128_t & borrow_out, bitblock128_t & difference)
Note: See TracChangeset
for help on using the changeset viewer.