Changeset 3438 for trunk/lib/bitblock256.hpp
 Timestamp:
 Sep 4, 2013, 3:40:38 PM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/bitblock256.hpp
r2719 r3438 40 40 41 41 static IDISA_ALWAYS_INLINE bitblock256_t convert (uint64_t s); 42 static IDISA_ALWAYS_INLINE bitblock128_t convert_128 (uint64_t s); 42 43 static IDISA_ALWAYS_INLINE uint64_t convert (bitblock256_t v); 43 44 … … 89 90 90 91 #ifdef ADCMAGIC 92 93 #ifdef AVX2 91 94 static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { 92 bitblock128_t all_ones = simd128<1>::constant<1>(); 93 //bitblock256_t gen = simd_and(x, y); 94 //bitblock256_t prop = simd_xor(x, y); 95 bitblock128_t x0 = avx_select_lo128(x); 96 bitblock128_t x1 = avx_select_hi128(x); 97 bitblock128_t y0 = avx_select_lo128(y); 98 bitblock128_t y1 = avx_select_hi128(y); 95 bitblock256_t all_ones = simd256<1>::constant<1>(); 96 bitblock256_t gen = simd_and(x, y); 97 bitblock256_t prop = simd_xor(x, y); 98 bitblock256_t partial_sum = simd256<64>::add(x, y); 99 bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum)); 100 bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones); 101 uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in); 102 uint64_t bubble_mask = hsimd256<64>::signmask(bubble); 103 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; 104 uint64_t increments = carry_scan_thru_bubbles  (carry_scan_thru_bubbles  carry_mask); 105 carry_out = convert(increments >> 4); 106 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; 107 sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(convert_128(spread))); 108 } 109 #else 110 static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { 111 bitblock128_t all_ones = simd128<1>::constant<1>(); 112 //bitblock256_t gen = simd_and(x, y); 113 //bitblock256_t prop = simd_xor(x, y); 114 bitblock128_t x0 = avx_select_lo128(x); 115 bitblock128_t x1 = avx_select_hi128(x); 116 bitblock128_t y0 = avx_select_lo128(y); 117 bitblock128_t y1 = avx_select_hi128(y); 99 118 bitblock128_t sum0 = simd128<64>::add(x0, y0); 100 119 bitblock128_t sum1 = simd128<64>::add(x1, y1); 101 120 //bitblock256_t icarry = simd_or(gen, simd_andc(prop, avx_general_combine256(sum1, sum0))); 102 121 bitblock128_t icarry0 = simd_or(simd_and(x0, y0), simd_andc(simd_or(x0, y0), sum0)); 103 122 bitblock128_t icarry1 = simd_or(simd_and(x1, y1), simd_andc(simd_or(x1, y1), sum1)); … … 111 130 uint64_t bubble_mask = _mm256_movemask_pd((__m256d) bubble); 112 131 //uint64_t bubble_mask = hsimd128<64>::signmask(bubble1) * 4 + hsimd128<64>::signmask(bubble0); 113 114 132 //uint64_t bubble_mask = hsimd128<32>::signmask(bubble); 133 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; 115 134 uint64_t increments = carry_scan_thru_bubbles  (carry_scan_thru_bubbles  carry_mask); 116 135 carry_out = convert(increments >> 4); … … 121 140 sum = avx_general_combine256(simd128<64>::add(sum1, inc_64_1), simd128<64>::add(sum0, inc_64_0)); 122 141 } 142 #endif 143 123 144 #endif 124 145 … … 178 199 IDISA_ALWAYS_INLINE bitblock256_t convert(uint64_t s) 179 200 { 180 ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0}; 181 b._64[0] = s; 182 return b._256; 201 ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0}; 202 b._64[0] = s; 203 return b._256; 204 } 205 206 IDISA_ALWAYS_INLINE bitblock128_t convert_128(uint64_t s) 207 { 208 ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0}; 209 b._64[0] = s; 210 return b._128[0]; 183 211 } 184 212 185 213 IDISA_ALWAYS_INLINE uint64_t convert (bitblock256_t v) 186 214 { 187 215 return (uint64_t) mvmd256<64>::extract<0>(v); 188 216 } 189 217
Note: See TracChangeset
for help on using the changeset viewer.