 Timestamp:
 Sep 6, 2013, 2:07:23 PM (5 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/bitblock256.hpp
r3438 r3439 46 46 static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock256_t carry) { return carry;} 47 47 48 49 48 static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum); 49 static inline void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference); 50 static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt); 51 52 53 static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) { 54 bitblock256_t all_ones = simd256<1>::constant<1>(); 55 bitblock256_t gen = simd_and(x, y); 56 bitblock256_t prop = simd_xor(x, y); 57 bitblock256_t partial_sum = simd256<64>::add(x, y); 58 bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum)); 59 bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones); 60 uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in); 61 uint64_t bubble_mask = hsimd256<64>::signmask(bubble); 62 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; 63 uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask); 64 carry_out = convert(increments >> 4); 65 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; 66 sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(avx_select_lo128(convert(spread)))); 67 } 68 69 IDISA_ALWAYS_INLINE void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference){ 70 bitblock256_t gen = simd_andc(y, x); 71 bitblock256_t prop = simd_not(simd_xor(x, y)); 72 bitblock256_t partial = simd256<128>::sub(simd256<128>::sub(x, y), borrow_in); 73 bitblock256_t b1 = simd256<256>::slli<128>(simd256<128>::srli<127>(simd_or(gen, simd_and(prop, partial)))); 74 difference = simd256<128>::sub(partial, b1); 75 borrow_out = 
simd_or(gen, simd_and(prop, difference)); 76 77 } 78 79 static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt){ 80 bitblock256_t shift_out = simd256<64>::srli<63>(cursor); 81 bitblock256_t low_bits = simd_or(mvmd256<64>::slli<1>(shift_out), carry_in); 82 carry_out = cursor; 83 rslt = simd_or(simd256<64>::add(cursor, cursor), low_bits); 84 } 85 86 87 88 #ifdef AVX 50 89 #define avx_select_lo128(x) \ 51 90 ((__m128i) _mm256_castps256_ps128(x)) … … 196 235 carry_out = _mm256_castps128_ps256((__m128)carry2); 197 236 } 237 #endif 198 238 199 239 IDISA_ALWAYS_INLINE bitblock256_t convert(uint64_t s)
Note: See TracChangeset for help on using the changeset viewer.