Changeset 3439 for trunk


Ignore:
Timestamp:
Sep 6, 2013, 2:07:23 PM (4 years ago)
Author:
cameron
Message:

Updates for AVX2 - current Pablo compiler; untested

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/bitblock256.hpp

    r3438 r3439  
    46 46 static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock256_t carry) {  return carry;}
    47 47
    48 
    49 
     48 static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum);
     49 static inline void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference);
     50 static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt);
     51
     52
     53 static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
     54 bitblock256_t all_ones = simd256<1>::constant<1>();
     55 bitblock256_t gen = simd_and(x, y);
     56 bitblock256_t prop = simd_xor(x, y);
     57 bitblock256_t partial_sum = simd256<64>::add(x, y);
     58 bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
     59 bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
     60 uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
     61 uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
     62 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     63 uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
     64 carry_out = convert(increments >> 4);
     65 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
     66 sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(avx_select_lo128(convert(spread))));
     67 }
     68
     69 IDISA_ALWAYS_INLINE void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference){
     70         bitblock256_t gen = simd_andc(y, x);
     71         bitblock256_t prop = simd_not(simd_xor(x, y));
     72         bitblock256_t partial = simd256<128>::sub(simd256<128>::sub(x, y), borrow_in);
     73         bitblock256_t b1 = simd256<256>::slli<128>(simd256<128>::srli<127>(simd_or(gen, simd_and(prop, partial))));
     74         difference = simd256<128>::sub(partial, b1);
     75         borrow_out = simd_or(gen, simd_and(prop, difference));
     76
     77 }
     78
     79 static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt){
     80         bitblock256_t shift_out = simd256<64>::srli<63>(cursor);
     81         bitblock256_t low_bits = simd_or(mvmd256<64>::slli<1>(shift_out), carry_in);
     82         carry_out = cursor;
     83         rslt = simd_or(simd256<64>::add(cursor, cursor), low_bits);
     84 }
     85
     86
     87
     88 #ifdef AVX
    50 89 #define avx_select_lo128(x) \
    51 90         ((__m128i) _mm256_castps256_ps128(x))
     
    196 235   carry_out = _mm256_castps128_ps256((__m128)carry2);
    197 236 }
     237 #endif
    198 238
    199 239 IDISA_ALWAYS_INLINE bitblock256_t convert(uint64_t s)
Note: See TracChangeset for help on using the changeset viewer.