Ignore:
Timestamp:
Sep 7, 2013, 3:05:51 PM (6 years ago)
Author:
linmengl
Message:

make avx2 can run now

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/bitblock256.hpp

    r3439 r3441  
    1818union ubitblock {
    1919        bitblock256_t _256;
    20         bitblock256_t _128[sizeof(bitblock256_t)/sizeof(bitblock256_t)];
     20        bitblock128_t _128[sizeof(bitblock256_t)/sizeof(bitblock256_t)];
    2121        uint64_t _64[sizeof(bitblock256_t)/sizeof(uint64_t)];
    2222        uint32_t _32[sizeof(bitblock256_t)/sizeof(uint32_t)];
     
    2828typedef bitblock256_t carry_t;
    2929
     30static IDISA_ALWAYS_INLINE void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum);
     31static IDISA_ALWAYS_INLINE void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference);
     32static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt);
     33
     34
     35
     36
    3037static IDISA_ALWAYS_INLINE bitblock256_t carry2bitblock(carry_t carry);
    3138static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock256_t carry);
     
    4653static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock256_t carry) {  return carry;}
    4754
    48 static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum);
    49 static inline void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference);
    50 static IDISA_ALWAYS_INLINE void adv_ci_co(bitblock256_t cursor, bitblock256_t carry_in, bitblock256_t & carry_out, bitblock256_t & rslt);
    51 
    52 
    5355static inline void add_ci_co(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
    54 bitblock256_t all_ones = simd256<1>::constant<1>();
    55 bitblock256_t gen = simd_and(x, y);
    56 bitblock256_t prop = simd_xor(x, y);
    57 bitblock256_t partial_sum = simd256<64>::add(x, y);
    58 bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
    59 bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
    60 uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
    61 uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
    62 uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
    63 uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
    64 carry_out = convert(increments >> 4);
    65 uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
    66 sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(avx_select_lo128(convert(spread))));
     56  bitblock256_t all_ones = simd256<1>::constant<1>();
     57  bitblock256_t gen = simd_and(x, y);
     58  bitblock256_t prop = simd_xor(x, y);
     59  bitblock256_t partial_sum = simd256<64>::add(x, y);
     60  bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
     61  bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
     62  uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
     63  uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
     64  uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     65  uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
     66  carry_out = convert(increments >> 4);
     67  uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
     68  sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(avx_select_lo128(convert(spread))));
    6769}
    6870
     
    7476        difference = simd256<128>::sub(partial, b1);
    7577        borrow_out = simd_or(gen, simd_and(prop, difference));
    76 
    7778}
    7879
     
    8384        rslt = simd_or(simd256<64>::add(cursor, cursor), low_bits);
    8485}
    85 
    8686
    8787
     
    251251}
    252252
    253 IDISA_ALWAYS_INLINE uint64_t convert (bitblock256_t v)
     253IDISA_ALWAYS_INLINE uint64_t convert(bitblock256_t v)
    254254{
    255255  return (uint64_t) mvmd256<64>::extract<0>(v);
Note: See TracChangeset for help on using the changeset viewer.