Changeset 3438 for trunk


Ignore:
Timestamp:
Sep 4, 2013, 3:40:38 PM (6 years ago)
Author:
lindanl
Message:

Long stream addition for AVX2 (not tested)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/bitblock256.hpp

    r2719 r3438  
    4040
    4141static IDISA_ALWAYS_INLINE bitblock256_t convert (uint64_t s);
     42static IDISA_ALWAYS_INLINE bitblock128_t convert_128 (uint64_t s);
    4243static IDISA_ALWAYS_INLINE uint64_t convert (bitblock256_t v);
    4344
     
    8990
    9091#ifdef ADCMAGIC
     92
     93#ifdef AVX2
    9194static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
    92         bitblock128_t all_ones = simd128<1>::constant<1>();
    93         //bitblock256_t gen = simd_and(x, y);
    94         //bitblock256_t prop = simd_xor(x, y);
    95         bitblock128_t x0 = avx_select_lo128(x);
    96         bitblock128_t x1 = avx_select_hi128(x);
    97         bitblock128_t y0 = avx_select_lo128(y);
    98         bitblock128_t y1 = avx_select_hi128(y);
     95        bitblock256_t all_ones = simd256<1>::constant<1>();
     96        bitblock256_t gen = simd_and(x, y);
     97        bitblock256_t prop = simd_xor(x, y);
     98  bitblock256_t partial_sum = simd256<64>::add(x, y);
     99        bitblock256_t carry = simd_or(gen, simd_andc(prop, partial_sum));
     100  bitblock256_t bubble = simd256<64>::eq(partial_sum, all_ones);
     101  uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
     102  uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
     103        uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     104  uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
     105  carry_out = convert(increments >> 4);
     106  uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
     107  sum = simd256<64>::add(partial_sum, _mm256_cvtepu8_epi64(convert_128(spread)));
     108}
     109#else
     110static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
     111  bitblock128_t all_ones = simd128<1>::constant<1>();
     112  //bitblock256_t gen = simd_and(x, y);
     113  //bitblock256_t prop = simd_xor(x, y);
     114  bitblock128_t x0 = avx_select_lo128(x);
     115  bitblock128_t x1 = avx_select_hi128(x);
     116  bitblock128_t y0 = avx_select_lo128(y);
     117  bitblock128_t y1 = avx_select_hi128(y);
    99118        bitblock128_t sum0 = simd128<64>::add(x0, y0);
    100119        bitblock128_t sum1 = simd128<64>::add(x1, y1);
    101         //bitblock256_t icarry = simd_or(gen, simd_andc(prop, avx_general_combine256(sum1, sum0)));
     120  //bitblock256_t icarry = simd_or(gen, simd_andc(prop, avx_general_combine256(sum1, sum0)));
    102121        bitblock128_t icarry0 = simd_or(simd_and(x0, y0), simd_andc(simd_or(x0, y0), sum0));
    103122        bitblock128_t icarry1 = simd_or(simd_and(x1, y1), simd_andc(simd_or(x1, y1), sum1));
     
    111130        uint64_t bubble_mask = _mm256_movemask_pd((__m256d) bubble);
    112131        //uint64_t bubble_mask = hsimd128<64>::signmask(bubble1) * 4 + hsimd128<64>::signmask(bubble0);
    113         //uint64_t bubble_mask = hsimd128<32>::signmask(bubble);
    114         uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     132  //uint64_t bubble_mask = hsimd128<32>::signmask(bubble);
     133  uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
    115134        uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
    116135        carry_out = convert(increments >> 4);
     
    121140        sum = avx_general_combine256(simd128<64>::add(sum1, inc_64_1), simd128<64>::add(sum0, inc_64_0));
    122141}
     142#endif
     143
    123144#endif
    124145
     
    178199IDISA_ALWAYS_INLINE bitblock256_t convert(uint64_t s)
    179200{
    180         ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0};
    181         b._64[0] = s;
    182         return b._256;
     201  ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0};
     202  b._64[0] = s;
     203  return b._256;
     204}
     205
     206IDISA_ALWAYS_INLINE bitblock128_t convert_128(uint64_t s)
     207{
     208  ubitblock b = {b._256 = simd256<128>::constant<0>()}; // = {0};
     209  b._64[0] = s;
     210  return b._128[0];
    183211}
    184212
    185213IDISA_ALWAYS_INLINE uint64_t convert (bitblock256_t v)
    186214{
    187         return (uint64_t) mvmd256<64>::extract<0>(v);
     215  return (uint64_t) mvmd256<64>::extract<0>(v);
    188216}
    189217
Note: See TracChangeset for help on using the changeset viewer.