Ignore:
Timestamp:
May 30, 2012, 5:41:02 PM (7 years ago)
Author:
cameron
Message:

ADCMAGIC version for 256 bit adc

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/bitblock256.hpp

    r2196 r2199  
    6767}
    6868
     69#ifndef ADCMAGIC
    6970IDISA_ALWAYS_INLINE void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum)
    7071{
     
    8687  carry_out = _mm256_castps128_ps256((__m128) c2);
    8788}
     89#endif
     90
     91#ifdef ADCMAGIC
     // ADCMAGIC variant of add-with-carry for a 256-bit block (this definition sits
     // under #ifdef ADCMAGIC). Intended result: sum = x + y + carry_in, with the
     // carry leaving the top of the block stored in carry_out.
     // The block is processed as four 64-bit fields (two per 128-bit half): each
     // field is added independently, one carry bit and one "bubble" bit per field
     // are packed into small integer masks, and carry propagation across fields is
     // done with scalar arithmetic on those masks.
     // NOTE(review): simd128/hsimd128/esimd128, simd_and/simd_or/simd_andc,
     // avx_select_*, avx_general_combine256 and convert() are defined elsewhere;
     // remarks about their behavior below are inferred from their names and from
     // how they are used here -- confirm against their definitions.
     92static inline void adc(bitblock256_t x, bitblock256_t y, carry_t carry_in, carry_t & carry_out, bitblock256_t & sum) {
     93        bitblock128_t all_ones = simd128<1>::constant<1>(); // comparand for the bubble test below; presumably every bit set
     94        //bitblock256_t gen = simd_and(x, y);
     95        //bitblock256_t prop = simd_xor(x, y);
     96        bitblock128_t x0 = avx_select_lo128(x); // x0/y0: presumably the low 128-bit halves of x and y
     97        bitblock128_t x1 = avx_select_hi128(x); // x1/y1: presumably the high 128-bit halves
     98        bitblock128_t y0 = avx_select_lo128(y);
     99        bitblock128_t y1 = avx_select_hi128(y);
     100        bitblock128_t sum0 = simd128<64>::add(x0, y0); // field-width-64 add; presumably no carry crosses a 64-bit field boundary
     101        bitblock128_t sum1 = simd128<64>::add(x1, y1);
     102        //bitblock256_t icarry = simd_or(gen, simd_andc(prop, avx_general_combine256(sum1, sum0)));
     103        bitblock128_t icarry0 = simd_or(simd_and(x0, y0), simd_andc(simd_or(x0, y0), sum0)); // (x & y) | ((x | y) & ~sum), assuming simd_andc(a, b) is a & ~b; the top bit of each field is then that field's carry-out
     104        bitblock128_t icarry1 = simd_or(simd_and(x1, y1), simd_andc(simd_or(x1, y1), sum1)); // same expression for the high half
     105        // A carry may bubble through a field if it is all ones.
     106        bitblock128_t bubble0 = simd128<64>::eq(sum0, all_ones); // per-field comparison of the partial sum against all_ones
     107        bitblock128_t bubble1 = simd128<64>::eq(sum1, all_ones);
     108        //bitblock256_t bubble = avx_general_combine256(bubble1, bubble0);
     109        //uint64_t carry_mask = _mm256_movemask_pd((__m256d) icarry) * 2 + convert(carry_in);
     110        uint64_t carry_mask = hsimd128<64>::signmask(icarry1) * 8 + hsimd128<64>::signmask(icarry0) * 2 + convert(carry_in); // carry_in in bit 0, low-half mask shifted up by 1 (*2), high-half mask shifted up by 3 (*8); assumes signmask yields one bit per 64-bit field -- TODO confirm
     111        //uint64_t bubble_mask = _mm256_movemask_pd((__m256d) bubble);
     112        uint64_t bubble_mask = hsimd128<64>::signmask(bubble1) * 4 + hsimd128<64>::signmask(bubble0); // low-half mask in the low bits, high-half mask shifted up by 2 (*4)
     113        uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask; // the integer add lets a carry bit ripple upward through a run of set bubble bits; the bubble positions are then cleared
     114        uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask); // bits 0-3 are spread into per-field +1 increments below, bit 4 and up feeds carry_out; NOTE(review): derivation of this expression not re-verified here
     115        carry_out = convert(increments >> 4); // whatever remains at bit 4 and above becomes the outgoing carry
     116        uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001; // '*' binds tighter than '&'; multiplier has bits 0, 15, 30, 45 set and the mask keeps bits 0, 16, 32, 48, so bit i of increments (i = 0..3) ends up at bit 16*i
     117        bitblock128_t inc_32 = _mm_cvtepu16_epi32(_mm_cvtsi64_si128(spread)); // load spread into the low 64 bits of a vector, then zero-extend its four 16-bit lanes to 32 bits
     118        bitblock128_t inc_64_0 = esimd128<32>::mergel(simd128<1>::constant<0>(), inc_32); // presumably interleaves zero with the low two 32-bit lanes, giving 64-bit increments for the low half -- TODO confirm operand order
     119        bitblock128_t inc_64_1 = esimd128<32>::mergeh(simd128<1>::constant<0>(), inc_32); // same for the upper two lanes, giving increments for the high half
     120        sum = avx_general_combine256(simd128<64>::add(sum1, inc_64_1), simd128<64>::add(sum0, inc_64_0)); // apply the increments per field and reassemble; arguments are (high half, low half), matching the commented-out calls above
     121}
     122#endif
     123
     124
     125
     126
    88127
    89128IDISA_ALWAYS_INLINE void sbb128(bitblock128_t x, bitblock128_t y, bitblock128_t borrow_in, bitblock128_t & borrow_out, bitblock128_t & difference)
Note: See TracChangeset for help on using the changeset viewer.