Changeset 971


Ignore:
Timestamp:
Mar 22, 2011, 4:52:06 PM (8 years ago)
Author:
cameron
Message:

Fix for ADCMAGIC version of adc256

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/block_carry_avx.h

    r965 r971  
    378378        __m128i sum1 = sse_add_64(x1, y1);
    379379        BitBlock icarry = simd_or(gen, simd_andc(prop, simd_combine256(sum1, sum0)));
    380         __m128i max0 = sse_eq_64(sum0, sse_const_1(1));
    381         __m128i max1 = sse_eq_64(sum1, sse_const_1(1));
    382         BitBlock max = simd_combine256(max1, max0);
     380        // A carry may bubble through a field if it is all ones.
     381        __m128i bubble0 = sse_eq_64(sum0, sse_const_1(1));
     382        __m128i bubble1 = sse_eq_64(sum1, sse_const_1(1));
     383        BitBlock bubble = simd_combine256(max1, max0);
    383384        uint64_t carry_mask = _mm256_movemask_pd((__m256d) icarry) * 2 + carry;
    384         uint64_t max_mask = _mm256_movemask_pd((__m256d) max);
    385         uint64_t increments = max_mask + carry_mask;
     385        uint64_t bubble_mask = _mm256_movemask_pd((__m256d) bubble);
     386        uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
     387        uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
    386388        carry = increments >> 4;
    387389        uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
Note: See TracChangeset for help on using the changeset viewer.