Changeset 3447 for trunk


Ignore:
Timestamp:
Sep 8, 2013, 12:58:28 PM (5 years ago)
Author:
cameron
Message:

Fixes to leave carry-out variables in carry-in form.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/bitblock256.hpp

    r3446 r3447  
    70 70
    71 71 static IDISA_ALWAYS_INLINE void sub_bi_bo(bitblock256_t x, bitblock256_t y, carry_t borrow_in, carry_t & borrow_out, bitblock256_t & difference){
    72         bitblock256_t gen = simd_andc(y, x);
    73         bitblock256_t prop = simd_not(simd_xor(x, y));
    74         bitblock256_t partial = simd256<128>::sub(simd256<128>::sub(x, y), borrow_in);
    75         bitblock256_t b1 = simd256<256>::slli<128>(simd256<128>::srli<127>(simd_or(gen, simd_and(prop, partial))));
    76         difference = simd256<128>::sub(partial, b1);
    77         borrow_out = simd_or(gen, simd_and(prop, difference));
     72  bitblock256_t gen = simd_andc(y, x);
     73  bitblock256_t prop = simd_not(simd_xor(x, y));
     74  bitblock256_t partial_diff = simd256<64>::sub(x, y);
     75  bitblock256_t borrow = simd_or(gen, simd_and(prop, partial_diff));
     76  bitblock256_t bubble = simd256<64>::eq(partial_diff, simd<1>::constant<0>());
     77  uint64_t borrow_mask = hsimd256<64>::signmask(borrow) * 2 + convert(borrow_in);
     78  uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
     79  uint64_t borrow_scan_thru_bubbles = (borrow_mask + bubble_mask) &~ bubble_mask;
     80  uint64_t decrements = borrow_scan_thru_bubbles | (borrow_scan_thru_bubbles - borrow_mask);
     81  borrow_out = convert(decrements >> 4);
     82  uint64_t spread = 0x0000200040008001 * decrements & 0x0001000100010001;
     83  difference = simd256<64>::sub(partial_diff, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread))));
    78 84 }
    79 85
     
    81 87        bitblock256_t shift_out = simd256<64>::srli<63>(cursor);
    82 88        bitblock256_t low_bits = simd_or(mvmd256<64>::slli<1>(shift_out), carry_in);
    83         carry_out = cursor;
     89        carry_out = mvmd256<64>::srli<3>(shift_out);
    84 90        rslt = simd_or(simd256<64>::add(cursor, cursor), low_bits);
    85 91 }
Note: See TracChangeset for help on using the changeset viewer.