Ignore:
Timestamp:
Oct 27, 2017, 11:17:29 AM (19 months ago)
Author:
cameron
Message:

Fixes for indexed advance

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5710 r5712  
    626626
    627627
     628/** ------------------------------------------------------------------------------------------------------------- *
     629 * @brief indexedAdvanceCarryInCarryOut
     630 ** ------------------------------------------------------------------------------------------------------------- */
    628631Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    629632    const auto shiftAmount = advance->getAmount();
    630     if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
     633    Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
     634    Value * PEXT_f = nullptr;
     635    Value * PDEP_f = nullptr;
     636    unsigned bitWidth = sizeof(size_t) * 8;
     637    if (bitWidth == 64) {
     638        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
     639        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
     640    }
     641    else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     642        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
     643        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
     644    }
     645    else {
     646        llvm::report_fatal_error("indexed_advance unsupported bit width");
     647    }
     648    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    631649        Value * const carryIn = getNextCarryIn(b);
    632         unsigned bitWidth = sizeof(size_t) * 8;
    633         Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    634         Value * PEXT_f = nullptr;
    635         Value * PDEP_f = nullptr;
    636         if (bitWidth == 64) {
    637             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    638             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    639         }
    640         else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    641             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    642             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    643         }
    644         else {
    645             llvm::report_fatal_error("indexed_advance unsupported bit width");
    646         }
    647650        Value * shiftVal = b->getSize(shiftAmount);
    648651        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
     
    656659            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    657660            Value * carry_if_popcount_small =
    658                 b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    659                             b->CreateLShr(carry, ix_popcnt));
     661            b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
     662                        b->CreateLShr(carry, ix_popcnt));
    660663            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    661664            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
     
    665668        setNextCarryOut(b, carryOut);
    666669        return result;
     670    } else if (shiftAmount <= b->getBitBlockWidth()) {
     671        // A single bitblock still holds all the shifted bits.  In this case, we know
     672        // that the shift amount is at least the popcount of each individual element
     673        // that we deal with (shiftAmount >= bitWidth).  This simplifies some of the logic.
     674        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
     675        Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     676        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
     677        Value * shiftVal = b->getSize(shiftAmount);
     678        Value * carry = b->CreateBitCast(carryIn, iBitBlock);
     679        Value * result = b->allZeroes();
     680        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
     681            Value * s = b->mvmd_extract(bitWidth, strm, i);
     682            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
     683            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
     684            Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
     685            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
     686            carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
     687            carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
     688        }
     689        b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     690        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
     691            addToCarryOutSummary(b, strm);
     692        }
     693        return result;
    667694    } else {
    668         llvm::report_fatal_error("IndexedAdvance > mElementWidth not yet supported.");
     695        mIndexedLongAdvanceIndex++;
     696        llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported.");
    669697    }
    670698}
Note: See TracChangeset for help on using the changeset viewer.