Ignore:
Timestamp:
Oct 27, 2017, 1:21:46 PM (20 months ago)
Author:
cameron
Message:

Moving indexed advance operation into IDISA builder

Location:
icGREP/icgrep-devel/icgrep/pablo
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5712 r5713  
    618 618 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    619 619    const auto shiftAmount = advance->getAmount();
    620     Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    621     Value * PEXT_f = nullptr;
    622     Value * PDEP_f = nullptr;
    623     unsigned bitWidth = sizeof(size_t) * 8;
    624     if (bitWidth == 64) {
    625         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    626         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    627     }
    628     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    629         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    630         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    631     }
    632     else {
    633         llvm::report_fatal_error("indexed_advance unsupported bit width");
    634     }
    635     if (LLVM_LIKELY(shiftAmount < bitWidth)) {
     620    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
    636 621        Value * const carryIn = getNextCarryIn(b);
    637         Value * shiftVal = b->getSize(shiftAmount);
    638         Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
    639         Value * result = b->allZeroes();
    640         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    641             Value * s = b->mvmd_extract(bitWidth, strm, i);
    642             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    643             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    644             Value * bits = b->CreateCall(PEXT_f, {s, ix});
    645             Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
    646             Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    647             Value * carry_if_popcount_small =
    648                 b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    649                             b->CreateLShr(carry, ix_popcnt));
    650             Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    651             carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
    652             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
    653         }
    654         Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     622        Value * carryOut, * result;
     623        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
    655 624        setNextCarryOut(b, carryOut);
    656 625        return result;
    657 626    } else if (shiftAmount <= b->getBitBlockWidth()) {
    658         // A single bitblock still holds all the shifted bits.   In this case, we know
    659         // that the shift amount is always greater than the popcount of the individual
    660         // elements that we deal with.   This simplifies some of the logic.
    661         Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
    662         Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
    663         Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
    664         Value * shiftVal = b->getSize(shiftAmount);
    665         Value * carry = b->CreateBitCast(carryIn, iBitBlock);
    666         Value * result = b->allZeroes();
    667         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    668             Value * s = b->mvmd_extract(bitWidth, strm, i);
    669             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    670             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    671             Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    672             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
    673             carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
    674             carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
    675         }
    676         b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     627        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     628        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
     629        Value * carryOut, * result;
     630        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
     631        b->CreateBlockAlignedStore(carryOut, carryPtr);
    677 632        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
    678 633            addToCarryOutSummary(b, strm);
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5712 r5713  
    631 631 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    632 632    const auto shiftAmount = advance->getAmount();
    633     Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    634     Value * PEXT_f = nullptr;
    635     Value * PDEP_f = nullptr;
    636     unsigned bitWidth = sizeof(size_t) * 8;
    637     if (bitWidth == 64) {
    638         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    639         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    640     }
    641     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    642         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    643         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    644     }
    645     else {
    646         llvm::report_fatal_error("indexed_advance unsupported bit width");
    647     }
    648     if (LLVM_LIKELY(shiftAmount < bitWidth)) {
     633    if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
    649 634        Value * const carryIn = getNextCarryIn(b);
    650         Value * shiftVal = b->getSize(shiftAmount);
    651         Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
    652         Value * result = b->allZeroes();
    653         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    654             Value * s = b->mvmd_extract(bitWidth, strm, i);
    655             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    656             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    657             Value * bits = b->CreateCall(PEXT_f, {s, ix});
    658             Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
    659             Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    660             Value * carry_if_popcount_small =
    661             b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    662                         b->CreateLShr(carry, ix_popcnt));
    663             Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    664             carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
    665             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
    666         }
    667         Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     635        Value * carryOut, * result;
     636        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
    668 637        setNextCarryOut(b, carryOut);
    669 638        return result;
    670 639    } else if (shiftAmount <= b->getBitBlockWidth()) {
    671         // A single bitblock still holds all the shifted bits.   In this case, we know
    672         // that the shift amount is always greater than the popcount of the individual
    673         // elements that we deal with.   This simplifies some of the logic.
    674         Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
    675         Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
    676         Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
    677         Value * shiftVal = b->getSize(shiftAmount);
    678         Value * carry = b->CreateBitCast(carryIn, iBitBlock);
    679         Value * result = b->allZeroes();
    680         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    681             Value * s = b->mvmd_extract(bitWidth, strm, i);
    682             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    683             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    684             Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    685             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
    686             carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
    687             carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
    688         }
    689         b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     640        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     641        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
     642        Value * carryOut, * result;
     643        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
     644        b->CreateBlockAlignedStore(carryOut, carryPtr);
    690 645        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
    691 646            addToCarryOutSummary(b, strm);
     
    697 652    }
    698 653 }
    699 
    700 654
    701 655
Note: See TracChangeset for help on using the changeset viewer.