Changeset 5713


Ignore:
Timestamp:
Oct 27, 2017, 1:21:46 PM (16 months ago)
Author:
cameron
Message:

Moving indexed advance operation into IDISA builder

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5464 r5713  
    160160}
    161161   
    162 }
     // Emits IR for an indexed advance over one bit block, using the x86 BMI pext/pdep
     // intrinsics and ctpop on bitWidth-sized fields of the block.
     //   strm        - stream whose bits are extracted (pext) at the index positions.
     //   index_strm  - mask selecting the positions that are extracted from and deposited to.
     //   shiftIn     - incoming carry bits.
     //   shiftAmount - compile-time advance distance.
     // Returns {carry-out, result}, each passed through bitCast().
     // Calls llvm::report_fatal_error when sizeof(size_t)*8 is not 64 and is not
     // (32 with shiftAmount < 32).
     162std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
     163    Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     164    Value * PEXT_f = nullptr;
     165    Value * PDEP_f = nullptr;
            // The field width comes from sizeof(size_t) of the build that compiles this file.
     166    unsigned bitWidth = sizeof(size_t) * 8;
     167    if (bitWidth == 64) {
     168        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_64);
     169        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64);
     170    }
     171    else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     172        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
     173        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
     174    }
     175    else {
     176        llvm::report_fatal_error("indexed_advance unsupported bit width");
     177    }
     178    Type * iBitBlock = getIntNTy(getBitBlockWidth());
     179    Value * shiftVal = getSize(shiftAmount);
     180    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
                // Short shift: the running carry is a single bitWidth-sized value, seeded
                // from field 0 of shiftIn.
     181        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
     182        Value * result = allZeroes();
     183        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     184            Value * s = mvmd_extract(bitWidth, strm, i);
     185            Value * ix = mvmd_extract(bitWidth, index_strm, i);
     186            Value * ix_popcnt = CreateCall(popcount_f, {ix});
     187            Value * bits = CreateCall(PEXT_f, {s, ix});
                    // Shift the extracted bits by shiftAmount and OR in the carry from the previous field.
     188            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry);
     189            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
     190            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
                    // NOTE(review): both arms are always emitted; in the arm that is not selected the
                    // CreateSub shift amount has wrapped around - confirm the select safely discards it.
     191            Value * carry_if_popcount_small =
     192                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
     193                            CreateLShr(carry, ix_popcnt));
     194            Value * carry_if_popcount_large = CreateLShr(bits, CreateSub(ix_popcnt, shiftVal));
     195            carry = CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
                    // Deposit the advanced bits back at the index positions of field i.
     196            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {adv, ix}), i);
     197        }
                // The carry-out is the final carry placed in field 0 of an otherwise all-zero block.
     198        Value * carryOut = mvmd_insert(bitWidth, allZeroes(), carry, 0);
     199        return std::pair<Value *, Value *>{bitCast(carryOut), bitCast(result)};
     200    }
     201    else {
     202        // The shift amount is always greater than the popcount of the individual
     203        // elements that we deal with.   This simplifies some of the logic.
                // Here (shiftAmount >= bitWidth) the carry is the whole of shiftIn, handled as a
                // single integer of bit-block width.
     204       Value * carry = CreateBitCast(shiftIn, iBitBlock);
     205       Value * result = allZeroes();
     206        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     207            Value * s = mvmd_extract(bitWidth, strm, i);
     208            Value * ix = mvmd_extract(bitWidth, index_strm, i);
     209            Value * ix_popcnt = CreateCall(popcount_f, {ix});
     210            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
                    // Field i of the result is field 0 of the carry deposited at the index positions.
     211            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
     212            carry = CreateLShr(carry, CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
                    // Append the newly extracted bits at bit offset (shiftAmount - popcount) of the carry.
     213            carry = CreateOr(carry, CreateShl(CreateZExt(bits, iBitBlock), CreateZExt(CreateSub(shiftVal, ix_popcnt), iBitBlock)));
     214        }
     215        return std::pair<Value *, Value *>{bitCast(carry), bitCast(result)};
     216    }
     217}
     218
     219}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r5489 r5713  
    4646    llvm::Value * hsimd_packl_in_lanes(unsigned lanes, unsigned fw, llvm::Value * a, llvm::Value * b) override;
    4747    std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
     // Indexed advance of one bit block; returns {carry-out, result}.
     // Overrides the base-class version, which only reports a fatal error.
     48    std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift) override;
    4849
    4950    ~IDISA_AVX2_Builder() {}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5493 r5713  
    346346}
    347347
     348// full shift producing {shiftout, shifted}
     // Base-class fallback for the indexed advance: it ignores all of its arguments and
     // unconditionally calls llvm::report_fatal_error. Architecture-specific builders
     // (IDISA_AVX2_Builder in this changeset) override it with a real implementation.
     349std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * a, Value * index, Value * shiftin, unsigned shift) {
     350    llvm::report_fatal_error("bitblock_indexed_advance unimplemented for this architecture");
     351}
     352
    348353Value * IDISA_Builder::bitblock_mask_from(Value * pos) {
    349354    Type * bitBlockInt = getIntNTy(getBitBlockWidth());
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5493 r5713  
    126126    // full shift producing {shiftout, shifted}
    127127    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift);
     // Indexed advance of `a` relative to `index_strm`; returns a pair of values.
     // The base implementation only reports a fatal error, so a subclass must override it.
     128    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift);
    128129    virtual llvm::Value * bitblock_mask_from(llvm::Value * pos);
    129130    virtual llvm::Value * bitblock_set_bit(llvm::Value * pos);
     131   
    130132
    131133    virtual void CreateBaseFunctions() {}
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5712 r5713  
    618618Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    619619    const auto shiftAmount = advance->getAmount();
    620     Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    621     Value * PEXT_f = nullptr;
    622     Value * PDEP_f = nullptr;
    623     unsigned bitWidth = sizeof(size_t) * 8;
    624     if (bitWidth == 64) {
    625         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    626         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    627     }
    628     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    629         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    630         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    631     }
    632     else {
    633         llvm::report_fatal_error("indexed_advance unsupported bit width");
    634     }
    635     if (LLVM_LIKELY(shiftAmount < bitWidth)) {
     620    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
    636621        Value * const carryIn = getNextCarryIn(b);
    637         Value * shiftVal = b->getSize(shiftAmount);
    638         Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
    639         Value * result = b->allZeroes();
    640         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    641             Value * s = b->mvmd_extract(bitWidth, strm, i);
    642             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    643             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    644             Value * bits = b->CreateCall(PEXT_f, {s, ix});
    645             Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
    646             Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    647             Value * carry_if_popcount_small =
    648                 b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    649                             b->CreateLShr(carry, ix_popcnt));
    650             Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    651             carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
    652             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
    653         }
    654         Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     622        Value * carryOut, * result;
     623        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
    655624        setNextCarryOut(b, carryOut);
    656625        return result;
    657626    } else if (shiftAmount <= b->getBitBlockWidth()) {
    658         // A single bitblock still holds all the shifted bits.   In this case, we know
    659         // that the shift amount is always greater than the popcount of the individual
    660         // elements that we deal with.   This simplifies some of the logic.
    661         Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
    662         Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
    663         Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
    664         Value * shiftVal = b->getSize(shiftAmount);
    665         Value * carry = b->CreateBitCast(carryIn, iBitBlock);
    666         Value * result = b->allZeroes();
    667         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    668             Value * s = b->mvmd_extract(bitWidth, strm, i);
    669             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    670             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    671             Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    672             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
    673             carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
    674             carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
    675         }
    676         b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     627        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     628        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
     629        Value * carryOut, * result;
     630        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
     631        b->CreateBlockAlignedStore(carryOut, carryPtr);
    677632        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
    678633            addToCarryOutSummary(b, strm);
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5712 r5713  
    631631Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    632632    const auto shiftAmount = advance->getAmount();
    633     Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    634     Value * PEXT_f = nullptr;
    635     Value * PDEP_f = nullptr;
    636     unsigned bitWidth = sizeof(size_t) * 8;
    637     if (bitWidth == 64) {
    638         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    639         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    640     }
    641     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    642         PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    643         PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    644     }
    645     else {
    646         llvm::report_fatal_error("indexed_advance unsupported bit width");
    647     }
    648     if (LLVM_LIKELY(shiftAmount < bitWidth)) {
     633    if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
    649634        Value * const carryIn = getNextCarryIn(b);
    650         Value * shiftVal = b->getSize(shiftAmount);
    651         Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
    652         Value * result = b->allZeroes();
    653         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    654             Value * s = b->mvmd_extract(bitWidth, strm, i);
    655             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    656             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    657             Value * bits = b->CreateCall(PEXT_f, {s, ix});
    658             Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
    659             Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    660             Value * carry_if_popcount_small =
    661             b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    662                         b->CreateLShr(carry, ix_popcnt));
    663             Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    664             carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
    665             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
    666         }
    667         Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     635        Value * carryOut, * result;
     636        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
    668637        setNextCarryOut(b, carryOut);
    669638        return result;
    670639    } else if (shiftAmount <= b->getBitBlockWidth()) {
    671         // A single bitblock still holds all the shifted bits.   In this case, we know
    672         // that the shift amount is always greater than the popcount of the individual
    673         // elements that we deal with.   This simplifies some of the logic.
    674         Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
    675         Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
    676         Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
    677         Value * shiftVal = b->getSize(shiftAmount);
    678         Value * carry = b->CreateBitCast(carryIn, iBitBlock);
    679         Value * result = b->allZeroes();
    680         for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
    681             Value * s = b->mvmd_extract(bitWidth, strm, i);
    682             Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
    683             Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
    684             Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    685             result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
    686             carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
    687             carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
    688         }
    689         b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     640        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     641        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
     642        Value * carryOut, * result;
     643        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
     644        b->CreateBlockAlignedStore(carryOut, carryPtr);
    690645        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
    691646            addToCarryOutSummary(b, strm);
     
    697652    }
    698653}
    699 
    700654
    701655
Note: See TracChangeset for help on using the changeset viewer.