Ignore:
Timestamp:
Oct 26, 2017, 5:28:05 PM (19 months ago)
Author:
cameron
Message:

Enabling Unicode log2 bounded repetition using indexed advance (for n<64 and AVX2 only)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5708 r5710  
    136136    if (mHasLongAdvance) {
    137137        kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
    138     }
    139     for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
    140         kernel->addScalar(iBuilder->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
    141138    }
    142139}
     
    629626
    630627
    631 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const IndexedAdvance * const advance, Value * const value, Value * const index_strm) {
    632     report_fatal_error("IndexedAdvance not yet supported.");
     628Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
     629    const auto shiftAmount = advance->getAmount();
     630    if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
     631        Value * const carryIn = getNextCarryIn(b);
     632        unsigned bitWidth = sizeof(size_t) * 8;
     633        Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
     634        Value * PEXT_f = nullptr;
     635        Value * PDEP_f = nullptr;
     636        if (bitWidth == 64) {
     637            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
     638            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
     639        }
     640        else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     641            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
     642            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
     643        }
     644        else {
     645            llvm::report_fatal_error("indexed_advance unsupported bit width");
     646        }
     647        Value * shiftVal = b->getSize(shiftAmount);
     648        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
     649        Value * result = b->allZeroes();
     650        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
     651            Value * s = b->mvmd_extract(bitWidth, strm, i);
     652            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
     653            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
     654            Value * bits = b->CreateCall(PEXT_f, {s, ix});
     655            Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
     656            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
     657            Value * carry_if_popcount_small =
     658                b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
     659                            b->CreateLShr(carry, ix_popcnt));
     660            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
     661            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
     662            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
     663        }
     664        Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     665        setNextCarryOut(b, carryOut);
     666        return result;
     667    } else {
     668        llvm::report_fatal_error("IndexedAdvance > mElementWidth not yet supported.");
     669    }
    633670}
    634671
     
    11061143            if (carryGroup.groupSize == 0) {
    11071144                Type * packTy = carryPackTy;
    1108                 if (LLVM_UNLIKELY(isa<Advance>(stmt) || isa<IndexedAdvance>(stmt))) {
    1109                     const auto amount = isa<Advance>(stmt) ? : cast<Advance>(stmt)->getAmount() : cast<IndexedAdvance>(stmt)->getAmount();
     1145                if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
     1146                    const auto amount = cast<Advance>(stmt)->getAmount();
    11101147                    if (LLVM_UNLIKELY(amount >= mElementWidth)) {
    11111148                        if (LLVM_UNLIKELY(ifDepth > 0 && amount > iBuilder->getBitBlockWidth())) {
     
    11141151                        }
    11151152                        mHasLongAdvance = true;
    1116                         if isa<IndexedAdvance>(stmt) mIndexedLongAdvanceTotal++;
    11171153                        const auto blocks = ceil_udiv(amount, iBuilder->getBitBlockWidth());
    11181154                        packTy = ArrayType::get(carryTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
    11191155                    }
     1156                }
     1157                if (LLVM_UNLIKELY(isa<IndexedAdvance>(stmt))) {
     1158                    // The carry data for the indexed advance stores N bits of carry data,
     1159                    // organized in packs that can be processed with GR instructions (such as PEXT, PDEP, popcount).
     1160                    // A circular buffer is used.  Because the number of bits to be dequeued
     1161                    // and enqueued is variable (based on the popcount of the index), an extra
     1162                    // pack stores the offset position in the circular buffer.
     1163                    const auto amount = cast<IndexedAdvance>(stmt)->getAmount();
     1164                    const auto packWidth = sizeof(size_t) * 8;
     1165                    const auto packs = ceil_udiv(amount, packWidth);
     1166                    packTy = ArrayType::get(iBuilder->getSizeTy(), nearest_pow2(packs) + 1);
    11201167                }
    11211168                state.push_back(packTy);
     
    11831230, mIfDepth(0)
    11841231, mHasLongAdvance(false)
    1185 , mIndexedLongAdvanceTotal(0)
    1186 , mIndexedLongAdvanceIndex(0)
    11871232, mHasNonCarryCollapsingLoops(false)
    11881233, mHasLoop(false)
Note: See TracChangeset for help on using the changeset viewer.