Changeset 5711 for icGREP


Ignore:
Timestamp:
Oct 27, 2017, 9:28:02 AM (18 months ago)
Author:
cameron
Message:

Support IndexedAdvance for shift amounts up to 256 (the bit block width).

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5710 r5711  
    615615Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    616616    const auto shiftAmount = advance->getAmount();
    617     if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
     617    Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
     618    Value * PEXT_f = nullptr;
     619    Value * PDEP_f = nullptr;
     620    unsigned bitWidth = sizeof(size_t) * 8;
     621    if (bitWidth == 64) {
     622        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
     623        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
     624    }
     625    else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     626        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
     627        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
     628    }
     629    else {
     630        llvm::report_fatal_error("indexed_advance unsupported bit width");
     631    }
     632    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    618633        Value * const carryIn = getNextCarryIn(b);
    619         unsigned bitWidth = sizeof(size_t) * 8;
    620         Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    621         Value * PEXT_f = nullptr;
    622         Value * PDEP_f = nullptr;
    623         if (bitWidth == 64) {
    624             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    625             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    626         }
    627         else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    628             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    629             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    630         }
    631         else {
    632             llvm::report_fatal_error("indexed_advance unsupported bit width");
    633         }
    634634        Value * shiftVal = b->getSize(shiftAmount);
    635635        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
     
    652652        setNextCarryOut(b, carryOut);
    653653        return result;
     654    } else if (shiftAmount <= b->getBitBlockWidth()) {
     655        // A single bitblock still holds all the shifted bits. In this case, we know
     656        // that the shift amount is never less than the popcount of the individual
     657        // elements that we deal with. This simplifies some of the logic.
     658        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
     659        Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     660        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
     661        Value * shiftVal = b->getSize(shiftAmount);
     662        Value * carry = b->CreateBitCast(carryIn, iBitBlock);
     663        Value * result = b->allZeroes();
     664        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
     665            Value * s = b->mvmd_extract(bitWidth, strm, i);
     666            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
     667            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
     668            Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
     669            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
     670            carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
     671            carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
     672        }
     673        b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     674        return result;
    654675    } else {
    655         llvm::report_fatal_error("IndexedAdvance > LONG_ADVANCE_BREAKPOINT not yet supported.");
     676        mIndexedLongAdvanceIndex++;
     677        llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported.");
    656678    }
    657679}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5710 r5711  
    399399        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos);
    400400        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    401     } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb < sizeof(size_t) * 8) && AVX2_available()) {
     401    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb <= codegen::BlockSize) && AVX2_available()) {
    402402        PabloAST * cc = markerVar(compile(repeated, pb));
    403403        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb);
     
    445445        PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, nonFinal), "bounded");
    446446        return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
    447     } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub < sizeof(size_t) * 8) && AVX2_available()) {
     447    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub <= codegen::BlockSize) && AVX2_available()) {
    448448        // log2 upper bound for fixed length (=1) class
    449449        // Create a mask of positions reachable within ub from current marker.
Note: See TracChangeset for help on using the changeset viewer.