Ignore:
Timestamp:
Oct 26, 2017, 5:28:05 PM (21 months ago)
Author:
cameron
Message:

Enabling Unicode log2 bounded repetition uwing indexed advance (for n<64 and AVX2 only)

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5708 r5710  
    632632            llvm::report_fatal_error("indexed_advance unsupported bit width");
    633633        }
     634        Value * shiftVal = b->getSize(shiftAmount);
    634635        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
    635636        Value * result = b->allZeroes();
     
    640641            Value * bits = b->CreateCall(PEXT_f, {s, ix});
    641642            Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
    642             Value * overflow = b->CreateLShr(bits, bitWidth - shiftAmount);
     643            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
     644            Value * carry_if_popcount_small =
     645                b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
     646                            b->CreateLShr(carry, ix_popcnt));
     647            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
     648            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
    643649            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
    644             carry = b->CreateOr(b->CreateLShr(adv, ix_popcnt), b->CreateShl(overflow, b->CreateSub(b->getSize(bitWidth), ix_popcnt)));
    645         }
    646         setNextCarryOut(b, carry);
     650        }
     651        Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     652        setNextCarryOut(b, carryOut);
    647653        return result;
    648654    } else {
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5708 r5710  
    136136    if (mHasLongAdvance) {
    137137        kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
    138     }
    139     for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
    140         kernel->addScalar(iBuilder->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
    141138    }
    142139}
     
    629626
    630627
    631 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const IndexedAdvance * const advance, Value * const value, Value * const index_strm) {
    632     report_fatal_error("IndexedAdvance not yet supported.");
     628Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
     629    const auto shiftAmount = advance->getAmount();
     630    if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
     631        Value * const carryIn = getNextCarryIn(b);
     632        unsigned bitWidth = sizeof(size_t) * 8;
     633        Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
     634        Value * PEXT_f = nullptr;
     635        Value * PDEP_f = nullptr;
     636        if (bitWidth == 64) {
     637            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
     638            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
     639        }
     640        else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     641            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
     642            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
     643        }
     644        else {
     645            llvm::report_fatal_error("indexed_advance unsupported bit width");
     646        }
     647        Value * shiftVal = b->getSize(shiftAmount);
     648        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
     649        Value * result = b->allZeroes();
     650        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
     651            Value * s = b->mvmd_extract(bitWidth, strm, i);
     652            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
     653            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
     654            Value * bits = b->CreateCall(PEXT_f, {s, ix});
     655            Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
     656            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
     657            Value * carry_if_popcount_small =
     658                b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
     659                            b->CreateLShr(carry, ix_popcnt));
     660            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
     661            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
     662            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
     663        }
     664        Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
     665        setNextCarryOut(b, carryOut);
     666        return result;
     667    } else {
     668        llvm::report_fatal_error("IndexedAdvance > mElementWidth not yet supported.");
     669    }
    633670}
    634671
     
    11061143            if (carryGroup.groupSize == 0) {
    11071144                Type * packTy = carryPackTy;
    1108                 if (LLVM_UNLIKELY(isa<Advance>(stmt) || isa<IndexedAdvance>(stmt))) {
    1109                     const auto amount = isa<Advance>(stmt) ? : cast<Advance>(stmt)->getAmount() : cast<IndexedAdvance>(stmt)->getAmount();
     1145                if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
     1146                    const auto amount = cast<Advance>(stmt)->getAmount();
    11101147                    if (LLVM_UNLIKELY(amount >= mElementWidth)) {
    11111148                        if (LLVM_UNLIKELY(ifDepth > 0 && amount > iBuilder->getBitBlockWidth())) {
     
    11141151                        }
    11151152                        mHasLongAdvance = true;
    1116                         if isa<IndexedAdvance>(stmt) mIndexedLongAdvanceTotal++;
    11171153                        const auto blocks = ceil_udiv(amount, iBuilder->getBitBlockWidth());
    11181154                        packTy = ArrayType::get(carryTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
    11191155                    }
     1156                }
     1157                if (LLVM_UNLIKELY(isa<IndexedAdvance>(stmt))) {
     1158                    // The carry data for the indexed advance stores N bits of carry data,
     1159                    // organized in packs that can be processed with GR instructions (such as PEXT, PDEP, popcount).
     1160                    // A circular buffer is used.  Because the number of bits to be dequeued
     1161                    // and enqueued is variable (based on the popcount of the index), an extra
     1162                    // pack stores the offset position in the circular buffer.
     1163                    const auto amount = cast<IndexedAdvance>(stmt)->getAmount();
     1164                    const auto packWidth = sizeof(size_t) * 8;
     1165                    const auto packs = ceil_udiv(amount, packWidth);
     1166                    packTy = ArrayType::get(iBuilder->getSizeTy(), nearest_pow2(packs) + 1);
    11201167                }
    11211168                state.push_back(packTy);
     
    11831230, mIfDepth(0)
    11841231, mHasLongAdvance(false)
    1185 , mIndexedLongAdvanceTotal(0)
    1186 , mIndexedLongAdvanceIndex(0)
    11871232, mHasNonCarryCollapsingLoops(false)
    11881233, mHasLoop(false)
  • icGREP/icgrep-devel/icgrep/pablo/expression_map.hpp

    r5267 r5710  
    282282                return mBinary.findOrAdd(stmt, stmt->getClassTypeId(), stmt->getOperand(0), stmt->getOperand(1));
    283283            case PabloAST::ClassTypeId::Sel:
     284            case PabloAST::ClassTypeId::IndexedAdvance:
    284285                return mTernary.findOrAdd(stmt, stmt->getClassTypeId(), stmt->getOperand(0), stmt->getOperand(1), stmt->getOperand(2));
    285286            default:
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5706 r5710  
    3030#include "cc/cc_compiler.h"         // for CC_Compiler
    3131#include "pablo/builder.hpp"        // for PabloBuilder
     32#include "toolchain/toolchain.h"    // for AVX2_available
    3233#include <llvm/Support/ErrorHandling.h>
    3334
     
    342343*/
    343344
    344 inline PabloAST * RE_Compiler::consecutive_matches(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) {
     345inline PabloAST * RE_Compiler::consecutive_matches(PabloAST * repeated, int length, int repeat_count, PabloAST * indexStream, PabloBuilder & pb) {
    345346    int i = length;
    346347    int total = repeat_count * length;
     
    348349    while ((i * 2) < total) {
    349350        PabloAST * v = consecutive_i;
    350         PabloAST * v2 =  pb.createAdvance(v, i);
     351        PabloAST * v2 = nullptr;
     352        if (indexStream == nullptr) v2 =  pb.createAdvance(v, i);
     353        else v2 =  pb.createIndexedAdvance(v, indexStream, i);
    351354        i *= 2;
    352355        consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(i) + "of" + std::to_string(total));
     
    354357    if (LLVM_LIKELY(i < total)) {
    355358        PabloAST * v = consecutive_i;
    356         PabloAST * v2 = pb.createAdvance(v, total - i);
     359        PabloAST * v2 =  nullptr;
     360        if (indexStream == nullptr) v2 = pb.createAdvance(v, total - i);
     361        else v2 = pb.createIndexedAdvance(v, indexStream, total - i);
    357362        consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(total));
    358363    }
     
    360365}
    361366
    362 inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) {
     367inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int length, int repeat_count, PabloAST * indexStream, PabloBuilder & pb) {
    363368    int i = length;
    364369    int total_lgth = repeat_count * length;
     
    369374    while ((i * 2) < total_lgth) {
    370375        PabloAST * v = reachable;
    371         PabloAST * v2 =  pb.createAdvance(v, i);
     376        PabloAST * v2 = nullptr;
     377        if (indexStream == nullptr) v2 = pb.createAdvance(v, i);
     378        else v2 = pb.createIndexedAdvance(v, indexStream, i);
    372379        i *= 2;
    373380        reachable = pb.createOr(v, v2, "within" + std::to_string(i));
     
    375382    if (LLVM_LIKELY(i < total_lgth)) {
    376383        PabloAST * v = reachable;
    377         PabloAST * v2 = pb.createAdvance(v, total_lgth - i);
     384        PabloAST * v2 = nullptr;
     385        if (indexStream == nullptr) v2 = pb.createAdvance(v, total_lgth - i);
     386        else v2 = pb.createIndexedAdvance(v, indexStream, total_lgth - i);
    378387        reachable = pb.createOr(v, v2, "within" + std::to_string(total_lgth));
    379388    }
     
    386395    } else if (!mGraphemeBoundaryRule && isByteLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
    387396        PabloAST * cc = markerVar(compile(repeated, pb));
    388         PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
     397        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, nullptr, pb);
    389398        const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
    390399        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos);
     400        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     401    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (lb < sizeof(size_t) * 8) && AVX2_available()) {
     402        PabloAST * cc = markerVar(compile(repeated, pb));
     403        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb);
     404        const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
     405        PabloAST * marker_fwd = pb.createIndexedAdvance(markerVar(marker), mFinal, pos);
    391406        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    392407    }
     
    423438        // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
    424439        // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
    425         PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, pb);
     440        PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, nullptr, pb);
    426441        PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask);
    427442        PabloAST * nonFinal = mNonFinal;
    428         if (mGraphemeBoundaryRule) {
    429             nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    430         }
     443        // MatchStar deposits any cursors on the post position. However those cursors may may land on the initial "byte" of a
     444        // "multi-byte" character. Combine the masked range with any nonFinals.
     445        PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, nonFinal), "bounded");
     446        return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
     447    } else if (!mGraphemeBoundaryRule && isUnicodeUnitLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)&& (ub < sizeof(size_t) * 8) && AVX2_available()) {
     448        // log2 upper bound for fixed length (=1) class
     449        // Create a mask of positions reachable within ub from current marker.
     450        // Use matchstar, then apply filter.
     451        PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     452        // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
     453        // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
     454        PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, mFinal, pb);
     455        PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask);
     456        PabloAST * nonFinal = mNonFinal;
    431457        // MatchStar deposits any cursors on the post position. However those cursors may may land on the initial "byte" of a
    432458        // "multi-byte" character. Combine the masked range with any nonFinals.
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5646 r5710  
    102102    MarkerType compileDiff(Diff * diff, MarkerType marker, pablo::PabloBuilder & cg);
    103103    MarkerType compileIntersect(Intersect * x, MarkerType marker, pablo::PabloBuilder & cg);
    104     pablo::PabloAST * consecutive_matches(pablo::PabloAST * repeated,  int length, int repeat_count, pablo::PabloBuilder & pb);
    105     pablo::PabloAST * reachable(pablo::PabloAST * repeated,  int length, int repeat_count, pablo::PabloBuilder & pb);
     104    pablo::PabloAST * consecutive_matches(pablo::PabloAST * repeated, int length, int repeat_count, pablo::PabloAST * indexStream, pablo::PabloBuilder & pb);
     105    pablo::PabloAST * reachable(pablo::PabloAST * repeated, int length, int repeat_count, pablo::PabloAST * indexStream, pablo::PabloBuilder & pb);
    106106    static bool isFixedLength(RE * regexp);
    107107    MarkerType processLowerBound(RE * repeated,  int lb, MarkerType marker, int ifGroupSize, pablo::PabloBuilder & pb);
Note: See TracChangeset for help on using the changeset viewer.