Changeset 5812


Ignore:
Timestamp:
Dec 28, 2017, 1:15:13 PM (12 months ago)
Author:
nmedfort
Message:

Bug fix for RE local + some clean up of RE local and the RE Compiler

Location:
icGREP/icgrep-devel/icgrep
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5782 r5812  
    13111311        if (Align > 1) {
    13121312            ConstantInt * align = ConstantInt::get(intPtrTy, Align);
    1313             CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src pointer is misaligned");
    1314             CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst pointer is misaligned");
     1313            CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src is misaligned");
     1314            CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst is misaligned");
    13151315        }
    13161316        Value * intSize = CreateZExtOrTrunc(Size, intPtrTy);
     
    13261326    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    13271327        CHECK_ADDRESS(Ptr, Size, "CreateMemSet");
     1328        if (Align > 1) {
     1329            DataLayout DL(getModule());
     1330            IntegerType * const intPtrTy = DL.getIntPtrType(getContext());
     1331            Value * intPtr = CreatePtrToInt(Ptr, intPtrTy);
     1332            ConstantInt * align = ConstantInt::get(intPtrTy, Align);
     1333            CreateAssertZero(CreateURem(intPtr, align), "CreateMemSet: Ptr is misaligned");
     1334        }
    13281335    }
    13291336    return IRBuilder<>::CreateMemSet(Ptr, Val, Size, Align, isVolatile, TBAATag, ScopeTag, NoAliasTag);
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5804 r5812  
    484484        mGrepDriver->performIncrementalCacheCleanupStep();
    485485    }
    486 }
    487 
    488 }
     486    return nullptr;
     487}
     488
     489}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5810 r5812  
    4242using namespace llvm;
    4343
     44using FollowMap = std::map<re::CC *, re::CC*>;
     45
    4446namespace re {
    4547
    46 PabloAST * RE_Compiler::compile(RE * re) {
    47     return markerVar(AdvanceMarker(compile(re, mPB), MarkerPosition::FinalPostPositionUnit, mPB));
    48 }
    49    
    50 MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) {
    51     return process(re, makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createOnes()), pb);
     48using MarkerType = RE_Compiler::MarkerType;
     49
     50PabloAST * RE_Compiler::compile(RE * re, PabloAST * const initialCursors) {
     51    const auto markers = initialCursors ? compile(re, initialCursors, mPB) : compile(re, mPB);
     52    return markerVar(AdvanceMarker(markers, FinalPostPositionUnit, mPB));
     53}
     54
     55inline MarkerType RE_Compiler::compile(RE * re, PabloAST * const cursors, PabloBuilder & pb) {
     56    return process(re, makeMarker(FinalMatchUnit, cursors), pb);
     57}
     58
     59inline MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) {
     60    return process(re, makeMarker(FinalPostPositionUnit, pb.createOnes()), pb);
    5261}
    5362   
     
    8291
    8392inline MarkerType RE_Compiler::compileAny(const MarkerType m, PabloBuilder & pb) {
    84     PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));
    85     return makeMarker(MarkerPosition::FinalMatchUnit, nextFinalByte);
     93    PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, FinalPostPositionUnit, pb));
     94    return makeMarker(FinalMatchUnit, nextFinalByte);
    8695}
    8796
     
    8998    // If Unicode CCs weren't pulled out earlier, we generate the equivalent
    9099    // byte sequence as an RE.
    91     if (cc->getAlphabet() == &cc::Unicode) return process(toUTF8(cc), marker, pb);
     100    if (cc->getAlphabet() == &cc::Unicode) {
     101        return process(toUTF8(cc), marker, pb);
     102    }
    92103    PabloAST * nextPos = markerVar(marker);
    93104    if (isByteLength(cc)) {
    94         if (marker.pos == MarkerPosition::FinalMatchUnit) nextPos = pb.createAdvance(nextPos, 1);
    95     }
    96     else {
    97         nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    98     }
    99     return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
     105        if (marker.pos == FinalMatchUnit) {
     106            nextPos = pb.createAdvance(nextPos, 1);
     107        }
     108    } else {
     109        nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
     110    }
     111    return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
    100112}
    101113
     
    110122    } else if (isUnicodeUnitLength(name)) {
    111123        MarkerType nameMarker = compileName(name, pb);
    112         MarkerType nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
     124        MarkerType nextPos = AdvanceMarker(marker, FinalPostPositionUnit, pb);
    113125        nameMarker.stream = pb.createAnd(markerVar(nextPos), markerVar(nameMarker), name->getName());
    114126        return nameMarker;
     
    172184}
    173185
    174 MarkerType RE_Compiler::compileAlt(Alt * alt, MarkerType marker, PabloBuilder & pb) {
    175     std::vector<PabloAST *>  accum(3, pb.createZeroes());
    176     MarkerType const base = marker;
     186MarkerType RE_Compiler::compileAlt(Alt * alt, const MarkerType base, PabloBuilder & pb) {
     187    std::array<PabloAST *, 3> accum;
    177188    // The following may be useful to force a common Advance rather than separate
    178189    // Advances in each alternative.
     190    accum.fill(pb.createZeroes());
    179191    for (RE * re : *alt) {
    180192        MarkerType m = process(re, base, pb);
    181         MarkerPosition p = markerPos(m);
     193        const MarkerPosition p = markerPos(m);
    182194        accum[p] = pb.createOr(accum[p], markerVar(m), "alt");
    183195    }
    184     if (isa<Zeroes>(accum[MarkerPosition::InitialPostPositionUnit]) && isa<Zeroes>(accum[MarkerPosition::FinalPostPositionUnit])) {
    185         return makeMarker(MarkerPosition::FinalMatchUnit, accum[MarkerPosition::FinalMatchUnit]);
    186     }
    187     PabloAST * combine = pb.createOr(accum[InitialPostPositionUnit], pb.createAdvance(accum[MarkerPosition::FinalMatchUnit], 1), "alt");
     196    if (isa<Zeroes>(accum[InitialPostPositionUnit]) && isa<Zeroes>(accum[FinalPostPositionUnit])) {
     197        return makeMarker(FinalMatchUnit, accum[FinalMatchUnit]);
     198    }
     199
     200    PabloAST * combine = pb.createOr(accum[InitialPostPositionUnit], pb.createAdvance(accum[FinalMatchUnit], 1), "alt");
    188201    if (isa<Zeroes>(accum[FinalPostPositionUnit])) {
    189202        return makeMarker(InitialPostPositionUnit, combine);
    190203    }
    191     combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[MarkerPosition::FinalPostPositionUnit], "alt");
    192     return makeMarker(MarkerPosition::FinalPostPositionUnit, combine);
     204    combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[FinalPostPositionUnit], "alt");
     205    return makeMarker(FinalPostPositionUnit, combine);
    193206}
    194207
     
    205218    } else if (a->getKind() == Assertion::Kind::Boundary) {
    206219        MarkerType cond = compile(asserted, pb);
    207         if (LLVM_LIKELY(markerPos(cond) == MarkerPosition::FinalMatchUnit)) {
    208             MarkerType postCond = AdvanceMarker(cond, MarkerPosition::FinalPostPositionUnit, pb);
     220        if (LLVM_LIKELY(markerPos(cond) == FinalMatchUnit)) {
     221            MarkerType postCond = AdvanceMarker(cond, FinalPostPositionUnit, pb);
    209222            PabloAST * boundaryCond = pb.createXor(markerVar(cond), markerVar(postCond));
    210223            if (a->getSense() == Assertion::Sense::Negative) {
    211224                boundaryCond = pb.createNot(boundaryCond);
    212225            }
    213             MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    214             return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), boundaryCond, "boundary"));
     226            MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionUnit, pb);
     227            return makeMarker(FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), boundaryCond, "boundary"));
    215228        }
    216229        else UnsupportedRE("Unsupported boundary assertion");
    217230    } else if (isUnicodeUnitLength(asserted)) {
    218231        MarkerType lookahead = compile(asserted, pb);
    219         if (LLVM_LIKELY(markerPos(lookahead) == MarkerPosition::FinalMatchUnit)) {
     232        if (LLVM_LIKELY(markerPos(lookahead) == FinalMatchUnit)) {
    220233            PabloAST * la = markerVar(lookahead);
    221234            if (a->getSense() == Assertion::Sense::Negative) {
    222235                la = pb.createNot(la);
    223236            }
    224             MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    225             return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), la, "lookahead"));
     237            MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionUnit, pb);
     238            return makeMarker(FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), la, "lookahead"));
    226239        }
    227240    }
     
    335348
    336349MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, int ifGroupSize, PabloBuilder & pb) {
    337     if (LLVM_UNLIKELY(lb == 0)) return marker;
    338     else if (LLVM_UNLIKELY(lb == 1)) return process(repeated, marker, pb);
     350    if (LLVM_UNLIKELY(lb == 0)) {
     351        return marker;
     352    } else if (LLVM_UNLIKELY(lb == 1)) {
     353        return process(repeated, marker, pb);
     354    }
    339355    //
    340356    // A bounded repetition with an upper bound of at least 2.
     
    345361            PabloAST * cc = markerVar(compile(repeated, pb));
    346362            PabloAST * cc_lb = consecutive_matches(cc, 1, lb, nullptr, pb);
    347             const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
     363            const auto pos = markerPos(marker) == FinalMatchUnit ? lb : lb - 1;
    348364            PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos);
    349             return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     365            return makeMarker(FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    350366        }
    351367        else if (isUnicodeUnitLength(repeated)) {
    352368            PabloAST * cc = markerVar(compile(repeated, pb));
    353369            PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb);
    354             const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
     370            const auto pos = markerPos(marker) == FinalMatchUnit ? lb : lb - 1;
    355371            PabloAST * marker_fwd = pb.createIndexedAdvance(markerVar(marker), mFinal, pos);
    356             return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     372            return makeMarker(FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    357373        }
    358374        else if (isTypeForLocal(repeated)) {
    359             CC * firstSymSet = RE_Local::first(repeated);
    360             std::map<CC *, CC*> followMap;
    361             RE_Local::follow(repeated, followMap);
    362             bool firstSymSet_found_in_follow = false;
    363             for (auto & entry : followMap) {
    364                 if (entry.second->intersects(*firstSymSet)) {
    365                     firstSymSet_found_in_follow = true;
    366                 }
    367             }
    368             if (!firstSymSet_found_in_follow) {
    369                 // When the first symbol is unique, we can use it as an index stream.
    370                 PabloAST * firstCCstream = markerVar(compile(firstSymSet, pb));
     375            CC * const first = RE_Local::getFirstUniqueSymbol(repeated);
     376            if (first) { // if the first symbol is unique, we can use it as an index stream.
     377                PabloAST * firstCCstream = markerVar(compile(first, pb));
    371378                // Find all matches to the repeated regexp.
    372                 PabloAST * submatch = markerVar(AdvanceMarker(compile(repeated, pb), MarkerPosition::FinalPostPositionUnit, pb));
     379                PabloAST * submatch = markerVar(AdvanceMarker(compile(repeated, pb), FinalPostPositionUnit, pb));
    373380                // Consecutive submatches require that the symbol following the end of one submatch is the first symbol for
    374381                // the next submatch.   lb-1 such submatches are required.
    375382                PabloAST * consecutive_submatch = consecutive_matches(submatch, 1, lb-1, firstCCstream, pb);
    376383                // Find submatch positions which are lb-2 start symbols forward from the current marker position.
    377                 PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     384                PabloAST * base = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    378385                PabloAST * marker_fwd = pb.createIndexedAdvance(base, firstCCstream, lb - 1);
    379386                PabloAST * consecutive_lb_1 = pb.createAnd(marker_fwd, consecutive_submatch);
    380387                // From any of these positions, any position reachable by one more occurrence of the
    381388                // regexp is a match.
    382                 return process(repeated, makeMarker(MarkerPosition::FinalPostPositionUnit, consecutive_lb_1), pb);
     389                return process(repeated, makeMarker(FinalPostPositionUnit, consecutive_lb_1), pb);
    383390            }
    384391        }
     
    404411   
    405412MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, int ifGroupSize,  PabloBuilder & pb) {
    406     if (LLVM_UNLIKELY(ub == 0)) return marker;
     413    if (LLVM_UNLIKELY(ub == 0)) {
     414        return marker;
     415    }
    407416    //
    408417    // A bounded repetition with an upper bound of at least 2.
     
    414423            // Create a mask of positions reachable within ub from current marker.
    415424            // Use matchstar, then apply filter.
    416             PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionUnit, pb));
     425            PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionUnit, pb));
    417426            // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
    418427            // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
     
    422431            // "multi-byte" character. Combine the masked range with any nonFinals.
    423432            PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded");
    424             return makeMarker(MarkerPosition::InitialPostPositionUnit, bounded);
     433            return makeMarker(InitialPostPositionUnit, bounded);
    425434        }
    426435        else if (isUnicodeUnitLength(repeated)) {
    427436            // For a regexp which represent a single Unicode codepoint, we can use the mFinal stream
    428437            // as an index stream for an indexed advance operation.
    429             PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     438            PabloAST * cursor = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    430439            PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, mFinal, pb);
    431440            PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask, "masked");
    432441            PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded");
    433             return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
     442            return makeMarker(FinalPostPositionUnit, bounded);
    434443        }
    435444        else if (isTypeForLocal(repeated)) {
    436             CC * firstSymSet = RE_Local::first(repeated);
    437             std::map<CC *, CC*> followMap;
    438             RE_Local::follow(repeated, followMap);
    439             bool firstSymSet_found_in_follow = false;
    440             for (auto & entry : followMap) {
    441                 if (entry.second->intersects(*firstSymSet)) {
    442                     firstSymSet_found_in_follow = true;
    443                 }
    444             }
    445             if (!firstSymSet_found_in_follow) {
    446                 // When the first symbol is unique, we can use it as an index stream.
    447                 PabloAST * firstCCstream = markerVar(compile(firstSymSet, pb));
    448                 PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     445            CC * const first = RE_Local::getFirstUniqueSymbol(repeated);
     446            if (first) { // if the first symbol is unique, we can use it as an index stream.
     447                PabloAST * firstCCstream = markerVar(compile(first, pb));
     448                PabloAST * cursor = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    449449                PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, firstCCstream, pb);
    450                 PabloAST * masked = pb.createAnd(markerVar(AdvanceMarker(compile(repeated, pb), MarkerPosition::FinalPostPositionUnit, pb)), upperLimitMask, "masked");
     450                PabloAST * masked = pb.createAnd(markerVar(AdvanceMarker(compile(repeated, pb), FinalPostPositionUnit, pb)), upperLimitMask, "masked");
    451451                PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded");
    452                 return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
     452                return makeMarker(FinalPostPositionUnit, bounded);
    453453            }
    454454        }
     
    478478MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) {
    479479    // always use PostPosition markers for unbounded repetition.
    480     PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     480    PabloAST * base = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    481481    if (isByteLength(repeated)  && !AlgorithmOptionIsSet(DisableMatchStar)) {
    482482        PabloAST * mask = markerVar(compile(repeated, pb));
     
    484484        // The post position character may land on the initial byte of a multi-byte character. Combine them with the masked range.
    485485        PabloAST * unbounded = pb.createMatchStar(base, pb.createOr(mask, nonFinal), "unbounded");
    486         return makeMarker(MarkerPosition::FinalPostPositionUnit, unbounded);
     486        return makeMarker(FinalPostPositionUnit, unbounded);
    487487    } else if (isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableMatchStar) && !AlgorithmOptionIsSet(DisableUnicodeMatchStar)) {
    488488        PabloAST * cc = markerVar(compile(repeated, pb));
     
    492492        mstar = pb.createMatchStar(base, cc);
    493493        PabloAST * final = mFinal;
    494         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(mstar, final, "unbounded"));
     494        return makeMarker(FinalPostPositionUnit, pb.createAnd(mstar, final, "unbounded"));
    495495    } else if (mStarDepth > 0){
    496496        PabloBuilder * const outer = pb.getParent();
     
    500500        PabloAST * m1 = pb.createOr(base, starPending);
    501501        PabloAST * m2 = pb.createOr(base, starAccum);
    502         MarkerType result = process(repeated, makeMarker(MarkerPosition::FinalPostPositionUnit, m1), pb);
    503         result = AdvanceMarker(result, MarkerPosition::FinalPostPositionUnit, pb);
     502        MarkerType result = process(repeated, makeMarker(FinalPostPositionUnit, m1), pb);
     503        result = AdvanceMarker(result, FinalPostPositionUnit, pb);
    504504        PabloAST * loopComputation = markerVar(result);
    505505        pb.createAssign(starPending, pb.createAnd(loopComputation, pb.createNot(m2)));
     
    517517        mCompiledName = &nestedMap;
    518518        mStarDepth++;
    519         MarkerType result = process(repeated, makeMarker(MarkerPosition::FinalPostPositionUnit, whilePending), wb);
    520         result = AdvanceMarker(result, MarkerPosition::FinalPostPositionUnit, wb);
     519        MarkerType result = process(repeated, makeMarker(FinalPostPositionUnit, whilePending), wb);
     520        result = AdvanceMarker(result, FinalPostPositionUnit, wb);
    521521        PabloAST * loopComputation = markerVar(result);
    522522        wb.createAssign(whilePending, wb.createAnd(loopComputation, wb.createNot(whileAccum)));
     
    532532inline MarkerType RE_Compiler::compileStart(MarkerType marker, pablo::PabloBuilder & pb) {
    533533    PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1));
    534     MarkerType m = AdvanceMarker(marker, MarkerPosition::InitialPostPositionUnit, pb);
    535     return makeMarker(MarkerPosition::InitialPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
     534    MarkerType m = AdvanceMarker(marker, InitialPostPositionUnit, pb);
     535    return makeMarker(InitialPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
    536536}
    537537
    538538inline MarkerType RE_Compiler::compileEnd(MarkerType marker, pablo::PabloBuilder & pb) {
    539     PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    540     PabloAST * atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol");
    541     return makeMarker(MarkerPosition::FinalPostPositionUnit, atEOL);
     539    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
     540    PabloAST * const atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol");
     541    return makeMarker(FinalPostPositionUnit, atEOL);
    542542}
    543543
    544544inline MarkerType RE_Compiler::AdvanceMarker(MarkerType marker, const MarkerPosition newpos, PabloBuilder & pb) {
    545545    if (marker.pos != newpos) {
    546         if (marker.pos == MarkerPosition::FinalMatchUnit) {
     546        if (marker.pos == FinalMatchUnit) {
    547547            marker.stream = pb.createAdvance(marker.stream, 1, "ipp");
    548             marker.pos = MarkerPosition::InitialPostPositionUnit;
    549         }
    550         if (newpos == MarkerPosition::FinalPostPositionUnit) {
    551             PabloAST * nonFinal = mNonFinal;
    552             marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), nonFinal, "fpp");
    553             marker.pos = MarkerPosition::FinalPostPositionUnit;
     548            marker.pos = InitialPostPositionUnit;
     549        }
     550        if (newpos == FinalPostPositionUnit) {
     551            marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), mNonFinal, "fpp");
     552            marker.pos = FinalPostPositionUnit;
    554553        }
    555554    }
     
    589588    mNonFinal = mPB.createExtract(required, mPB.getInteger(1));
    590589    mFinal = mPB.createExtract(required, mPB.getInteger(2));
    591 
    592590}
    593591
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5810 r5812  
    2525namespace re { class CC; }
    2626
    27 //namespace UCD {
    28 //class UnicodeSet;
    29 //}
    30 
    3127/*   Marker streams represent the results of matching steps.
    3228     Three types of marker streams are used internally.
     
    4339namespace re {
    4440
    45 enum MarkerPosition {FinalMatchUnit, InitialPostPositionUnit, FinalPostPositionUnit};
    46 
    47 struct MarkerType {
    48     MarkerPosition pos;
    49     pablo::PabloAST * stream;
    50     MarkerType & operator =(const MarkerType &) = default;
    51 };
    52 
    53 inline MarkerPosition markerPos(const MarkerType & m) {return m.pos; }
    54 
    55 inline pablo::PabloAST * markerVar(const MarkerType & m) {return m.stream; }
    56 
    57 inline MarkerType makeMarker(MarkerPosition newpos, pablo::PabloAST * strm) {return {newpos, strm};}
    58 
    59 
    6041class RE_Compiler {
    6142public:
    6243
     44    enum MarkerPosition {FinalMatchUnit, InitialPostPositionUnit, FinalPostPositionUnit};
     45
     46    struct MarkerType {
     47        MarkerPosition pos;
     48        pablo::PabloAST * stream;
     49        MarkerType & operator =(const MarkerType &) = default;
     50    };
     51
    6352    RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler);
    64     pablo::PabloAST * compile(RE * re);
     53    pablo::PabloAST * compile(RE * re, pablo::PabloAST * const initialCursors = nullptr);
    6554
    6655    static LLVM_ATTRIBUTE_NORETURN void UnsupportedRE(std::string errmsg);
     
    8877    };
    8978
    90     MarkerType compile(RE * re, pablo::PabloBuilder & cg);
     79
     80    MarkerType compile(RE * re, pablo::PabloBuilder & pb);
     81    MarkerType compile(RE * re, pablo::PabloAST * const cursors, pablo::PabloBuilder & pb);
    9182
    9283    MarkerType process(RE * re, MarkerType marker, pablo::PabloBuilder & pb);
     
    9586    MarkerType compileSeq(Seq * seq, MarkerType marker, pablo::PabloBuilder & pb);
    9687    MarkerType compileSeqTail(Seq::iterator current, const Seq::iterator end, int matchLenSoFar, MarkerType marker, pablo::PabloBuilder & pb);
    97     MarkerType compileAlt(Alt * alt, MarkerType marker, pablo::PabloBuilder & pb);
     88    MarkerType compileAlt(Alt * alt, MarkerType base, pablo::PabloBuilder & pb);
    9889    MarkerType compileAssertion(Assertion * a, MarkerType marker, pablo::PabloBuilder & pb);
    9990    MarkerType compileRep(Rep * rep, MarkerType marker, pablo::PabloBuilder & pb);
     
    114105    MarkerType AdvanceMarker(MarkerType marker, const MarkerPosition newpos, pablo::PabloBuilder & pb);
    115106    void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBuilder & pb);
     107
     108    static inline MarkerPosition markerPos(const MarkerType & m) {return m.pos; }
     109    static inline pablo::PabloAST * markerVar(const MarkerType & m) {return m.stream; }
     110    static inline MarkerType makeMarker(MarkerPosition newpos, pablo::PabloAST * strm) {return {newpos, strm};}
    116111
    117112private:
  • icGREP/icgrep-devel/icgrep/re/re_local.cpp

    r5748 r5812  
    88#include <re/re_intersect.h>
    99#include <re/re_assertion.h>
    10 #include <re/re_any.h>
    1110#include <re/re_analysis.h>
    12 #include <UCD/resolve_properties.h>
    13 #include <boost/container/flat_set.hpp>
     11#include <re/re_nullable.h>
     12#include <boost/container/flat_map.hpp>
    1413#include <boost/range/adaptor/reversed.hpp>
    15 #include <util/slab_allocator.h>
    16 #include <map>
    1714
    1815using namespace boost::container;
     
    2017
    2118namespace re {
    22  
    23 inline void combine(CC *& a, CC * b) {
     19
     20using FollowMap = flat_map<const CC *, const CC*>;
     21
     22inline const CC * combine(const CC * a, const CC * b) {
    2423    if (a && b) {
    25         a = makeCC(a, b);
     24        return makeCC(a, b);
    2625    } else if (b) {
    27         a = b;
     26        return b;
    2827    }
     28    return a;
    2929}
    3030
    31 CC * RE_Local::first(RE * re) {
    32     if (Name * name = dyn_cast<Name>(re)) {
     31const CC * first(const RE * re) {
     32    if (const Name * name = dyn_cast<Name>(re)) {
    3333        if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
    3434            return first(name->getDefinition());
     
    3636            UndefinedNameError(name);
    3737        }
    38     } else if (CC * cc = dyn_cast<CC>(re)) {
     38    } else if (const CC * cc = dyn_cast<CC>(re)) {
    3939        return cc;
    40     } else if (Seq * seq = dyn_cast<Seq>(re)) {
    41         CC * cc = nullptr;
     40    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     41        const CC * cc = nullptr;
    4242        for (auto & si : *seq) {
    43             combine(cc, first(si));
    44             if (!isNullable(si)) {
     43            cc = combine(cc, first(si));
     44            if (!RE_Nullable::isNullable(si)) {
    4545                break;
    4646            }
    4747        }
    4848        return cc;
    49     } else if (Alt * alt = dyn_cast<Alt>(re)) {
    50         CC * cc = nullptr;
     49    } else if (const Alt * alt = dyn_cast<Alt>(re)) {
     50        const CC * cc = nullptr;
    5151        for (auto & ai : *alt) {
    52             combine(cc, first(ai));
     52            cc = combine(cc, first(ai));
    5353        }
    5454        return cc;
    55     } else if (Rep * rep = dyn_cast<Rep>(re)) {
     55    } else if (const Rep * rep = dyn_cast<Rep>(re)) {
    5656        return first(rep->getRE());
    57     } else if (Diff * diff = dyn_cast<Diff>(re)) {
    58         if (CC * lh = first(diff->getLH())) {
    59             if (CC * rh = first(diff->getRH())) {
     57    } else if (const Diff * diff = dyn_cast<Diff>(re)) {
     58        if (const CC * lh = first(diff->getLH())) {
     59            if (const CC * rh = first(diff->getRH())) {
    6060                return subtractCC(lh, rh);
    6161            }
    6262        }
    63     } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
    64         if (CC * lh = first(ix->getLH())) {
    65             if (CC * rh = first(ix->getRH())) {
     63    } else if (const Intersect * ix = dyn_cast<Intersect>(re)) {
     64        if (const CC * lh = first(ix->getLH())) {
     65            if (const CC * rh = first(ix->getRH())) {
    6666                return intersectCC(lh, rh);
    6767            }
     
    7171}
    7272
    73 CC * RE_Local::final(RE * re) {
    74     if (Name * name = dyn_cast<Name>(re)) {
     73const CC * final(const RE * re) {
     74    if (const Name * name = dyn_cast<Name>(re)) {
    7575        if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
    7676            return final(name->getDefinition());
     
    7878            UndefinedNameError(name);
    7979        }
    80     } else if (CC * cc = dyn_cast<CC>(re)) {
     80    } else if (const CC * cc = dyn_cast<CC>(re)) {
    8181        return cc;
    82     } else if (Seq * seq = dyn_cast<Seq>(re)) {
    83         CC * cc = nullptr;
     82    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     83        const CC * cc = nullptr;
    8484        for (auto & si : boost::adaptors::reverse(*seq)) {
    85             combine(cc, first(si));
    86             if (!isNullable(si)) {
     85            cc = combine(cc, final(si));
     86            if (!RE_Nullable::isNullable(si)) {
    8787                break;
    8888            }
    8989        }
    9090        return cc;
    91     } else if (Alt * alt = dyn_cast<Alt>(re)) {
    92         CC * cc = nullptr;
     91    } else if (const Alt * alt = dyn_cast<Alt>(re)) {
     92        const CC * cc = nullptr;
    9393        for (auto & ai : *alt) {
    94             combine(cc, final(ai));
     94            cc = combine(cc, final(ai));
    9595        }
    9696        return cc;
    97     } else if (Rep * rep = dyn_cast<Rep>(re)) {
     97    } else if (const Rep * rep = dyn_cast<Rep>(re)) {
    9898        return final(rep->getRE());
    99     } else if (Diff * diff = dyn_cast<Diff>(re)) {
    100         if (CC * lh = final(diff->getLH())) {
    101             if (CC * rh = final(diff->getRH())) {
     99    } else if (const Diff * diff = dyn_cast<Diff>(re)) {
     100        if (const CC * lh = final(diff->getLH())) {
     101            if (const CC * rh = final(diff->getRH())) {
    102102                return subtractCC(lh, rh);
    103103            }
    104104        }
    105     } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
    106         if (CC * lh = final(ix->getLH())) {
    107             if (CC * rh = final(ix->getRH())) {
     105    } else if (const Intersect * ix = dyn_cast<Intersect>(re)) {
     106        if (const CC * lh = final(ix->getLH())) {
     107            if (const CC * rh = final(ix->getRH())) {
    108108                return intersectCC(lh, rh);
    109109            }
     
    114114}
    115115
    116 void RE_Local::follow(RE * re, std::map<CC *, CC*> &follow_map) {
    117     if (Name * name = dyn_cast<Name>(re)) {
     116void follow(const RE * re, FollowMap & follows) {
     117    if (const Name * name = dyn_cast<Name>(re)) {
    118118        if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
    119             return follow(name->getDefinition(), follow_map);
     119            return follow(name->getDefinition(), follows);
    120120        } else {
    121121            UndefinedNameError(name);
    122122        }
    123     } else if (Seq * seq = dyn_cast<Seq>(re)) {
    124         if (seq->size() > 0) {
    125             RE * re_first = *(seq->begin());
    126             RE * re_follow = makeSeq(seq->begin() + 1, seq->end());
     123    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     124        if (!seq->empty()) {
     125            const RE * const re_first = *(seq->begin());
     126            const RE * const re_follow = makeSeq(seq->begin() + 1, seq->end());
    127127            auto e1 = final(re_first);
    128128            auto e2 = first(re_follow);
    129129            if (e1 && e2) {
    130                 auto e = follow_map.find(e1);
    131                 if (e != follow_map.end()) {
     130                auto e = follows.find(e1);
     131                if (e != follows.end()) {
    132132                    e->second = makeCC(e->second, e2);
    133133                } else {
    134                     follow_map.emplace(e1, e2);
     134                    follows.emplace(e1, e2);
    135135                }
    136136            }
    137             follow(re_first, follow_map);
    138             follow(re_follow, follow_map);
     137            follow(re_first, follows);
     138            follow(re_follow, follows);
    139139        }
    140     } else if (Alt * alt = dyn_cast<Alt>(re)) {
     140    } else if (const Alt * alt = dyn_cast<Alt>(re)) {
    141141        for (auto ai = alt->begin(); ai != alt->end(); ++ai) {
    142             follow(*ai, follow_map);
     142            follow(*ai, follows);
    143143        }
    144     } else if (Rep * rep = dyn_cast<Rep>(re)) {
    145         auto e1 = final(rep->getRE());
     144    } else if (const Rep * rep = dyn_cast<Rep>(re)) {
     145        const auto e1 = final(rep->getRE());
    146146        auto e2 = first(rep->getRE());
    147147        if (e1 && e2) {
    148             auto e = follow_map.find(e1);
    149             if (e != follow_map.end()) {
     148            auto e = follows.find(e1);
     149            if (e != follows.end()) {
    150150                e->second = makeCC(e->second, e2);
    151151            } else {
    152                 follow_map.emplace(e1, e2);
     152                follows.emplace(e1, e2);
    153153            }
    154154        }
    155         follow(rep->getRE(), follow_map);
     155        follow(rep->getRE(), follows);
    156156    }
    157157}
    158158
    159 bool RE_Local::isLocalLanguage(RE * re) {   
    160     UCD::UnicodeSet seen;
    161     return isLocalLanguage_helper(re, seen);
    162 }
    163 
    164 bool RE_Local::isLocalLanguage_helper(const RE * re, UCD::UnicodeSet & seen) {
    165     assert ("RE object cannot be null!" && re);
    166     if (isa<Any>(re)) {
    167         const bool no_intersect = seen.empty();
    168         seen.insert_range(0x00, 0x10FFFF);
    169         return no_intersect;
    170     } else if (const CC * cc = dyn_cast<CC>(re)) {
    171         const bool has_intersect = cc->intersects(seen);
    172         seen = seen + *cc;
    173         return !has_intersect;
    174     } else if (const Name * n = dyn_cast<Name>(re)) {
    175         return isLocalLanguage_helper(n->getDefinition(), seen);
    176     } else if (const Seq * seq = dyn_cast<Seq>(re)) {
    177         for (const RE * item : *seq) {
    178             if (!isLocalLanguage_helper(item, seen)) return false;
    179         }
    180         return true;
    181     } else if (const Alt * alt = dyn_cast<Alt>(re)) {
    182         for (RE * item : *alt) {
    183             if (!isLocalLanguage_helper(item, seen)) return false;
    184         }
    185         return true;
    186     } else if (const Rep * rep = dyn_cast<Rep>(re)) {
    187         if (rep->getUB() > 1) return false;
    188         return isLocalLanguage_helper(rep->getRE(), seen);
    189     } else {
    190         // A local language cannot contain Intersects, Differences, Assertions, Start, End.
    191         return false;
    192     }
    193 }
    194 
    195 bool RE_Local::isNullable(const RE * re) {
    196     if (const Seq * re_seq = dyn_cast<const Seq>(re)) {
    197         for (const RE * re : *re_seq) {
    198             if (!isNullable(re)) {
    199                 return false;
     159CC * RE_Local::getFirstUniqueSymbol(RE * const re) {
     160    const CC * const re_first = first(re);
     161    if (re_first) {
     162        FollowMap follows;
     163        follow(re, follows);
     164        for (const auto & entry : follows) {
     165            if (entry.second->intersects(*re_first)) {
     166                return nullptr;
    200167            }
    201168        }
    202         return true;
    203     } else if (const Alt * re_alt = dyn_cast<const Alt>(re)) {
    204         for (const RE * re : *re_alt) {
    205             if (isNullable(re)) {
    206                 return true;
    207             }
    208         }
    209     } else if (const Rep* re_rep = dyn_cast<const Rep>(re)) {
    210         return re_rep->getLB() == 0 ? true : isNullable(re_rep->getRE());
    211     } else if (const Diff * diff = dyn_cast<const Diff>(re)) {
    212         return isNullable(diff->getLH()) && !isNullable(diff->getRH());
    213     } else if (const Intersect * e = dyn_cast<const Intersect>(re)) {
    214         return isNullable(e->getLH()) && isNullable(e->getRH());
    215     }
    216     return false;
     169    }
     170    return const_cast<CC *>(re_first);
    217171}
    218172
  • icGREP/icgrep-devel/icgrep/re/re_local.h

    r5632 r5812  
    22#define RE_LOCAL_H
    33
    4 #include <UCD/ucd_compiler.hpp>
    5 #include <map>
    6 
    74namespace re {
    85
    9 class RE;
     6class RE; class CC;
    107
    11 class RE_Local {
    12 public:
    13     static CC * first(RE * re);
    14     static CC * final(RE * re);
    15     static void follow(RE * re, std::map<CC*, CC*> &follow_map);
    16         static bool isLocalLanguage(RE * re);
    17 private:
    18         static bool isLocalLanguage_helper(const RE * re, UCD::UnicodeSet & seen);
    19         static bool isNullable(const RE * re);
     8struct RE_Local {
     9    static CC * getFirstUniqueSymbol(RE * re);
    2010};
    2111
  • icGREP/icgrep-devel/icgrep/re/re_nullable.h

    r5806 r5812  
    11#ifndef RE_NULLABLE_H
    22#define RE_NULLABLE_H
    3 
    4 #include <llvm/Support/Compiler.h>
    53
    64namespace re { class RE; }
  • icGREP/icgrep-devel/icgrep/re/re_reverse.cpp

    r5805 r5812  
    1919#include <re/re_intersect.h>
    2020#include <re/re_assertion.h>
    21 #include <re/printer_re.h>
    2221#include <llvm/Support/ErrorHandling.h>
    23 #include <llvm/Support/raw_ostream.h>
    24 #include <llvm/Support/Debug.h>
    2522#include <map>
     23
    2624
    2725using namespace llvm;
     
    2927namespace re {
    3028
    31 RE * reverse_helper(RE * re, std::map<std::string, Name *> & captureMap) {
    32     if (CC * cc = dyn_cast<CC>(re)) {
     29using CaptureMap = std::map<std::string, re::Name *>;
     30
     31RE * reverse(RE * re, CaptureMap & captureMap) {
     32    if (isa<CC>(re)) {
    3333        return re;
    3434    } else if (Range * rg = dyn_cast<Range>(re)) {
     
    3737        std::vector<RE*> list;
    3838        for (auto i = seq->rbegin(); i != seq->rend(); ++i) {
    39             list.push_back(reverse_helper(*i, captureMap));
     39            list.push_back(reverse(*i, captureMap));
    4040        }
    4141        return makeSeq(list.begin(), list.end());
     
    4343        std::vector<RE*> list;
    4444        for (auto i = alt->begin(); i != alt->end(); ++i) {
    45             list.push_back(reverse_helper(*i, captureMap));
     45            list.push_back(reverse(*i, captureMap));
    4646        }
    4747        return makeAlt(list.begin(), list.end());
    4848    } else if (Rep * rep = dyn_cast<Rep>(re)) {
    49         return makeRep(reverse_helper(rep->getRE(), captureMap), rep->getLB(), rep->getUB());
     49        return makeRep(reverse(rep->getRE(), captureMap), rep->getLB(), rep->getUB());
    5050    } else if (Group * g = dyn_cast<Group>(re)) {
    51         return makeGroup(g->getMode(), reverse_helper(g->getRE(), captureMap), g->getSense());
     51        return makeGroup(g->getMode(), reverse(g->getRE(), captureMap), g->getSense());
    5252    } else if (Diff * diff = dyn_cast<Diff>(re)) {
    53         return makeDiff(reverse_helper(diff->getLH(), captureMap), reverse_helper(diff->getRH(), captureMap));
     53        return makeDiff(reverse(diff->getLH(), captureMap), reverse(diff->getRH(), captureMap));
    5454    } else if (Intersect * e = dyn_cast<Intersect>(re)) {
    55         return makeIntersect(reverse_helper(e->getLH(), captureMap), reverse_helper(e->getRH(), captureMap));
     55        return makeIntersect(reverse(e->getLH(), captureMap), reverse(e->getRH(), captureMap));
    5656    } else if (Assertion * a = dyn_cast<Assertion>(re)) {
    57         return makeAssertion(reverse_helper(a->getAsserted(), captureMap), Assertion::reverseKind(a->getKind()), a->getSense());
     57        return makeAssertion(reverse(a->getAsserted(), captureMap), Assertion::reverseKind(a->getKind()), a->getSense());
    5858    } else if (isa<Start>(re)) {
    5959        return makeEnd();
     
    6767                return makeName(n->getNamespace(), n->getName(), Name::Type::UnicodeProperty);
    6868            case Name::Type::ZeroWidth:
    69                 return makeZeroWidth(n->getName(), reverse_helper(n->getDefinition(), captureMap));
    70             case Name::Type::Capture: 
     69                return makeZeroWidth(n->getName(), reverse(n->getDefinition(), captureMap));
     70            case Name::Type::Capture:
    7171                {
    7272                    std::string cname = n->getName();
     
    7777                    else {
    7878                        std::string newName = "\\" + std::to_string(captureMap.size() + 1);
    79                         Name * capture = makeCapture(newName, reverse_helper(n->getDefinition(), captureMap));
     79                        Name * capture = makeCapture(newName, reverse(n->getDefinition(), captureMap));
    8080                        captureMap.insert(std::make_pair(cname, capture));
    8181                        return capture;
     
    9292                    else {
    9393                        std::string newName = "\\" + std::to_string(captureMap.size() + 1);
    94                         Name * capture = makeCapture(newName, reverse_helper(referent->getDefinition(), captureMap));
     94                        Name * capture = makeCapture(newName, reverse(referent->getDefinition(), captureMap));
    9595                        captureMap.insert(std::make_pair(cname, capture));
    9696                        return capture;
     
    111111
    112112RE * reverse(RE * re) {
    113     std::map<std::string, Name *> captureMap;
    114     return reverse_helper(re, captureMap);
     113    CaptureMap captureMap;
     114    return reverse(re, captureMap);
    115115}
     116
    116117}
  • icGREP/icgrep-devel/icgrep/re/re_reverse.h

    r5493 r5812  
    88#define RE_REVERSE_H
    99
    10 //#include <map>                           // for map
    11 namespace re { class RE; class Name;}
     10namespace re { class RE; }
    1211
    1312namespace re {
    1413   
    15 /*
    16 class Reverser (
    17 public:
    18     Reverser();
    19     */
    20     RE * reverse(RE * re);
    21     /*
    22 private:
    23     using NameMap = std::map<std::pair<std::string, std::string>, re::Name *>;
    24     unsigned                    mCaptureGroupCount;
    25     NameMap                     mNameMap;
    26     Memoizer                    mMemoizer;
    27 }
    28 */
     14RE * reverse(RE * re);
     15
    2916}
    3017
Note: See TracChangeset for help on using the changeset viewer.