Changeset 5812
- Timestamp:
- Dec 28, 2017, 1:15:13 PM (14 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp
r5782 r5812 1311 1311 if (Align > 1) { 1312 1312 ConstantInt * align = ConstantInt::get(intPtrTy, Align); 1313 CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src pointeris misaligned");1314 CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst pointeris misaligned");1313 CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src is misaligned"); 1314 CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst is misaligned"); 1315 1315 } 1316 1316 Value * intSize = CreateZExtOrTrunc(Size, intPtrTy); … … 1326 1326 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1327 1327 CHECK_ADDRESS(Ptr, Size, "CreateMemSet"); 1328 if (Align > 1) { 1329 DataLayout DL(getModule()); 1330 IntegerType * const intPtrTy = DL.getIntPtrType(getContext()); 1331 Value * intPtr = CreatePtrToInt(Ptr, intPtrTy); 1332 ConstantInt * align = ConstantInt::get(intPtrTy, Align); 1333 CreateAssertZero(CreateURem(intPtr, align), "CreateMemSet: Ptr is misaligned"); 1334 } 1328 1335 } 1329 1336 return IRBuilder<>::CreateMemSet(Ptr, Val, Size, Align, isVolatile, TBAATag, ScopeTag, NoAliasTag); -
icGREP/icgrep-devel/icgrep/grep_engine.cpp
r5804 r5812 484 484 mGrepDriver->performIncrementalCacheCleanupStep(); 485 485 } 486 } 487 488 } 486 return nullptr; 487 } 488 489 } -
icGREP/icgrep-devel/icgrep/re/re_compiler.cpp
r5810 r5812 42 42 using namespace llvm; 43 43 44 using FollowMap = std::map<re::CC *, re::CC*>; 45 44 46 namespace re { 45 47 46 PabloAST * RE_Compiler::compile(RE * re) { 47 return markerVar(AdvanceMarker(compile(re, mPB), MarkerPosition::FinalPostPositionUnit, mPB)); 48 } 49 50 MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) { 51 return process(re, makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createOnes()), pb); 48 using MarkerType = RE_Compiler::MarkerType; 49 50 PabloAST * RE_Compiler::compile(RE * re, PabloAST * const initialCursors) { 51 const auto markers = initialCursors ? compile(re, initialCursors, mPB) : compile(re, mPB); 52 return markerVar(AdvanceMarker(markers, FinalPostPositionUnit, mPB)); 53 } 54 55 inline MarkerType RE_Compiler::compile(RE * re, PabloAST * const cursors, PabloBuilder & pb) { 56 return process(re, makeMarker(FinalMatchUnit, cursors), pb); 57 } 58 59 inline MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) { 60 return process(re, makeMarker(FinalPostPositionUnit, pb.createOnes()), pb); 52 61 } 53 62 … … 82 91 83 92 inline MarkerType RE_Compiler::compileAny(const MarkerType m, PabloBuilder & pb) { 84 PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));85 return makeMarker( MarkerPosition::FinalMatchUnit, nextFinalByte);93 PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, FinalPostPositionUnit, pb)); 94 return makeMarker(FinalMatchUnit, nextFinalByte); 86 95 } 87 96 … … 89 98 // If Unicode CCs weren't pulled out earlier, we generate the equivalent 90 99 // byte sequence as an RE. 91 if (cc->getAlphabet() == &cc::Unicode) return process(toUTF8(cc), marker, pb); 100 if (cc->getAlphabet() == &cc::Unicode) { 101 return process(toUTF8(cc), marker, pb); 102 } 92 103 PabloAST * nextPos = markerVar(marker); 93 104 if (isByteLength(cc)) { 94 if (marker.pos == MarkerPosition::FinalMatchUnit) nextPos = pb.createAdvance(nextPos, 1); 95 } 96 else { 97 nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb)); 98 } 99 return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb))); 105 if (marker.pos == FinalMatchUnit) { 106 nextPos = pb.createAdvance(nextPos, 1); 107 } 108 } else { 109 nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 110 } 111 return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb))); 100 112 } 101 113 … … 110 122 } else if (isUnicodeUnitLength(name)) { 111 123 MarkerType nameMarker = compileName(name, pb); 112 MarkerType nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);124 MarkerType nextPos = AdvanceMarker(marker, FinalPostPositionUnit, pb); 113 125 nameMarker.stream = pb.createAnd(markerVar(nextPos), markerVar(nameMarker), name->getName()); 114 126 return nameMarker; … … 172 184 } 173 185 174 MarkerType RE_Compiler::compileAlt(Alt * alt, MarkerType marker, PabloBuilder & pb) { 175 std::vector<PabloAST *> accum(3, pb.createZeroes()); 176 MarkerType const base = marker; 186 MarkerType RE_Compiler::compileAlt(Alt * alt, const MarkerType base, PabloBuilder & pb) { 187 std::array<PabloAST *, 3> accum; 177 188 // The following may be useful to force a common Advance rather than separate 178 189 // Advances in each alternative. 190 accum.fill(pb.createZeroes()); 179 191 for (RE * re : *alt) { 180 192 MarkerType m = process(re, base, pb); 181 MarkerPosition p = markerPos(m);193 const MarkerPosition p = markerPos(m); 182 194 accum[p] = pb.createOr(accum[p], markerVar(m), "alt"); 183 195 } 184 if (isa<Zeroes>(accum[MarkerPosition::InitialPostPositionUnit]) && isa<Zeroes>(accum[MarkerPosition::FinalPostPositionUnit])) { 185 return makeMarker(MarkerPosition::FinalMatchUnit, accum[MarkerPosition::FinalMatchUnit]); 186 } 187 PabloAST * combine = pb.createOr(accum[InitialPostPositionUnit], pb.createAdvance(accum[MarkerPosition::FinalMatchUnit], 1), "alt"); 196 if (isa<Zeroes>(accum[InitialPostPositionUnit]) && isa<Zeroes>(accum[FinalPostPositionUnit])) { 197 return makeMarker(FinalMatchUnit, accum[FinalMatchUnit]); 198 } 199 200 PabloAST * combine = pb.createOr(accum[InitialPostPositionUnit], pb.createAdvance(accum[FinalMatchUnit], 1), "alt"); 188 201 if (isa<Zeroes>(accum[FinalPostPositionUnit])) { 189 202 return makeMarker(InitialPostPositionUnit, combine); 190 203 } 191 combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[ MarkerPosition::FinalPostPositionUnit], "alt");192 return makeMarker( MarkerPosition::FinalPostPositionUnit, combine);204 combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[FinalPostPositionUnit], "alt"); 205 return makeMarker(FinalPostPositionUnit, combine); 193 206 } 194 207 … … 205 218 } else if (a->getKind() == Assertion::Kind::Boundary) { 206 219 MarkerType cond = compile(asserted, pb); 207 if (LLVM_LIKELY(markerPos(cond) == MarkerPosition::FinalMatchUnit)) {208 MarkerType postCond = AdvanceMarker(cond, MarkerPosition::FinalPostPositionUnit, pb);220 if (LLVM_LIKELY(markerPos(cond) == FinalMatchUnit)) { 221 MarkerType postCond = AdvanceMarker(cond, FinalPostPositionUnit, pb); 209 222 PabloAST * boundaryCond = pb.createXor(markerVar(cond), markerVar(postCond)); 210 223 if (a->getSense() == Assertion::Sense::Negative) { 211 224 boundaryCond = pb.createNot(boundaryCond); 212 225 } 213 MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);214 return makeMarker( MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), boundaryCond, "boundary"));226 MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionUnit, pb); 227 return makeMarker(FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), boundaryCond, "boundary")); 215 228 } 216 229 else UnsupportedRE("Unsupported boundary assertion"); 217 230 } else if (isUnicodeUnitLength(asserted)) { 218 231 MarkerType lookahead = compile(asserted, pb); 219 if (LLVM_LIKELY(markerPos(lookahead) == MarkerPosition::FinalMatchUnit)) {232 if (LLVM_LIKELY(markerPos(lookahead) == FinalMatchUnit)) { 220 233 PabloAST * la = markerVar(lookahead); 221 234 if (a->getSense() == Assertion::Sense::Negative) { 222 235 la = pb.createNot(la); 223 236 } 224 MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);225 return makeMarker( MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), la, "lookahead"));237 MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionUnit, pb); 238 return makeMarker(FinalPostPositionUnit, pb.createAnd(markerVar(fbyte), la, "lookahead")); 226 239 } 227 240 } … … 335 348 336 349 MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, int ifGroupSize, PabloBuilder & pb) { 337 if (LLVM_UNLIKELY(lb == 0)) return marker; 338 else if (LLVM_UNLIKELY(lb == 1)) return process(repeated, marker, pb); 350 if (LLVM_UNLIKELY(lb == 0)) { 351 return marker; 352 } else if (LLVM_UNLIKELY(lb == 1)) { 353 return process(repeated, marker, pb); 354 } 339 355 // 340 356 // A bounded repetition with an upper bound of at least 2. … … 345 361 PabloAST * cc = markerVar(compile(repeated, pb)); 346 362 PabloAST * cc_lb = consecutive_matches(cc, 1, lb, nullptr, pb); 347 const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;363 const auto pos = markerPos(marker) == FinalMatchUnit ? lb : lb - 1; 348 364 PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos); 349 return makeMarker( MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));365 return makeMarker(FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound")); 350 366 } 351 367 else if (isUnicodeUnitLength(repeated)) { 352 368 PabloAST * cc = markerVar(compile(repeated, pb)); 353 369 PabloAST * cc_lb = consecutive_matches(cc, 1, lb, mFinal, pb); 354 const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;370 const auto pos = markerPos(marker) == FinalMatchUnit ? lb : lb - 1; 355 371 PabloAST * marker_fwd = pb.createIndexedAdvance(markerVar(marker), mFinal, pos); 356 return makeMarker( MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));372 return makeMarker(FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound")); 357 373 } 358 374 else if (isTypeForLocal(repeated)) { 359 CC * firstSymSet = RE_Local::first(repeated); 360 std::map<CC *, CC*> followMap; 361 RE_Local::follow(repeated, followMap); 362 bool firstSymSet_found_in_follow = false; 363 for (auto & entry : followMap) { 364 if (entry.second->intersects(*firstSymSet)) { 365 firstSymSet_found_in_follow = true; 366 } 367 } 368 if (!firstSymSet_found_in_follow) { 369 // When the first symbol is unique, we can use it as an index stream. 370 PabloAST * firstCCstream = markerVar(compile(firstSymSet, pb)); 375 CC * const first = RE_Local::getFirstUniqueSymbol(repeated); 376 if (first) { // if the first symbol is unique, we can use it as an index stream. 377 PabloAST * firstCCstream = markerVar(compile(first, pb)); 371 378 // Find all matches to the repeated regexp. 372 PabloAST * submatch = markerVar(AdvanceMarker(compile(repeated, pb), MarkerPosition::FinalPostPositionUnit, pb));379 PabloAST * submatch = markerVar(AdvanceMarker(compile(repeated, pb), FinalPostPositionUnit, pb)); 373 380 // Consecutive submatches require that the symbol following the end of one submatch is the first symbol for 374 381 // the next submatch. lb-1 such submatches are required. 375 382 PabloAST * consecutive_submatch = consecutive_matches(submatch, 1, lb-1, firstCCstream, pb); 376 383 // Find submatch positions which are lb-2 start symbols forward from the current marker position. 377 PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));384 PabloAST * base = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 378 385 PabloAST * marker_fwd = pb.createIndexedAdvance(base, firstCCstream, lb - 1); 379 386 PabloAST * consecutive_lb_1 = pb.createAnd(marker_fwd, consecutive_submatch); 380 387 // From any of these positions, any position reachable by one more occurrence of the 381 388 // regexp is a match. 382 return process(repeated, makeMarker( MarkerPosition::FinalPostPositionUnit, consecutive_lb_1), pb);389 return process(repeated, makeMarker(FinalPostPositionUnit, consecutive_lb_1), pb); 383 390 } 384 391 } … … 404 411 405 412 MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, int ifGroupSize, PabloBuilder & pb) { 406 if (LLVM_UNLIKELY(ub == 0)) return marker; 413 if (LLVM_UNLIKELY(ub == 0)) { 414 return marker; 415 } 407 416 // 408 417 // A bounded repetition with an upper bound of at least 2. … … 414 423 // Create a mask of positions reachable within ub from current marker. 415 424 // Use matchstar, then apply filter. 416 PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionUnit, pb));425 PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionUnit, pb)); 417 426 // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask. 418 427 // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region. … … 422 431 // "multi-byte" character. Combine the masked range with any nonFinals. 423 432 PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded"); 424 return makeMarker( MarkerPosition::InitialPostPositionUnit, bounded);433 return makeMarker(InitialPostPositionUnit, bounded); 425 434 } 426 435 else if (isUnicodeUnitLength(repeated)) { 427 436 // For a regexp which represent a single Unicode codepoint, we can use the mFinal stream 428 437 // as an index stream for an indexed advance operation. 429 PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));438 PabloAST * cursor = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 430 439 PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, mFinal, pb); 431 440 PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask, "masked"); 432 441 PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded"); 433 return makeMarker( MarkerPosition::FinalPostPositionUnit, bounded);442 return makeMarker(FinalPostPositionUnit, bounded); 434 443 } 435 444 else if (isTypeForLocal(repeated)) { 436 CC * firstSymSet = RE_Local::first(repeated); 437 std::map<CC *, CC*> followMap; 438 RE_Local::follow(repeated, followMap); 439 bool firstSymSet_found_in_follow = false; 440 for (auto & entry : followMap) { 441 if (entry.second->intersects(*firstSymSet)) { 442 firstSymSet_found_in_follow = true; 443 } 444 } 445 if (!firstSymSet_found_in_follow) { 446 // When the first symbol is unique, we can use it as an index stream. 447 PabloAST * firstCCstream = markerVar(compile(firstSymSet, pb)); 448 PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb)); 445 CC * const first = RE_Local::getFirstUniqueSymbol(repeated); 446 if (first) { // if the first symbol is unique, we can use it as an index stream. 447 PabloAST * firstCCstream = markerVar(compile(first, pb)); 448 PabloAST * cursor = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 449 449 PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, firstCCstream, pb); 450 PabloAST * masked = pb.createAnd(markerVar(AdvanceMarker(compile(repeated, pb), MarkerPosition::FinalPostPositionUnit, pb)), upperLimitMask, "masked");450 PabloAST * masked = pb.createAnd(markerVar(AdvanceMarker(compile(repeated, pb), FinalPostPositionUnit, pb)), upperLimitMask, "masked"); 451 451 PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, mNonFinal), "bounded"); 452 return makeMarker( MarkerPosition::FinalPostPositionUnit, bounded);452 return makeMarker(FinalPostPositionUnit, bounded); 453 453 } 454 454 } … … 478 478 MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) { 479 479 // always use PostPosition markers for unbounded repetition. 480 PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));480 PabloAST * base = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 481 481 if (isByteLength(repeated) && !AlgorithmOptionIsSet(DisableMatchStar)) { 482 482 PabloAST * mask = markerVar(compile(repeated, pb)); … … 484 484 // The post position character may land on the initial byte of a multi-byte character. Combine them with the masked range. 485 485 PabloAST * unbounded = pb.createMatchStar(base, pb.createOr(mask, nonFinal), "unbounded"); 486 return makeMarker( MarkerPosition::FinalPostPositionUnit, unbounded);486 return makeMarker(FinalPostPositionUnit, unbounded); 487 487 } else if (isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableMatchStar) && !AlgorithmOptionIsSet(DisableUnicodeMatchStar)) { 488 488 PabloAST * cc = markerVar(compile(repeated, pb)); … … 492 492 mstar = pb.createMatchStar(base, cc); 493 493 PabloAST * final = mFinal; 494 return makeMarker( MarkerPosition::FinalPostPositionUnit, pb.createAnd(mstar, final, "unbounded"));494 return makeMarker(FinalPostPositionUnit, pb.createAnd(mstar, final, "unbounded")); 495 495 } else if (mStarDepth > 0){ 496 496 PabloBuilder * const outer = pb.getParent(); … … 500 500 PabloAST * m1 = pb.createOr(base, starPending); 501 501 PabloAST * m2 = pb.createOr(base, starAccum); 502 MarkerType result = process(repeated, makeMarker( MarkerPosition::FinalPostPositionUnit, m1), pb);503 result = AdvanceMarker(result, MarkerPosition::FinalPostPositionUnit, pb);502 MarkerType result = process(repeated, makeMarker(FinalPostPositionUnit, m1), pb); 503 result = AdvanceMarker(result, FinalPostPositionUnit, pb); 504 504 PabloAST * loopComputation = markerVar(result); 505 505 pb.createAssign(starPending, pb.createAnd(loopComputation, pb.createNot(m2))); … … 517 517 mCompiledName = &nestedMap; 518 518 mStarDepth++; 519 MarkerType result = process(repeated, makeMarker( MarkerPosition::FinalPostPositionUnit, whilePending), wb);520 result = AdvanceMarker(result, MarkerPosition::FinalPostPositionUnit, wb);519 MarkerType result = process(repeated, makeMarker(FinalPostPositionUnit, whilePending), wb); 520 result = AdvanceMarker(result, FinalPostPositionUnit, wb); 521 521 PabloAST * loopComputation = markerVar(result); 522 522 wb.createAssign(whilePending, wb.createAnd(loopComputation, wb.createNot(whileAccum))); … … 532 532 inline MarkerType RE_Compiler::compileStart(MarkerType marker, pablo::PabloBuilder & pb) { 533 533 PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1)); 534 MarkerType m = AdvanceMarker(marker, MarkerPosition::InitialPostPositionUnit, pb);535 return makeMarker( MarkerPosition::InitialPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));534 MarkerType m = AdvanceMarker(marker, InitialPostPositionUnit, pb); 535 return makeMarker(InitialPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol")); 536 536 } 537 537 538 538 inline MarkerType RE_Compiler::compileEnd(MarkerType marker, pablo::PabloBuilder & pb) { 539 PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));540 PabloAST * atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol");541 return makeMarker( MarkerPosition::FinalPostPositionUnit, atEOL);539 PabloAST * const nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb)); 540 PabloAST * const atEOL = pb.createOr(pb.createAnd(mLineBreak, nextPos), pb.createAdvance(pb.createAnd(nextPos, mCRLF), 1), "eol"); 541 return makeMarker(FinalPostPositionUnit, atEOL); 542 542 } 543 543 544 544 inline MarkerType RE_Compiler::AdvanceMarker(MarkerType marker, const MarkerPosition newpos, PabloBuilder & pb) { 545 545 if (marker.pos != newpos) { 546 if (marker.pos == MarkerPosition::FinalMatchUnit) {546 if (marker.pos == FinalMatchUnit) { 547 547 marker.stream = pb.createAdvance(marker.stream, 1, "ipp"); 548 marker.pos = MarkerPosition::InitialPostPositionUnit; 549 } 550 if (newpos == MarkerPosition::FinalPostPositionUnit) { 551 PabloAST * nonFinal = mNonFinal; 552 marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), nonFinal, "fpp"); 553 marker.pos = MarkerPosition::FinalPostPositionUnit; 548 marker.pos = InitialPostPositionUnit; 549 } 550 if (newpos == FinalPostPositionUnit) { 551 marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), mNonFinal, "fpp"); 552 marker.pos = FinalPostPositionUnit; 554 553 } 555 554 } … … 589 588 mNonFinal = mPB.createExtract(required, mPB.getInteger(1)); 590 589 mFinal = mPB.createExtract(required, mPB.getInteger(2)); 591 592 590 } 593 591 -
icGREP/icgrep-devel/icgrep/re/re_compiler.h
r5810 r5812 25 25 namespace re { class CC; } 26 26 27 //namespace UCD {28 //class UnicodeSet;29 //}30 31 27 /* Marker streams represent the results of matching steps. 32 28 Three types of marker streams are used internally. … … 43 39 namespace re { 44 40 45 enum MarkerPosition {FinalMatchUnit, InitialPostPositionUnit, FinalPostPositionUnit};46 47 struct MarkerType {48 MarkerPosition pos;49 pablo::PabloAST * stream;50 MarkerType & operator =(const MarkerType &) = default;51 };52 53 inline MarkerPosition markerPos(const MarkerType & m) {return m.pos; }54 55 inline pablo::PabloAST * markerVar(const MarkerType & m) {return m.stream; }56 57 inline MarkerType makeMarker(MarkerPosition newpos, pablo::PabloAST * strm) {return {newpos, strm};}58 59 60 41 class RE_Compiler { 61 42 public: 62 43 44 enum MarkerPosition {FinalMatchUnit, InitialPostPositionUnit, FinalPostPositionUnit}; 45 46 struct MarkerType { 47 MarkerPosition pos; 48 pablo::PabloAST * stream; 49 MarkerType & operator =(const MarkerType &) = default; 50 }; 51 63 52 RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler); 64 pablo::PabloAST * compile(RE * re );53 pablo::PabloAST * compile(RE * re, pablo::PabloAST * const initialCursors = nullptr); 65 54 66 55 static LLVM_ATTRIBUTE_NORETURN void UnsupportedRE(std::string errmsg); … … 88 77 }; 89 78 90 MarkerType compile(RE * re, pablo::PabloBuilder & cg); 79 80 MarkerType compile(RE * re, pablo::PabloBuilder & pb); 81 MarkerType compile(RE * re, pablo::PabloAST * const cursors, pablo::PabloBuilder & pb); 91 82 92 83 MarkerType process(RE * re, MarkerType marker, pablo::PabloBuilder & pb); … … 95 86 MarkerType compileSeq(Seq * seq, MarkerType marker, pablo::PabloBuilder & pb); 96 87 MarkerType compileSeqTail(Seq::iterator current, const Seq::iterator end, int matchLenSoFar, MarkerType marker, pablo::PabloBuilder & pb); 97 MarkerType compileAlt(Alt * alt, MarkerType marker, pablo::PabloBuilder & pb);88 MarkerType compileAlt(Alt * alt, MarkerType base, pablo::PabloBuilder & pb); 98 89 MarkerType compileAssertion(Assertion * a, MarkerType marker, pablo::PabloBuilder & pb); 99 90 MarkerType compileRep(Rep * rep, MarkerType marker, pablo::PabloBuilder & pb); … … 114 105 MarkerType AdvanceMarker(MarkerType marker, const MarkerPosition newpos, pablo::PabloBuilder & pb); 115 106 void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBuilder & pb); 107 108 static inline MarkerPosition markerPos(const MarkerType & m) {return m.pos; } 109 static inline pablo::PabloAST * markerVar(const MarkerType & m) {return m.stream; } 110 static inline MarkerType makeMarker(MarkerPosition newpos, pablo::PabloAST * strm) {return {newpos, strm};} 116 111 117 112 private: -
icGREP/icgrep-devel/icgrep/re/re_local.cpp
r5748 r5812 8 8 #include <re/re_intersect.h> 9 9 #include <re/re_assertion.h> 10 #include <re/re_any.h>11 10 #include <re/re_analysis.h> 12 #include < UCD/resolve_properties.h>13 #include <boost/container/flat_ set.hpp>11 #include <re/re_nullable.h> 12 #include <boost/container/flat_map.hpp> 14 13 #include <boost/range/adaptor/reversed.hpp> 15 #include <util/slab_allocator.h>16 #include <map>17 14 18 15 using namespace boost::container; … … 20 17 21 18 namespace re { 22 23 inline void combine(CC *& a, CC * b) { 19 20 using FollowMap = flat_map<const CC *, const CC*>; 21 22 inline const CC * combine(const CC * a, const CC * b) { 24 23 if (a && b) { 25 a =makeCC(a, b);24 return makeCC(a, b); 26 25 } else if (b) { 27 a =b;26 return b; 28 27 } 28 return a; 29 29 } 30 30 31 CC * RE_Local::first(RE * re) {32 if ( Name * name = dyn_cast<Name>(re)) {31 const CC * first(const RE * re) { 32 if (const Name * name = dyn_cast<Name>(re)) { 33 33 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 34 34 return first(name->getDefinition()); … … 36 36 UndefinedNameError(name); 37 37 } 38 } else if ( CC * cc = dyn_cast<CC>(re)) {38 } else if (const CC * cc = dyn_cast<CC>(re)) { 39 39 return cc; 40 } else if ( Seq * seq = dyn_cast<Seq>(re)) {41 CC * cc = nullptr;40 } else if (const Seq * seq = dyn_cast<Seq>(re)) { 41 const CC * cc = nullptr; 42 42 for (auto & si : *seq) { 43 c ombine(cc, first(si));44 if (! isNullable(si)) {43 cc = combine(cc, first(si)); 44 if (!RE_Nullable::isNullable(si)) { 45 45 break; 46 46 } 47 47 } 48 48 return cc; 49 } else if ( Alt * alt = dyn_cast<Alt>(re)) {50 CC * cc = nullptr;49 } else if (const Alt * alt = dyn_cast<Alt>(re)) { 50 const CC * cc = nullptr; 51 51 for (auto & ai : *alt) { 52 c ombine(cc, first(ai));52 cc = combine(cc, first(ai)); 53 53 } 54 54 return cc; 55 } else if ( Rep * rep = dyn_cast<Rep>(re)) {55 } else if (const Rep * rep = dyn_cast<Rep>(re)) { 56 56 return first(rep->getRE()); 57 } else if ( Diff * diff = dyn_cast<Diff>(re)) {58 if ( CC * lh = first(diff->getLH())) {59 if ( CC * rh = first(diff->getRH())) {57 } else if (const Diff * diff = dyn_cast<Diff>(re)) { 58 if (const CC * lh = first(diff->getLH())) { 59 if (const CC * rh = first(diff->getRH())) { 60 60 return subtractCC(lh, rh); 61 61 } 62 62 } 63 } else if ( Intersect * ix = dyn_cast<Intersect>(re)) {64 if ( CC * lh = first(ix->getLH())) {65 if ( CC * rh = first(ix->getRH())) {63 } else if (const Intersect * ix = dyn_cast<Intersect>(re)) { 64 if (const CC * lh = first(ix->getLH())) { 65 if (const CC * rh = first(ix->getRH())) { 66 66 return intersectCC(lh, rh); 67 67 } … … 71 71 } 72 72 73 CC * RE_Local::final(RE * re) {74 if ( Name * name = dyn_cast<Name>(re)) {73 const CC * final(const RE * re) { 74 if (const Name * name = dyn_cast<Name>(re)) { 75 75 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 76 76 return final(name->getDefinition()); … … 78 78 UndefinedNameError(name); 79 79 } 80 } else if ( CC * cc = dyn_cast<CC>(re)) {80 } else if (const CC * cc = dyn_cast<CC>(re)) { 81 81 return cc; 82 } else if ( Seq * seq = dyn_cast<Seq>(re)) {83 CC * cc = nullptr;82 } else if (const Seq * seq = dyn_cast<Seq>(re)) { 83 const CC * cc = nullptr; 84 84 for (auto & si : boost::adaptors::reverse(*seq)) { 85 c ombine(cc, first(si));86 if (! isNullable(si)) {85 cc = combine(cc, final(si)); 86 if (!RE_Nullable::isNullable(si)) { 87 87 break; 88 88 } 89 89 } 90 90 return cc; 91 } else if ( Alt * alt = dyn_cast<Alt>(re)) {92 CC * cc = nullptr;91 } else if (const Alt * alt = dyn_cast<Alt>(re)) { 92 const CC * cc = nullptr; 93 93 for (auto & ai : *alt) { 94 c ombine(cc, final(ai));94 cc = combine(cc, final(ai)); 95 95 } 96 96 return cc; 97 } else if ( Rep * rep = dyn_cast<Rep>(re)) {97 } else if (const Rep * rep = dyn_cast<Rep>(re)) { 98 98 return final(rep->getRE()); 99 } else if ( Diff * diff = dyn_cast<Diff>(re)) {100 if ( CC * lh = final(diff->getLH())) {101 if ( CC * rh = final(diff->getRH())) {99 } else if (const Diff * diff = dyn_cast<Diff>(re)) { 100 if (const CC * lh = final(diff->getLH())) { 101 if (const CC * rh = final(diff->getRH())) { 102 102 return subtractCC(lh, rh); 103 103 } 104 104 } 105 } else if ( Intersect * ix = dyn_cast<Intersect>(re)) {106 if ( CC * lh = final(ix->getLH())) {107 if ( CC * rh = final(ix->getRH())) {105 } else if (const Intersect * ix = dyn_cast<Intersect>(re)) { 106 if (const CC * lh = final(ix->getLH())) { 107 if (const CC * rh = final(ix->getRH())) { 108 108 return intersectCC(lh, rh); 109 109 } … … 114 114 } 115 115 116 void RE_Local::follow(RE * re, std::map<CC *, CC*> &follow_map) {117 if ( Name * name = dyn_cast<Name>(re)) {116 void follow(const RE * re, FollowMap & follows) { 117 if (const Name * name = dyn_cast<Name>(re)) { 118 118 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 119 return follow(name->getDefinition(), follow _map);119 return follow(name->getDefinition(), follows); 120 120 } else { 121 121 UndefinedNameError(name); 122 122 } 123 } else if ( Seq * seq = dyn_cast<Seq>(re)) {124 if ( seq->size() > 0) {125 RE *re_first = *(seq->begin());126 RE *re_follow = makeSeq(seq->begin() + 1, seq->end());123 } else if (const Seq * seq = dyn_cast<Seq>(re)) { 124 if (!seq->empty()) { 125 const RE * const re_first = *(seq->begin()); 126 const RE * const re_follow = makeSeq(seq->begin() + 1, seq->end()); 127 127 auto e1 = final(re_first); 128 128 auto e2 = first(re_follow); 129 129 if (e1 && e2) { 130 auto e = follow _map.find(e1);131 if (e != follow _map.end()) {130 auto e = follows.find(e1); 131 if (e != follows.end()) { 132 132 e->second = makeCC(e->second, e2); 133 133 } else { 134 follow _map.emplace(e1, e2);134 follows.emplace(e1, e2); 135 135 } 136 136 } 137 follow(re_first, follow _map);138 follow(re_follow, follow _map);137 follow(re_first, follows); 138 follow(re_follow, follows); 139 139 } 140 } else if ( Alt * alt = dyn_cast<Alt>(re)) {140 } else if (const Alt * alt = dyn_cast<Alt>(re)) { 141 141 for (auto ai = alt->begin(); ai != alt->end(); ++ai) { 142 follow(*ai, follow _map);142 follow(*ai, follows); 143 143 } 144 } else if ( Rep * rep = dyn_cast<Rep>(re)) {145 auto e1 = final(rep->getRE());144 } else if (const Rep * rep = dyn_cast<Rep>(re)) { 145 const auto e1 = final(rep->getRE()); 146 146 auto e2 = first(rep->getRE()); 147 147 if (e1 && e2) { 148 auto e = follow _map.find(e1);149 if (e != follow _map.end()) {148 auto e = follows.find(e1); 149 if (e != follows.end()) { 150 150 e->second = makeCC(e->second, e2); 151 151 } else { 152 follow _map.emplace(e1, e2);152 follows.emplace(e1, e2); 153 153 } 154 154 } 155 follow(rep->getRE(), follow _map);155 follow(rep->getRE(), follows); 156 156 } 157 157 } 158 158 159 bool RE_Local::isLocalLanguage(RE * re) { 160 UCD::UnicodeSet seen; 161 return isLocalLanguage_helper(re, seen); 162 } 163 164 bool RE_Local::isLocalLanguage_helper(const RE * re, UCD::UnicodeSet & seen) { 165 assert ("RE object cannot be null!" && re); 166 if (isa<Any>(re)) { 167 const bool no_intersect = seen.empty(); 168 seen.insert_range(0x00, 0x10FFFF); 169 return no_intersect; 170 } else if (const CC * cc = dyn_cast<CC>(re)) { 171 const bool has_intersect = cc->intersects(seen); 172 seen = seen + *cc; 173 return !has_intersect; 174 } else if (const Name * n = dyn_cast<Name>(re)) { 175 return isLocalLanguage_helper(n->getDefinition(), seen); 176 } else if (const Seq * seq = dyn_cast<Seq>(re)) { 177 for (const RE * item : *seq) { 178 if (!isLocalLanguage_helper(item, seen)) return false; 179 } 180 return true; 181 } else if (const Alt * alt = dyn_cast<Alt>(re)) { 182 for (RE * item : *alt) { 183 if (!isLocalLanguage_helper(item, seen)) return false; 184 } 185 return true; 186 } else if (const Rep * rep = dyn_cast<Rep>(re)) { 187 if (rep->getUB() > 1) return false; 188 return isLocalLanguage_helper(rep->getRE(), seen); 189 } else { 190 // A local language cannot contain Intersects, Differences, Assertions, Start, End. 191 return false; 192 } 193 } 194 195 bool RE_Local::isNullable(const RE * re) { 196 if (const Seq * re_seq = dyn_cast<const Seq>(re)) { 197 for (const RE * re : *re_seq) { 198 if (!isNullable(re)) { 199 return false; 159 CC * RE_Local::getFirstUniqueSymbol(RE * const re) { 160 const CC * const re_first = first(re); 161 if (re_first) { 162 FollowMap follows; 163 follow(re, follows); 164 for (const auto & entry : follows) { 165 if (entry.second->intersects(*re_first)) { 166 return nullptr; 200 167 } 201 168 } 202 return true; 203 } else if (const Alt * re_alt = dyn_cast<const Alt>(re)) { 204 for (const RE * re : *re_alt) { 205 if (isNullable(re)) { 206 return true; 207 } 208 } 209 } else if (const Rep* re_rep = dyn_cast<const Rep>(re)) { 210 return re_rep->getLB() == 0 ? true : isNullable(re_rep->getRE()); 211 } else if (const Diff * diff = dyn_cast<const Diff>(re)) { 212 return isNullable(diff->getLH()) && !isNullable(diff->getRH()); 213 } else if (const Intersect * e = dyn_cast<const Intersect>(re)) { 214 return isNullable(e->getLH()) && isNullable(e->getRH()); 215 } 216 return false; 169 } 170 return const_cast<CC *>(re_first); 217 171 } 218 172 -
icGREP/icgrep-devel/icgrep/re/re_local.h
r5632 r5812 2 2 #define RE_LOCAL_H 3 3 4 #include <UCD/ucd_compiler.hpp>5 #include <map>6 7 4 namespace re { 8 5 9 class RE; 6 class RE; class CC; 10 7 11 class RE_Local { 12 public: 13 static CC * first(RE * re); 14 static CC * final(RE * re); 15 static void follow(RE * re, std::map<CC*, CC*> &follow_map); 16 static bool isLocalLanguage(RE * re); 17 private: 18 static bool isLocalLanguage_helper(const RE * re, UCD::UnicodeSet & seen); 19 static bool isNullable(const RE * re); 8 struct RE_Local { 9 static CC * getFirstUniqueSymbol(RE * re); 20 10 }; 21 11 -
icGREP/icgrep-devel/icgrep/re/re_nullable.h
r5806 r5812 1 1 #ifndef RE_NULLABLE_H 2 2 #define RE_NULLABLE_H 3 4 #include <llvm/Support/Compiler.h>5 3 6 4 namespace re { class RE; } -
icGREP/icgrep-devel/icgrep/re/re_reverse.cpp
r5805 r5812 19 19 #include <re/re_intersect.h> 20 20 #include <re/re_assertion.h> 21 #include <re/printer_re.h>22 21 #include <llvm/Support/ErrorHandling.h> 23 #include <llvm/Support/raw_ostream.h>24 #include <llvm/Support/Debug.h>25 22 #include <map> 23 26 24 27 25 using namespace llvm; … … 29 27 namespace re { 30 28 31 RE * reverse_helper(RE * re, std::map<std::string, Name *> & captureMap) { 32 if (CC * cc = dyn_cast<CC>(re)) { 29 using CaptureMap = std::map<std::string, re::Name *>; 30 31 RE * reverse(RE * re, CaptureMap & captureMap) { 32 if (isa<CC>(re)) { 33 33 return re; 34 34 } else if (Range * rg = dyn_cast<Range>(re)) { … … 37 37 std::vector<RE*> list; 38 38 for (auto i = seq->rbegin(); i != seq->rend(); ++i) { 39 list.push_back(reverse _helper(*i, captureMap));39 list.push_back(reverse(*i, captureMap)); 40 40 } 41 41 return makeSeq(list.begin(), list.end()); … … 43 43 std::vector<RE*> list; 44 44 for (auto i = alt->begin(); i != alt->end(); ++i) { 45 list.push_back(reverse _helper(*i, captureMap));45 list.push_back(reverse(*i, captureMap)); 46 46 } 47 47 return makeAlt(list.begin(), list.end()); 48 48 } else if (Rep * rep = dyn_cast<Rep>(re)) { 49 return makeRep(reverse _helper(rep->getRE(), captureMap), rep->getLB(), rep->getUB());49 return makeRep(reverse(rep->getRE(), captureMap), rep->getLB(), rep->getUB()); 50 50 } else if (Group * g = dyn_cast<Group>(re)) { 51 return makeGroup(g->getMode(), reverse _helper(g->getRE(), captureMap), g->getSense());51 return makeGroup(g->getMode(), reverse(g->getRE(), captureMap), g->getSense()); 52 52 } else if (Diff * diff = dyn_cast<Diff>(re)) { 53 return makeDiff(reverse _helper(diff->getLH(), captureMap), reverse_helper(diff->getRH(), captureMap));53 return makeDiff(reverse(diff->getLH(), captureMap), reverse(diff->getRH(), captureMap)); 54 54 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 55 return makeIntersect(reverse _helper(e->getLH(), captureMap), reverse_helper(e->getRH(), captureMap));55 return makeIntersect(reverse(e->getLH(), captureMap), reverse(e->getRH(), captureMap)); 56 56 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 57 return makeAssertion(reverse _helper(a->getAsserted(), captureMap), Assertion::reverseKind(a->getKind()), a->getSense());57 return makeAssertion(reverse(a->getAsserted(), captureMap), Assertion::reverseKind(a->getKind()), a->getSense()); 58 58 } else if (isa<Start>(re)) { 59 59 return makeEnd(); … … 67 67 return makeName(n->getNamespace(), n->getName(), Name::Type::UnicodeProperty); 68 68 case Name::Type::ZeroWidth: 69 return makeZeroWidth(n->getName(), reverse _helper(n->getDefinition(), captureMap));70 case Name::Type::Capture: 69 return makeZeroWidth(n->getName(), reverse(n->getDefinition(), captureMap)); 70 case Name::Type::Capture: 71 71 { 72 72 std::string cname = n->getName(); … … 77 77 else { 78 78 std::string newName = "\\" + std::to_string(captureMap.size() + 1); 79 Name * capture = makeCapture(newName, reverse _helper(n->getDefinition(), captureMap));79 Name * capture = makeCapture(newName, reverse(n->getDefinition(), captureMap)); 80 80 captureMap.insert(std::make_pair(cname, capture)); 81 81 return capture; … … 92 92 else { 93 93 std::string newName = "\\" + std::to_string(captureMap.size() + 1); 94 Name * capture = makeCapture(newName, reverse _helper(referent->getDefinition(), captureMap));94 Name * capture = makeCapture(newName, reverse(referent->getDefinition(), captureMap)); 95 95 captureMap.insert(std::make_pair(cname, capture)); 96 96 return capture; … … 111 111 112 112 RE * reverse(RE * re) { 113 std::map<std::string, Name *>captureMap;114 return reverse _helper(re, captureMap);113 CaptureMap captureMap; 114 return reverse(re, captureMap); 115 115 } 116 116 117 } -
icGREP/icgrep-devel/icgrep/re/re_reverse.h
r5493 r5812 8 8 #define RE_REVERSE_H 9 9 10 //#include <map> // for map 11 namespace re { class RE; class Name;} 10 namespace re { class RE; } 12 11 13 12 namespace re { 14 13 15 /* 16 class Reverser ( 17 public: 18 Reverser(); 19 */ 20 RE * reverse(RE * re); 21 /* 22 private: 23 using NameMap = std::map<std::pair<std::string, std::string>, re::Name *>; 24 unsigned mCaptureGroupCount; 25 NameMap mNameMap; 26 Memoizer mMemoizer; 27 } 28 */ 14 RE * reverse(RE * re); 15 29 16 } 30 17
Note: See TracChangeset
for help on using the changeset viewer.