Changeset 4841
- Timestamp:
- Oct 17, 2015, 4:25:05 PM (3 years ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp
r4819 r4841 452 452 addTargets(names); 453 453 generateRange(defaultIfHierachy, entry); 454 updateNames(names );454 updateNames(names, entry); 455 455 } 456 456 … … 471 471 addTargets(names); 472 472 generateRange(noIfHierachy, entry); 473 updateNames(names );473 updateNames(names, entry); 474 474 } 475 475 … … 488 488 inline void UCDCompiler::addTargets(const NameMap & names) { 489 489 for (const auto t : names) { 490 if ( isa<CC>(t.first->getDefinition())) {490 if (LLVM_LIKELY(isa<CC>(t.first->getDefinition()))) { 491 491 mTargetMap.emplace(cast<CC>(t.first->getDefinition()), t.second ? t.second : PabloBlock::createZeroes()); 492 492 } else { … … 500 500 * @brief updateNames 501 501 ** ------------------------------------------------------------------------------------------------------------- */ 502 inline void UCDCompiler::updateNames(NameMap & names ) {502 inline void UCDCompiler::updateNames(NameMap & names, PabloBuilder & entry) { 503 503 for (auto & t : names) { 504 504 auto f = mTargetMap.find(cast<CC>(t.first->getDefinition())); 505 t.second = f->second; 505 if (f != mTargetMap.end()) { 506 std::string name = t.first->getName(); 507 if (Statement * result = dyn_cast<Statement>(f->second)) { 508 result->setName(entry.getName(name, false)); 509 t.second = result; 510 } else { 511 t.second = entry.createAssign(std::move(name), f->second); 512 } 513 } 506 514 } 507 515 mTargetMap.clear(); -
icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.hpp
r4814 r4841 94 94 void addTargets(const NameMap & names); 95 95 96 void updateNames(NameMap & names );96 void updateNames(NameMap & names, PabloBuilder & entry); 97 97 98 98 private: -
icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp
r4835 r4841 22 22 #endif 23 23 24 /** ------------------------------------------------------------------------------------------------------------- * 25 * @brief optimize 26 ** ------------------------------------------------------------------------------------------------------------- */ 24 27 bool Simplifier::optimize(PabloFunction & function) { 25 28 eliminateRedundantCode(function.getEntryBlock()); … … 32 35 } 33 36 37 /** ------------------------------------------------------------------------------------------------------------- * 38 * @brief canTriviallyFold 39 ** ------------------------------------------------------------------------------------------------------------- */ 34 40 inline static PabloAST * canTriviallyFold(Statement * stmt, PabloBlock & block) { 35 41 for (unsigned i = 0; i != stmt->getNumOperands(); ++i) { … … 81 87 } 82 88 89 /** ------------------------------------------------------------------------------------------------------------- * 90 * @brief isSuperfluous 91 ** ------------------------------------------------------------------------------------------------------------- */ 83 92 inline bool Simplifier::isSuperfluous(const Assign * const assign) { 84 93 for (const PabloAST * inst : assign->users()) { … … 105 114 } 106 115 116 /** ------------------------------------------------------------------------------------------------------------- * 117 * @brief demoteDefinedVar 118 ** ------------------------------------------------------------------------------------------------------------- */ 107 119 inline bool demoteDefinedVar(const If * ifNode, const Assign * def) { 108 120 // If this value isn't used outside of this scope then there is no reason to allow it to escape it. … … 121 133 } 122 134 135 /** ------------------------------------------------------------------------------------------------------------- * 136 * @brief replaceReachableUsersOfWith 137 ** ------------------------------------------------------------------------------------------------------------- */ 123 138 inline void replaceReachableUsersOfWith(Statement * stmt, PabloAST * expr) { 124 139 const PabloBlock * const root = stmt->getParent(); … … 146 161 } 147 162 163 /** ------------------------------------------------------------------------------------------------------------- * 164 * @brief discardNestedIfBlock 165 * 166 * If this inner block is composed of only Boolean logic and Assign statements and there are fewer than 3 167 * statements, just add the statements in the inner block to the current block. 168 ** ------------------------------------------------------------------------------------------------------------- */ 169 inline bool discardNestedIfBlock(const PabloBlock & pb) { 170 unsigned computations = 0; 171 for (const Statement * stmt : pb) { 172 switch (stmt->getClassTypeId()) { 173 case PabloAST::ClassTypeId::And: 174 case PabloAST::ClassTypeId::Or: 175 case PabloAST::ClassTypeId::Xor: 176 if (++computations > 3) { 177 return false; 178 } 179 case PabloAST::ClassTypeId::Not: 180 case PabloAST::ClassTypeId::Assign: 181 break; 182 default: 183 return false; 184 } 185 } 186 return true; 187 } 188 189 /** ------------------------------------------------------------------------------------------------------------- * 190 * @brief removeIdenticalEscapedValues 191 ** ------------------------------------------------------------------------------------------------------------- */ 148 192 template <class ValueList> 149 193 inline void removeIdenticalEscapedValues(ValueList & list) { … … 161 205 } 162 206 207 /** ------------------------------------------------------------------------------------------------------------- * 208 * @brief eliminateRedundantCode 209 ** ------------------------------------------------------------------------------------------------------------- */ 163 210 void Simplifier::eliminateRedundantCode(PabloBlock & block, ExpressionTable * predecessor) { 164 211 ExpressionTable encountered(predecessor); … … 211 258 continue; 212 259 } 260 213 261 // Otherwise check if we any Assign reports the same value as another. If so, replace all uses of the 214 262 // second with the first. This will simplify future analysis. 215 263 removeIdenticalEscapedValues(ifNode->getDefined()); 264 265 // Finally after we've eliminated everything we can from the If body, check whether testing the If 266 // condition will meet or exceed the cost of executing the body. 267 if (LLVM_UNLIKELY(discardNestedIfBlock(ifNode->getBody()))) { 268 Statement * nested = ifNode->getBody().front(); 269 while (nested) { 270 Statement * next = nested->removeFromParent(); 271 nested->insertAfter(stmt); 272 stmt = nested; 273 nested = next; 274 } 275 ifNode->getDefined().clear(); 276 stmt = ifNode->eraseFromParent(true); 277 continue; 278 } 216 279 217 280 } else if (While * whileNode = dyn_cast<While>(stmt)) { … … 244 307 } 245 308 246 309 /** ------------------------------------------------------------------------------------------------------------- * 310 * @brief deadCodeElimination 311 ** ------------------------------------------------------------------------------------------------------------- */ 247 312 void Simplifier::deadCodeElimination(PabloBlock & block) { 248 313 Statement * stmt = block.front(); … … 260 325 } 261 326 327 /** ------------------------------------------------------------------------------------------------------------- * 328 * @brief eliminateRedundantEquations 329 ** ------------------------------------------------------------------------------------------------------------- */ 262 330 void Simplifier::eliminateRedundantEquations(PabloBlock & block) { 263 331 Statement * stmt = block.front(); -
icGREP/icgrep-devel/icgrep/pablo/pabloAST.h
r4775 r4841 213 213 return mName; 214 214 } 215 inline void setName(const String * const name) { 216 mName = name; 217 } 215 218 inline Statement * getNextNode() const { 216 219 return mNext; … … 239 242 } 240 243 } 241 } 242 inline void setName(const String * const name) { 243 mName = name; 244 } 244 } 245 245 #ifndef NDEBUG 246 246 bool noRecursiveOperand(const PabloAST * const operand); -
icGREP/icgrep-devel/icgrep/pablo/ps_if.h
r4764 r4841 44 44 return mDefined; 45 45 } 46 void addDefined(Assign * def);47 protected:48 46 inline DefinedVars & getDefined() { 49 47 return mDefined; 50 48 } 49 void addDefined(Assign * def); 50 protected: 51 51 If(PabloAST * expr, const std::initializer_list<Assign *> definedVars, PabloBlock & body); 52 53 52 If(PabloAST * expr, const std::vector<Assign *> & definedVars, PabloBlock & body); 54 53 private: -
icGREP/icgrep-devel/icgrep/re/printer_re.cpp
r4829 r4841 86 86 retVal += ") "; 87 87 } 88 else if (const GraphemeBoundary * g = dyn_cast<GraphemeBoundary>(re)) 89 { 88 else if (const GraphemeBoundary * g = dyn_cast<GraphemeBoundary>(re)) { 90 89 retVal = "Grapheme"; 91 90 switch (g->getType()) { … … 100 99 } 101 100 retVal += "Boundary("; 102 retVal += PrintRE(g->getExpression()); 103 retVal += ") "; 101 if (g->getExpression()) { 102 retVal += PrintRE(g->getExpression()); 103 } 104 retVal += ")"; 104 105 } 105 106 else if (isa<const End>(re)) -
icGREP/icgrep-devel/icgrep/re/re_analysis.cpp
r4835 r4841 63 63 // Eventually names might be set up for not unit length items. 64 64 return (n->getType() == Name::Type::Unicode || n->getType() == Name::Type::UnicodeProperty || n->getType() == Name::Type::Byte); 65 } else if (const GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) {66 return isUnicodeUnitLength(gb->getExpression());67 65 } 68 66 return false; // otherwise … … 127 125 return std::make_pair(0, std::numeric_limits<int>::max()); 128 126 } 129 } else if (const GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) { 130 return std::make_pair(getUnicodeUnitLengthRange(gb->getExpression()).first, std::numeric_limits<int>::max()); 127 } else if (const GraphemeBoundary * gp = dyn_cast<GraphemeBoundary>(re)) { 128 if (gp->getExpression()) { 129 return getUnicodeUnitLengthRange(gp->getExpression()); 130 } 131 return std::make_pair(0, 0); 131 132 } 132 133 return std::make_pair(1, 1); -
icGREP/icgrep-devel/icgrep/re/re_compiler.cpp
r4835 r4841 174 174 const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(functionName); 175 175 Call * call = mPB.createCall(Prototype::Create(functionName, std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits()); 176 name->setCompiled( mPB.createAnd(call, mAny));176 name->setCompiled(call); 177 177 } else { 178 178 #endif … … 241 241 } 242 242 } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) { 243 if (LLVM_LIKELY(gb->get GraphemeExtenderRule() == nullptr)) {243 if (LLVM_LIKELY(gb->getBoundaryRule() == nullptr)) { 244 244 switch (gb->getType()) { 245 245 case GraphemeBoundary::Type::ClusterBoundary: 246 246 if (graphemeClusterRule == nullptr) { 247 graphemeClusterRule = cast<Name>(resolve(generateGraphemeCluster ExtenderRule()));247 graphemeClusterRule = cast<Name>(resolve(generateGraphemeClusterBoundaryRule())); 248 248 } 249 249 gb->setBoundaryRule(graphemeClusterRule); … … 253 253 } 254 254 } 255 gb->setExpression(resolve(gb->getExpression())); 255 if (gb->getExpression()) { 256 resolve(gb->getExpression()); 257 } 256 258 } 257 259 return re; … … 261 263 std::unordered_set<Name *> visited; 262 264 263 std::function<void(RE*)> gather = [&](RE * re) { 265 std::function<void(RE*)> gather = [&](RE * re) { 264 266 if (Name * name = dyn_cast<Name>(re)) { 265 267 if (visited.insert(name).second) { … … 289 291 gather(ix->getRH()); 290 292 } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) { 291 gather(gb->getExpression()); 292 gather(gb->getGraphemeExtenderRule()); 293 if (gb->getExpression()) { 294 gather(gb->getExpression()); 295 } 296 gather(gb->getBoundaryRule()); 293 297 } 294 298 }; … … 302 306 for (auto t : nameMap) { 303 307 if (t.second) { 304 mCompiledName.insert(std::make_pair(t.first, makeMarker(MarkerPosition::FinalMatchByte, mPB.createAnd(t.second, mAny))));308 mCompiledName.insert(std::make_pair(t.first, makeMarker(MarkerPosition::FinalMatchByte, t.second))); 305 309 } 306 310 } … … 309 313 // Now precompile any grapheme segmentation rules 310 314 if (graphemeClusterRule) { 311 mCompiledName.insert(std::make_pair(graphemeClusterRule, compileName(graphemeClusterRule, mPB))); 315 auto gcb = compileName(graphemeClusterRule, mPB); 316 mCompiledName.insert(std::make_pair(graphemeClusterRule, gcb)); 312 317 } 313 318 return re; … … 318 323 } 319 324 320 Name * RE_Compiler::generateGraphemeCluster ExtenderRule() {325 Name * RE_Compiler::generateGraphemeClusterBoundaryRule() { 321 326 // 3.1.1 Grapheme Cluster Boundary Rules 322 327 #define Behind(x) makeLookBehindAssertion(x) … … 324 329 325 330 RE * GCB_Control = makeName("gcb", "cn", Name::Type::UnicodeProperty); 326 331 RE * GCB_CR = makeName("gcb", "cr", Name::Type::UnicodeProperty); 332 RE * GCB_LF = makeName("gcb", "lf", Name::Type::UnicodeProperty); 333 334 // Break at the start and end of text. 327 335 RE * GCB_1 = makeStart(); 328 336 RE * GCB_2 = makeEnd(); 337 // Do not break between a CR and LF. 338 RE * GCB_3 = makeSeq({Behind(GCB_CR), Ahead(GCB_LF)}); 339 // Otherwise, break before and after controls. 329 340 RE * GCB_4 = Behind(GCB_Control); 330 341 RE * GCB_5 = Ahead(GCB_Control); 331 332 RE * GCB_1_5 = makeAlt({GCB_1, GCB_2, makeSeq({GCB_4, GCB_5})}); 342 RE * GCB_1_5 = makeAlt({GCB_1, GCB_2, makeDiff(makeSeq({GCB_4, GCB_5}), GCB_3)}); 333 343 334 344 RE * GCB_L = makeName("gcb", "l", Name::Type::UnicodeProperty); … … 338 348 RE * GCB_T = makeName("gcb", "t", Name::Type::UnicodeProperty); 339 349 RE * GCB_RI = makeName("gcb", "ri", Name::Type::UnicodeProperty); 340 // Legacy rules350 // Do not break Hangul syllable sequences. 341 351 RE * GCB_6 = makeSeq({Behind(GCB_L), Ahead(makeAlt({GCB_L, GCB_V, GCB_LV, GCB_LVT}))}); 342 352 RE * GCB_7 = makeSeq({Behind(makeAlt({GCB_LV, GCB_V})), Ahead(makeAlt({GCB_V, GCB_T}))}); 343 353 RE * GCB_8 = makeSeq({Behind(makeAlt({GCB_LVT, GCB_T})), Ahead(GCB_T)}); 354 // Do not break between regional indicator symbols. 344 355 RE * GCB_8a = makeSeq({Behind(GCB_RI), Ahead(GCB_RI)}); 356 // Do not break before extending characters. 345 357 RE * GCB_9 = Ahead(makeName("gcb", "ex", Name::Type::UnicodeProperty)); 346 // Extended rules358 // Do not break before SpacingMarks, or after Prepend characters. 347 359 RE * GCB_9a = Ahead(makeName("gcb", "sm", Name::Type::UnicodeProperty)); 348 360 RE * GCB_9b = Behind(makeName("gcb", "pp", Name::Type::UnicodeProperty)); 349 350 361 RE * GCB_6_9b = makeAlt({GCB_6, GCB_7, GCB_8, GCB_8a, GCB_9, GCB_9a, GCB_9b}); 362 // Otherwise, break everywhere. 363 RE * GCB_10 = makeSeq({Behind(makeAny()), Ahead(makeAny())}); 364 351 365 Name * gcb = makeName("gcb", Name::Type::UnicodeProperty); 352 gcb->setDefinition(makeDiff(GCB_6_9b, GCB_1_5)); 353 366 gcb->setDefinition(makeAlt({GCB_1_5, makeDiff(GCB_10, GCB_6_9b)})); 354 367 return gcb; 355 368 } … … 550 563 551 564 inline PabloAST * RE_Compiler::consecutive_matches(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) { 552 553 554 555 556 557 558 559 560 }561 562 563 564 565 566 } 567 568 inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int repeated_lgth, int repeat_count, PabloBuilder & pb) {569 int i = repeated_lgth;570 int total_lgth = repeat_count * repeated_lgth;571 572 573 574 575 576 577 578 579 580 }581 582 583 584 585 565 int i = length; 566 int total = repeat_count * length; 567 PabloAST * consecutive_i = repeated; 568 while (i * 2 < total) { 569 PabloAST * v = consecutive_i; 570 PabloAST * v2 = pb.createAdvance(v, i); 571 i *= 2; 572 consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(i) + "of" + std::to_string(total)); 573 } 574 if (i < total) { 575 PabloAST * v = consecutive_i; 576 consecutive_i = pb.createAnd(v, pb.createAdvance(v, total - i), "at" + std::to_string(total)); 577 } 578 return consecutive_i; 579 } 580 581 inline PabloAST * RE_Compiler::reachable(PabloAST * repeated, int length, int repeat_count, PabloBuilder & pb) { 582 int i = length; 583 int total_lgth = repeat_count * length; 584 if (repeat_count == 0) { 585 return repeated; 586 } 587 PabloAST * reachable_i = pb.createOr(repeated, pb.createAdvance(repeated, 1), "within1"); 588 while (i * 2 < total_lgth) { 589 PabloAST * v = reachable_i; 590 PabloAST * v2 = pb.createAdvance(v, i); 591 i *= 2; 592 reachable_i = pb.createOr(v, v2, "within" + std::to_string(i)); 593 } 594 if (i < total_lgth) { 595 PabloAST * v = reachable_i; 596 reachable_i = pb.createOr(v, pb.createAdvance(v, total_lgth - i), "within" + std::to_string(total_lgth)); 597 } 598 return reachable_i; 586 599 } 587 600 … … 633 646 } 634 647 return makeMarker(MarkerPosition::InitialPostPositionByte, mstar); 635 } 636 else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) { 648 } else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) { 637 649 PabloAST * cc = markerVar(compile(repeated, pb)); 638 650 PabloAST * mstar = nullptr; 639 651 PabloAST * nonFinal = mNonFinal; 640 if (mGrapheme ExtenderRule) {641 nonFinal = pb.createOr(nonFinal, mGraphemeExtenderRule);652 if (mGraphemeBoundaryRule) { 653 nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext")); 642 654 } 643 655 cc = pb.createOr(cc, nonFinal); … … 648 660 } 649 661 PabloAST * final = mFinal; 650 if (mGrapheme ExtenderRule) {651 final = pb.createOr(final, pb.createNot(mGraphemeExtenderRule));662 if (mGraphemeBoundaryRule) { 663 final = mGraphemeBoundaryRule; 652 664 } 653 665 return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, final, "unbounded")); … … 705 717 if (UNICODE_LINE_BREAK) { 706 718 PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb)); 707 return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "e nd"));719 return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "eol")); 708 720 } else { 709 721 PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb)); // For LF match … … 712 724 } 713 725 714 inline MarkerType RE_Compiler::compileGraphemeBoundary(GraphemeBoundary * gb, const MarkerType marker, pablo::PabloBuilder & pb) { 715 const auto inGraphemeBoundaryRule = mGraphemeExtenderRule; 716 auto f = mCompiledName.find(gb->getGraphemeExtenderRule()); 717 if (LLVM_UNLIKELY(f == mCompiledName.end())) { 718 throw std::runtime_error("Internal error: failed to locate grapheme boundary rule!"); 719 } 720 mGraphemeExtenderRule = markerVar(f->second); 721 assert (mGraphemeExtenderRule); 722 MarkerType result = process(gb->getExpression(), marker, pb); 723 mGraphemeExtenderRule = inGraphemeBoundaryRule; 724 return result; 725 } 726 727 inline MarkerType RE_Compiler::AdvanceMarker(const MarkerType m, const MarkerPosition newpos, PabloBuilder & pb) { 728 if (m.pos == newpos) return m; 729 PabloAST * a = m.stream; 730 if (m.pos == MarkerPosition::FinalMatchByte) { 731 // Must advance the previous marker to the InitialPostPositionByte 732 a = pb.createAdvance(a, 1, "ipp"); 733 } 734 // Now at InitialPostPositionByte; is a further advance needed? 735 if (newpos == MarkerPosition::FinalPostPositionByte) { 736 // Must advance through nonfinal bytes 737 PabloAST * nonFinal = mNonFinal; 738 if (mGraphemeExtenderRule) { 739 nonFinal = pb.createOr(nonFinal, mGraphemeExtenderRule, "gext"); 740 } 741 a = pb.createScanThru(pb.createAnd(mInitial, a), nonFinal, "fpp"); 742 } 743 return {newpos, a}; 726 inline MarkerType RE_Compiler::compileGraphemeBoundary(GraphemeBoundary * gb, MarkerType marker, pablo::PabloBuilder & pb) { 727 auto f = mCompiledName.find(gb->getBoundaryRule()); 728 assert ("Internal error: failed to locate grapheme boundary rule!" && (f != mCompiledName.end())); 729 if (gb->getExpression()) { 730 const auto graphemeBoundaryRule = mGraphemeBoundaryRule; 731 mGraphemeBoundaryRule = markerVar(f->second); 732 marker = process(gb->getExpression(), marker, pb); 733 marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb); 734 mGraphemeBoundaryRule = graphemeBoundaryRule; 735 } else { 736 marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb); 737 PabloAST * rule = markerVar(f->second); 738 if (gb->getSense() == GraphemeBoundary::Sense::Negative) { 739 rule = pb.createNot(rule); 740 } 741 marker = makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(marker), rule, "gb")); 742 } 743 return marker; 744 } 745 746 inline MarkerType RE_Compiler::AdvanceMarker(MarkerType marker, const MarkerPosition newpos, PabloBuilder & pb) { 747 if (marker.pos != newpos) { 748 if (marker.pos == MarkerPosition::FinalMatchByte) { 749 marker.stream = pb.createAdvance(marker.stream, 1, "ipp"); 750 marker.pos = MarkerPosition::InitialPostPositionByte; 751 } 752 if (newpos == MarkerPosition::FinalPostPositionByte) { 753 PabloAST * nonFinal = mNonFinal; 754 if (mGraphemeBoundaryRule) { 755 nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext")); 756 } 757 marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), nonFinal, "fpp"); 758 marker.pos = MarkerPosition::FinalPostPositionByte; 759 } 760 } 761 return marker; 744 762 } 745 763 … … 758 776 , mUnicodeLineBreak(nullptr) 759 777 , mAny(nullptr) 760 , mGrapheme ExtenderRule(nullptr)778 , mGraphemeBoundaryRule(nullptr) 761 779 , mInitial(nullptr) 762 780 , mNonFinal(nullptr) -
icGREP/icgrep-devel/icgrep/re/re_compiler.h
r4833 r4841 63 63 64 64 MarkerType compile(RE * re, pablo::PabloBuilder & cg); 65 MarkerType AdvanceMarker(const MarkerType m, const MarkerPosition newpos, pablo::PabloBuilder & pb); 66 67 void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBuilder & pb); 68 65 69 66 MarkerType process(RE * re, MarkerType marker, pablo::PabloBuilder & pb); 70 67 MarkerType compileName(Name * name, MarkerType marker, pablo::PabloBuilder & pb); … … 77 74 MarkerType compileIntersect(Intersect * x, MarkerType marker, pablo::PabloBuilder & cg); 78 75 pablo::PabloAST * consecutive_matches(pablo::PabloAST * repeated, int length, int repeat_count, pablo::PabloBuilder & pb); 79 pablo::PabloAST * reachable(pablo::PabloAST * repeated, int repeated_lgth, int repeat_count, pablo::PabloBuilder & pb);76 pablo::PabloAST * reachable(pablo::PabloAST * repeated, int length, int repeat_count, pablo::PabloBuilder & pb); 80 77 static bool isFixedLength(RE * regexp); 81 78 MarkerType processLowerBound(RE * repeated, int lb, MarkerType marker, pablo::PabloBuilder & pb); … … 84 81 RE * resolveUnicodeProperties(RE * re); 85 82 86 Name * generateGraphemeCluster ExtenderRule();83 Name * generateGraphemeClusterBoundaryRule(); 87 84 MarkerType compileName(Name * name, pablo::PabloBuilder & pb); 88 85 MarkerType compileAny(const MarkerType m, pablo::PabloBuilder & pb); 89 86 MarkerType compileStart(const MarkerType marker, pablo::PabloBuilder & pb); 90 87 MarkerType compileEnd(const MarkerType marker, pablo::PabloBuilder & pb); 91 MarkerType compileGraphemeBoundary(GraphemeBoundary *gb, const MarkerType marker, pablo::PabloBuilder & pb); 88 MarkerType compileGraphemeBoundary(GraphemeBoundary *gb, MarkerType marker, pablo::PabloBuilder & pb); 89 90 MarkerType AdvanceMarker(MarkerType marker, const MarkerPosition newpos, pablo::PabloBuilder & pb); 91 void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBuilder & pb); 92 92 93 93 private: … … 98 98 pablo::PabloAST * mUnicodeLineBreak; 99 99 pablo::PabloAST * mAny; 100 pablo::PabloAST * mGrapheme ExtenderRule;100 pablo::PabloAST * mGraphemeBoundaryRule; 101 101 pablo::PabloAST * mInitial; 102 102 pablo::Assign * mNonFinal; … … 106 106 std::vector<pablo::Next *> mLoopVariants; // <- rethink name 107 107 pablo::PabloBuilder mPB; 108 std::unordered_map<Name *, MarkerType> mCompiledName; 108 std::unordered_map<Name *, MarkerType> mCompiledName; 109 109 pablo::PabloFunction & mFunction; 110 110 }; -
icGREP/icgrep-devel/icgrep/re/re_grapheme_boundary.hpp
r4833 r4841 16 16 17 17 enum class Type { 18 ClusterBoundary // g19 , WordBoundary // w20 , LineBreakBoundary // l21 , SentenceBoundary // s18 ClusterBoundary = 0 // g 19 , WordBoundary = 1 // w 20 , LineBreakBoundary = 2 // l 21 , SentenceBoundary = 3 // s 22 22 }; 23 23 24 enum class Sense {Positive, Negative}; 25 26 inline Type getType() const { return mType; } 27 GraphemeBoundary::Sense getSense() const {return mSense;} 24 28 inline RE * getExpression() const {return mExpression;} 25 inline void setExpression(RE * const r) {mExpression = r; } 26 inline Name * getGraphemeExtenderRule() const {return mBoundaryRule;} 27 inline void setBoundaryRule(Name * const r) {mBoundaryRule = r; } 28 inline Type getType() const {return mType;} 29 inline void setExpression(RE * const r) { mExpression = r; } 30 inline Name * getBoundaryRule() const {return mBoundaryRule;} 31 inline void setBoundaryRule(Name * const r) { mBoundaryRule = r; } 29 32 30 33 protected: 31 friend GraphemeBoundary * makeGraphemeBoundary( RE * const expression, const Type type);32 GraphemeBoundary( RE * const expression, const Type type) : RE(ClassTypeId::GraphemeBoundary), mExpression(expression), mBoundaryRule(nullptr), mType(type) {}34 friend GraphemeBoundary * makeGraphemeBoundary(const Type type, const Sense sense, RE * expression); 35 GraphemeBoundary(const Type type, const Sense sense, RE * expression) : RE(ClassTypeId::GraphemeBoundary), mType(type), mSense(sense), mExpression(expression), mBoundaryRule(nullptr) {} 33 36 virtual ~GraphemeBoundary() {} 34 37 35 38 private: 36 RE * mExpression; 37 Name * mBoundaryRule; 38 const Type mType; 39 const Type mType; 40 const Sense mSense; 41 RE * mExpression; 42 Name * mBoundaryRule; 39 43 }; 40 44 41 inline GraphemeBoundary * makeGraphemeBoundary(RE * const expression, const GraphemeBoundary::Type type) { 42 return new GraphemeBoundary(expression, type); 45 inline GraphemeBoundary * makeGraphemeBoundary(const GraphemeBoundary::Type type, const GraphemeBoundary::Sense sense, RE * expression) { 46 return new GraphemeBoundary(type, sense, expression); 47 } 48 49 inline GraphemeBoundary * makeGraphemeClusterBoundary(const GraphemeBoundary::Sense sense = GraphemeBoundary::Sense::Positive, RE * expression = nullptr) { 50 return makeGraphemeBoundary(GraphemeBoundary::Type::ClusterBoundary, sense, expression); 43 51 } 44 52 -
icGREP/icgrep-devel/icgrep/re/re_nullable.cpp
r4829 r4841 49 49 name->setDefinition(removeNullablePrefix(name->getDefinition())); 50 50 } 51 } else if (GraphemeBoundary * g = dyn_cast<GraphemeBoundary>(re)) {52 g->setExpression(removeNullablePrefix(g->getExpression()));53 51 } 54 52 return re; … … 86 84 name->setDefinition(removeNullableSuffix(name->getDefinition())); 87 85 } 88 } else if (GraphemeBoundary * g = dyn_cast<GraphemeBoundary>(re)) {89 g->setExpression(removeNullableSuffix(g->getExpression()));90 86 } 91 87 return re; -
icGREP/icgrep-devel/icgrep/re/re_parser.cpp
r4839 r4841 47 47 throw ParseFailure("An unexpected parsing error occurred!"); 48 48 } 49 if (parser.fModeFlagSet & ModeFlagType::GRAPHEME_CLUSTER_MODE) {50 re = makeGraphemeBoundary(re, GraphemeBoundary::Type::ClusterBoundary);51 }52 49 return re; 53 50 } … … 72 69 73 70 RE * RE_Parser::parse_RE() { 74 RE * r = parse_alt(); 75 return r; 71 return parse_alt(); 76 72 } 77 73 … … 80 76 for (;;) { 81 77 alt.push_back(parse_seq()); 82 if ( mCursor.noMore() ||*mCursor != '|') {78 if (*mCursor != '|') { 83 79 break; 84 80 } 85 81 ++mCursor; // advance past the alternation character '|' 86 82 } 87 if (alt.empty()) 88 { 83 if (alt.empty()) { 89 84 throw NoRegularExpressionFound(); 90 85 } … … 106 101 107 102 RE * RE_Parser::parse_next_item() { 108 RE * re = nullptr;109 103 if (mCursor.more()) { 110 104 switch (*mCursor) { … … 119 113 return makeEnd(); 120 114 case '|': case ')': 121 return nullptr; // This is ugly.115 break; 122 116 case '*': case '+': case '?': case '{': 123 117 throw NothingToRepeat(); … … 126 120 return createCC(parse_utf8_codepoint()); 127 121 } 128 elsethrow ParseFailure("Use \\] for literal ].");122 throw ParseFailure("Use \\] for literal ]."); 129 123 case '}': 130 124 if (fNested) { 131 return nullptr; // a recursive invocation for a regexp in \N{...} 132 } 133 else if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) { 125 break; // a recursive invocation for a regexp in \N{...} 126 } else if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) { 134 127 return createCC(parse_utf8_codepoint()); 135 128 } 136 elsethrow ParseFailure("Use \\} for literal }.");129 throw ParseFailure("Use \\} for literal }."); 137 130 case '[': 138 131 mCursor++; … … 148 141 } 149 142 } 150 return re;151 } 152 153 154 // Parse some kind of parenthesized group. Input precondition: _cursor143 return nullptr; 144 } 145 146 147 // Parse some kind of parenthesized group. Input precondition: mCursor 155 148 // after the ( 156 149 RE * RE_Parser::parse_group() { 157 RE * subexpr; 158 RE * group_expr; 159 ModeFlagSet savedModeFlags = fModeFlagSet; 150 const ModeFlagSet modeFlagSet = fModeFlagSet; 151 RE * group_expr = nullptr; 160 152 if (*mCursor == '?') { 161 153 switch (*++mCursor) { 162 154 case '#': // comment 163 ++mCursor; 164 while (mCursor.more() && *mCursor != ')') { 165 ++mCursor; 166 } 155 while (*++mCursor != ')'); 167 156 ++mCursor; 168 157 return parse_next_item(); … … 173 162 case '=': 174 163 ++mCursor; 175 subexpr = parse_alt(); 176 group_expr = makeLookAheadAssertion(subexpr); 164 group_expr = makeLookAheadAssertion(parse_alt()); 177 165 break; 178 166 case '!': 179 167 ++mCursor; 180 subexpr = parse_alt(); 181 group_expr = makeNegativeLookAheadAssertion(subexpr); 168 group_expr = makeNegativeLookAheadAssertion(parse_alt()); 182 169 break; 183 170 case '>': 184 171 ++mCursor; 185 subexpr = parse_alt(); 186 group_expr = makeAtomicGroup(subexpr); 172 group_expr = makeAtomicGroup(parse_alt()); 187 173 break; 188 174 case '|': 189 175 ++mCursor; 190 subexpr = parse_alt(); 191 group_expr = makeBranchResetGroup(subexpr); 176 group_expr = makeBranchResetGroup(parse_alt()); 192 177 break; 193 178 case '<': … … 195 180 if (*mCursor == '=') { 196 181 ++mCursor; 197 subexpr = parse_alt(); 198 group_expr = makeLookBehindAssertion(subexpr); 182 group_expr = makeLookBehindAssertion(parse_alt()); 199 183 } 200 184 else if (*mCursor == '!') { 201 185 ++mCursor; 202 subexpr = parse_alt(); 203 group_expr = makeNegativeLookBehindAssertion(subexpr); 186 group_expr = makeNegativeLookBehindAssertion(parse_alt()); 204 187 } else { 205 188 throw ParseFailure("Illegal lookbehind assertion syntax."); 206 189 } 207 190 break; 208 case '-': case 'd' : case 'i': case 'm': case 's': case 'x': case 'g': { 209 bool negateMode = false; 210 ModeFlagType modeBit; 191 case '-': case 'd' : case 'i': case 'm': case 's': case 'x': case 'g': 211 192 while (*mCursor != ')' && *mCursor != ':') { 193 bool negateMode = false; 194 ModeFlagType modeBit; 212 195 if (*mCursor == '-') { 213 196 negateMode = true; 214 197 ++mCursor; 215 198 } 216 switch (*mCursor ++) {199 switch (*mCursor) { 217 200 case 'i': modeBit = CASE_INSENSITIVE_MODE_FLAG; break; 218 201 case 'g': modeBit = GRAPHEME_CLUSTER_MODE; break; … … 223 206 default: throw ParseFailure("Unsupported mode flag."); 224 207 } 208 ++mCursor; 225 209 if (negateMode) { 226 210 fModeFlagSet &= ~modeBit; … … 233 217 ++mCursor; 234 218 group_expr = parse_alt(); 219 fModeFlagSet = modeFlagSet; 220 break; 235 221 } else { // if *_cursor == ')' 236 222 ++mCursor; 237 // return immediately without restoring mode flags238 223 return parse_next_item(); 239 224 } 240 break;241 }242 225 default: 243 226 throw ParseFailure("Illegal (? syntax."); … … 246 229 group_expr = parse_alt(); 247 230 } 248 // Restore mode flags.249 fModeFlagSet = savedModeFlags;250 231 if (*mCursor != ')') { 251 232 throw ParseFailure("Closing parenthesis required."); … … 255 236 } 256 237 257 RE * RE_Parser::extend_item(RE * re) { 258 if (mCursor.noMore()) { 259 return re; 260 } else { 238 inline RE * RE_Parser::extend_item(RE * re) { 239 if (LLVM_LIKELY(mCursor.more())) { 261 240 int lb = 0, ub = 0; 241 bool hasRep = true; 262 242 switch (*mCursor) { 263 243 case '*': … … 277 257 break; 278 258 default: 279 return re; 280 } 281 if (lb > MAX_REPETITION_LOWER_BOUND || ub > MAX_REPETITION_UPPER_BOUND) { 282 throw ParseFailure("Bounded repetition exceeds icgrep implementation limit"); 283 } 284 if ((ub != Rep::UNBOUNDED_REP) && (lb > ub)) { 285 throw ParseFailure("Lower bound cannot exceed upper bound in bounded repetition"); 286 } 287 ++mCursor; 288 if (*mCursor == '?') { // Non-greedy qualifier 289 // Greedy vs. non-greedy makes no difference for icgrep. 290 ++mCursor; 291 } else if (*mCursor == '+') { 292 ++mCursor; 293 throw ParseFailure("Possessive repetition is not supported in icgrep 1.0"); 294 } 295 return makeRep(re, lb, ub); 296 } 259 hasRep = false; 260 } 261 if (hasRep) { 262 if (lb > MAX_REPETITION_LOWER_BOUND || ub > MAX_REPETITION_UPPER_BOUND) { 263 throw ParseFailure("Bounded repetition exceeds icgrep implementation limit"); 264 } 265 if ((ub != Rep::UNBOUNDED_REP) && (lb > ub)) { 266 throw ParseFailure("Lower bound cannot exceed upper bound in bounded repetition"); 267 } 268 ++mCursor; 269 if (*mCursor == '?') { // Non-greedy qualifier 270 // Greedy vs. non-greedy makes no difference for icgrep. 271 ++mCursor; 272 } else if (*mCursor == '+') { 273 ++mCursor; 274 throw ParseFailure("Possessive repetition is not supported in icgrep 1.0"); 275 } 276 re = makeRep(re, lb, ub); 277 } 278 } 279 if ((fModeFlagSet & ModeFlagType::GRAPHEME_CLUSTER_MODE) != 0) { 280 re = makeGraphemeClusterBoundary(GraphemeBoundary::Sense::Positive, re); 281 } 282 return re; 297 283 } 298 284 … … 345 331 } 346 332 } 347 348 inline RE * makeGraphemeClusterBoundary() {349 throw ParseFailure("makeGraphemeClusterBoundary() not yet supported.");350 }351 352 333 353 334 RE * RE_Parser::parseEscapedSet() { … … 361 342 } else { 362 343 switch (*++mCursor) { 363 case 'g': re = makeGraphemeClusterBoundary(); 344 case 'g': 345 re = makeGraphemeClusterBoundary(complemented ? GraphemeBoundary::Sense::Negative : GraphemeBoundary::Sense::Positive); 346 break; 364 347 case 'w': throw ParseFailure("\\b{w} not yet supported."); 365 348 case 'l': throw ParseFailure("\\b{l} not yet supported."); … … 371 354 } 372 355 ++mCursor; 373 return complemented ? makeComplement(re) :re;356 return re; 374 357 } 375 358 case 'd': … … 421 404 // to get to the next extended grapheme cluster boundary. 422 405 ++mCursor; 423 return makeGraphemeBoundary(makeAny(), GraphemeBoundary::Type::ClusterBoundary); 406 // return makeSeq({makeRep(makeAny(), 1, Rep::UNBOUNDED_REP), makeGraphemeClusterBoundary()}); 407 return makeGraphemeClusterBoundary(GraphemeBoundary::Sense::Positive, makeAny()); 424 408 case 'N': 425 409 if (*++mCursor != '{') { -
icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp
r4405 r4841 1 1 #include "re_simplifier.h" 2 #include "re_cc.h" 3 #include "re_name.h" 4 #include "re_start.h" 5 #include "re_end.h" 6 #include "re_seq.h" 7 #include "re_alt.h" 8 #include "re_rep.h" 9 #include "re_diff.h" 10 #include "re_intersect.h" 11 #include "re_assertion.h" 2 #include <re/re_name.h> 3 #include <re/re_any.h> 4 #include <re/re_start.h> 5 #include <re/re_end.h> 6 #include <re/re_alt.h> 7 #include <re/re_cc.h> 8 #include <re/re_seq.h> 9 #include <re/re_rep.h> 10 #include <re/re_diff.h> 11 #include <re/re_intersect.h> 12 #include <re/re_assertion.h> 13 #include <re/re_grapheme_boundary.hpp> 12 14 #include <algorithm> 13 15 #include <memory> … … 24 26 } 25 27 re = makeAlt(list.begin(), list.end()); 26 } 27 else if (Seq * seq = dyn_cast<Seq>(re)) { 28 } else if (Seq * seq = dyn_cast<Seq>(re)) { 28 29 std::vector<RE*> list; 29 30 list.reserve(seq->size()); … … 32 33 } 33 34 re = makeSeq(list.begin(), list.end()); 34 } 35 else if (Assertion * a = dyn_cast<Assertion>(re)) { 35 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 36 36 re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense()); 37 } 38 else if (Rep * rep = dyn_cast<Rep>(re)) { 37 } else if (Rep * rep = dyn_cast<Rep>(re)) { 39 38 re = makeRep(simplify(rep->getRE()), rep->getLB(), rep->getUB()); 40 } 41 else if (Diff * diff = dyn_cast<Diff>(re)) { 39 } else if (Diff * diff = dyn_cast<Diff>(re)) { 42 40 re = makeDiff(simplify(diff->getLH()), diff->getRH()); 43 } 44 else if (Intersect * e = dyn_cast<Intersect>(re)) { 41 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 45 42 re = makeIntersect(simplify(e->getLH()), e->getRH()); 46 43 } -
icGREP/icgrep-devel/icgrep/re/re_simplifier.h
r4203 r4841 2 2 #define RE_SIMPLIFIER_H 3 3 4 //Regular Expressions5 #include "re_seq.h"6 #include <list>7 8 4 namespace re { 9 5 10 class Alt;6 class RE; 11 7 12 8 class RE_Simplifier {
Note: See TracChangeset
for help on using the changeset viewer.