- Timestamp:
- Sep 15, 2018, 5:56:24 PM (5 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp
r5998 → r6161  (columns: old line, new line, change marker)
 67  67         } else if (value == "\\b{g}") {
 68  68             RE * gcb = generateGraphemeClusterBoundaryRule();
 69      -          property->setDefinition(resolveUnicodeProperties(gcb));
     69  +          property->setDefinition(resolveUnicodeNames(gcb));
 70  70             return true;
 71  71         } else if (value == "^s") {  // "start anchor (^) in single-line mode"
  …   …
109 109         // resolve a regular expression
110 110         re::RE * propValueRe = RE_Parser::parse(value.substr(1), re::DEFAULT_MODE, re::PCRE, false);
111      -      propValueRe = re::resolveNames(propValueRe);  // Recursive name resolution may be required.
    111  +      propValueRe = re::resolveUnicodeNames(propValueRe);  // Recursive name resolution may be required.
112 112         return propObj->GetCodepointSetMatchingPattern(propValueRe);
113 113     }
-
icGREP/icgrep-devel/icgrep/kernels/grapheme_kernel.cpp
r5888 → r6161  (columns: old line, new line, change marker)
 41  41             nameMap.emplace(name, nullptr);
 42  42         }
 43      -      GCB = resolveUnicodeProperties(GCB);
     43  +      GCB = resolveUnicodeNames(GCB);
 44  44         ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
 45  45         re_compiler.addPrecompiled("UTF8_nonfinal", pb.createExtract(getInputStreamVar("nonFinal"), pb.getInteger(0)));
-
icGREP/icgrep-devel/icgrep/re/exclude_CC.cpp
r5929 r6161 19 19 #include <re/re_intersect.h> 20 20 #include <re/re_assertion.h> 21 #include <re/re_utility.h> 21 22 #include <llvm/Support/Casting.h> 22 23 #include <llvm/Support/ErrorHandling.h> … … 25 26 26 27 namespace re { 28 29 class CC_Remover : public RE_Transformer { 30 public: 31 CC_Remover(CC * toExclude) : RE_Transformer(), mExcludedCC(toExclude) {} 32 RE * transformCC (CC * cc) override; 33 RE * transformName (Name * name) override; 34 private: 35 CC * mExcludedCC; 36 }; 37 38 RE * CC_Remover::transformCC(CC * cc) { 39 if (intersects(mExcludedCC, cc)) return subtractCC(cc, mExcludedCC); 40 else return cc; 41 } 27 42 28 bool mayMatchCC(RE * re, CC * cc) { 29 if (CC * cc0 = dyn_cast<CC>(re)) { 30 return intersects(cc0, cc); 31 } else if (Seq * seq = dyn_cast<Seq>(re)) { 32 for (auto s : * seq) { 33 if (mayMatchCC(s, cc)) return true; 34 } 35 return false; 36 } else if (Alt * alt = dyn_cast<Alt>(re)) { 37 for (auto a : * alt) { 38 if (mayMatchCC(a, cc)) return true; 39 } 40 return false; 41 } else if (Rep * rep = dyn_cast<Rep>(re)) { 42 return mayMatchCC(rep->getRE(), cc); 43 } else if (Group * g = dyn_cast<Group>(re)) { 44 return mayMatchCC(g->getRE(), cc); 45 } else if (Diff * diff = dyn_cast<Diff>(re)) { 46 // We only need exclude from the LH operand. 47 return mayMatchCC(diff->getLH(), cc); 48 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 49 // We only need check one of the operands. 
50 return mayMatchCC(e->getLH(), cc); 51 } else if (isa<Start>(re) || isa<End>(re) || isa<Assertion>(re)) { 52 return false; 53 } else if (Name * n = dyn_cast<Name>(re)) { 54 if (n->getType() == Name::Type::ZeroWidth) { 55 return false; 56 } 57 RE * defn = n->getDefinition(); 58 return mayMatchCC(defn, cc); 59 } else { 60 report_fatal_error("exclude_CC: unhandled regexp type"); 43 RE * CC_Remover::transformName(Name * n) { 44 switch (n->getType()) { 45 case Name::Type::Reference: 46 case Name::Type::ZeroWidth: 47 return n; 48 case Name::Type::Capture: 49 return makeCapture(n->getName(), transform(n->getDefinition())); 50 default: 51 RE * defn = n->getDefinition(); 52 if (const CC * cc0 = dyn_cast<CC>(defn)) { 53 if (!intersects(mExcludedCC, cc0)) return n; 54 } 55 std::string cc_name = n->getName() + "--" + mExcludedCC->canonicalName(); 56 return makeName(cc_name, Name::Type::Unicode, transform(defn)); 57 /* 58 return transform(defn); 59 */ 61 60 } 62 61 } 63 64 62 65 63 RE * exclude_CC(RE * re, CC * cc) { 66 if (!mayMatchCC(re, cc)) return re; 67 if (CC * cc0 = dyn_cast<CC>(re)) { 68 if (intersects(cc0, cc)) return subtractCC(cc0, cc); 69 else return cc0; 70 } else if (Seq * seq = dyn_cast<Seq>(re)) { 71 std::vector<RE*> list; 72 for (auto s : * seq) { 73 list.push_back(exclude_CC(s, cc)); 74 } 75 return makeSeq(list.begin(), list.end()); 76 } else if (Alt * alt = dyn_cast<Alt>(re)) { 77 std::vector<RE*> list; 78 for (auto a : * alt) { 79 list.push_back(exclude_CC(a, cc)); 80 } 81 return makeAlt(list.begin(), list.end()); 82 } else if (Rep * rep = dyn_cast<Rep>(re)) { 83 return makeRep(exclude_CC(rep->getRE(), cc), rep->getLB(), rep->getUB()); 84 } else if (Group * g = dyn_cast<Group>(re)) { 85 return makeGroup(g->getMode(), exclude_CC(g->getRE(), cc), g->getSense()); 86 } else if (Diff * diff = dyn_cast<Diff>(re)) { 87 // We only need exclude from the LH operand. 
88 return makeDiff(exclude_CC(diff->getLH(), cc), diff->getRH()); 89 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 90 // We only need exclude from the one of the operands. 91 return makeIntersect(exclude_CC(e->getLH(), cc), e->getRH()); 92 } else if (isa<Start>(re) || isa<End>(re) || isa<Assertion>(re)) { 93 return re; 94 } else if (Name * n = dyn_cast<Name>(re)) { 95 switch (n->getType()) { 96 case Name::Type::Reference: 97 case Name::Type::ZeroWidth: 98 return re; 99 case Name::Type::Capture: 100 return makeCapture(n->getName(), exclude_CC(n->getDefinition(), cc)); 101 default: 102 RE * defn = n->getDefinition(); 103 if (const CC * cc0 = dyn_cast<CC>(defn)) { 104 if (!intersects(cc0, cc)) return re; 105 } 106 std::string cc_name = n->getName() + "--" + cc->canonicalName(); 107 return makeName(cc_name, Name::Type::Unicode, exclude_CC(defn, cc)); 108 /* 109 return exclude_CC(defn, cc); 110 */ 111 } 112 } else { 113 report_fatal_error("exclude_CC: unhandled regexp type"); 114 } 64 return CC_Remover(cc).transform(re); 115 65 } 116 66 } -
icGREP/icgrep-devel/icgrep/re/exclude_CC.h
r5929 → r6161  (columns: old line, new line, change marker)
 12  12     class CC;
 13  13
 14      -  /* Return true if a string matched by r may contain a character in cc. */
 15      -  bool mayMatchCC(RE * r, CC * cc);
 16      -
 17  14     /* Transform a regular expression r so that matched strings do not include
 18  15        matches to any character within the given character class cc.
-
icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp
r6160 r6161 28 28 namespace re { 29 29 30 struct NameResolver { 31 RE * resolveUnicodeProperties(RE * re) { 32 if (Name * name = dyn_cast<Name>(re)) { 33 auto f = mMemoizer.find(name); 34 if (f == mMemoizer.end()) { 35 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 36 name->setDefinition(resolveUnicodeProperties(name->getDefinition())); 37 } else if (LLVM_LIKELY(name->getType() == Name::Type::UnicodeProperty || name->getType() == Name::Type::ZeroWidth)) { 38 if (UCD::resolvePropertyDefinition(name)) { 39 name->setDefinition(resolveUnicodeProperties(name->getDefinition())); 40 } else { 41 name->setDefinition(makeCC(UCD::resolveUnicodeSet(name), &cc::Unicode)); 42 } 43 } else { 44 UndefinedNameError(name); 45 } 46 re = mMemoizer.memoize(name); 47 } else { 48 return *f; 49 } 50 } else if (Vector * vec = dyn_cast<Vector>(re)) { 51 for (RE *& re : *vec) { 52 re = resolveUnicodeProperties(re); 53 } 54 } else if (Rep * rep = dyn_cast<Rep>(re)) { 55 rep->setRE(resolveUnicodeProperties(rep->getRE())); 56 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 57 a->setAsserted(resolveUnicodeProperties(a->getAsserted())); 58 } else if (Range * rg = dyn_cast<Range>(re)) { 59 return makeRange(resolveUnicodeProperties(rg->getLo()), 60 resolveUnicodeProperties(rg->getHi())); 61 } else if (Diff * diff = dyn_cast<Diff>(re)) { 62 diff->setLH(resolveUnicodeProperties(diff->getLH())); 63 diff->setRH(resolveUnicodeProperties(diff->getRH())); 64 } else if (Intersect * ix = dyn_cast<Intersect>(re)) { 65 ix->setLH(resolveUnicodeProperties(ix->getLH())); 66 ix->setRH(resolveUnicodeProperties(ix->getRH())); 67 } else if (Group * g = dyn_cast<Group>(re)) { 68 g->setRE(resolveUnicodeProperties(g->getRE())); 69 } 70 return re; 71 } 72 73 RE * resolve(RE * re) { 74 if (Name * name = dyn_cast<Name>(re)) { 75 auto f = mMemoizer.find(name); 76 if (f == mMemoizer.end()) { 77 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 78 name->setDefinition(resolve(name->getDefinition())); 79 } else { 
80 UndefinedNameError(name); 81 } 82 re = mMemoizer.memoize(name); 83 } else { 84 return *f; 85 } 86 } else if (Vector * vec = dyn_cast<Vector>(re)) { 87 for (RE *& re : *vec) { 88 re = resolve(re); 89 } 90 } else if (Rep * rep = dyn_cast<Rep>(re)) { 91 rep->setRE(resolve(rep->getRE())); 92 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 93 a->setAsserted(resolve(a->getAsserted())); 94 } else if (Range * rg = dyn_cast<Range>(re)) { 95 return makeRange(resolve(rg->getLo()), resolve(rg->getHi())); 96 } else if (Diff * diff = dyn_cast<Diff>(re)) { 97 diff->setLH(resolve(diff->getLH())); 98 diff->setRH(resolve(diff->getRH())); 99 } else if (Intersect * ix = dyn_cast<Intersect>(re)) { 100 ix->setLH(resolve(ix->getLH())); 101 ix->setRH(resolve(ix->getRH())); 102 } else if (Group * g = dyn_cast<Group>(re)) { 103 g->setRE(resolve(g->getRE())); 104 } 105 return re; 106 } 107 30 class UnicodeNameResolver : public RE_Transformer { 31 public: 32 UnicodeNameResolver() : RE_Transformer() {} 33 RE * transformName(Name * name) override; 108 34 private: 109 Memoizer 35 Memoizer mMemoizer; 110 36 }; 111 37 112 RE * resolveUnicodeProperties(RE * re) { 113 NameResolver nameResolver; 114 return nameResolver.resolveUnicodeProperties(re); 38 RE * UnicodeNameResolver::transformName(Name * name) { 39 auto f = mMemoizer.find(name); 40 if (f == mMemoizer.end()) { 41 if (LLVM_LIKELY(name->getDefinition() != nullptr)) { 42 name->setDefinition(transform(name->getDefinition())); 43 } else if (LLVM_LIKELY(name->getType() == Name::Type::UnicodeProperty || name->getType() == Name::Type::ZeroWidth)) { 44 if (UCD::resolvePropertyDefinition(name)) { 45 name->setDefinition(transform(name->getDefinition())); 46 } else { 47 name->setDefinition(makeCC(UCD::resolveUnicodeSet(name), &cc::Unicode)); 48 } 49 } else { 50 UndefinedNameError(name); 51 } 52 return mMemoizer.memoize(name); 53 } else { 54 return *f; 115 55 } 116 117 RE * resolveNames(RE * re) {118 NameResolver nameResolver;119 return 
nameResolver.resolve(re);120 }121 122 123 124 bool hasAnchor(const RE * re) {125 if (const Alt * alt = dyn_cast<Alt>(re)) {126 for (const RE * re : *alt) {127 if (hasAnchor(re)) {128 return true;129 }130 }131 return false;132 } else if (const Seq * seq = dyn_cast<Seq>(re)) {133 for (const RE * re : *seq) {134 if (hasAnchor(re)) {135 return true;136 }137 }138 return false;139 } else if (const Rep * rep = dyn_cast<Rep>(re)) {140 return hasAnchor(rep->getRE());141 } else if (isa<Start>(re)) {142 return true;143 } else if (isa<End>(re)) {144 return true;145 } else if (const Assertion * a = dyn_cast<Assertion>(re)) {146 return hasAnchor(a->getAsserted());147 } else if (const Diff * diff = dyn_cast<Diff>(re)) {148 return hasAnchor(diff->getLH()) || hasAnchor(diff->getRH());149 } else if (const Intersect * e = dyn_cast<Intersect>(re)) {150 return hasAnchor(e->getLH()) || hasAnchor(e->getRH());151 } else if (isa<Any>(re)) {152 return false;153 } else if (isa<CC>(re)) {154 return false;155 } else if (const Group * g = dyn_cast<Group>(re)) {156 return hasAnchor(g->getRE());157 } else if (const Name * n = dyn_cast<Name>(re)) {158 return hasAnchor(n->getDefinition());159 }160 return false; // otherwise161 56 } 162 57 163 struct AnchorResolution { 58 RE * resolveUnicodeNames(RE * re) { 59 return UnicodeNameResolver().transform(re); 60 } 61 62 63 class AnchorResolution : public RE_Transformer { 64 public: 65 AnchorResolution(RE * anchorRE); 66 RE * transformStart(Start * s) override; 67 RE * transformEnd(End * s) override; 68 69 private: 164 70 RE * mAnchorRE; 165 71 bool mIsNegated; 166 RE * resolve(RE * r);167 72 }; 168 169 RE * AnchorResolution::resolve(RE * r) { 170 if (hasAnchor(r)) { 171 if (const Alt * alt = dyn_cast<Alt>(r)) { 172 std::vector<RE *> list; 173 list.reserve(alt->size()); 174 for (RE * item : *alt) { 175 item = resolve(item); 176 list.push_back(item); 177 } 178 return makeAlt(list.begin(), list.end()); 179 } else if (const Seq * seq = dyn_cast<Seq>(r)) { 180 
std::vector<RE *> list; 181 list.reserve(seq->size()); 182 for (RE * item : *seq) { 183 item = resolve(item); 184 list.push_back(item); 185 } 186 return makeSeq(list.begin(), list.end()); 187 } else if (Assertion * a = dyn_cast<Assertion>(r)) { 188 return makeAssertion(resolve(a->getAsserted()), a->getKind(), a->getSense()); 189 } else if (Rep * rep = dyn_cast<Rep>(r)) { 190 return makeRep(resolve(rep->getRE()), rep->getLB(), rep->getUB()); 191 } else if (Diff * diff = dyn_cast<Diff>(r)) { 192 return makeDiff(resolve(diff->getLH()), resolve(diff->getRH())); 193 } else if (Intersect * e = dyn_cast<Intersect>(r)) { 194 return makeIntersect(resolve(e->getLH()), resolve(e->getRH())); 195 } else if (isa<Start>(r)) { 196 if (mIsNegated) { 197 return makeNegativeLookBehindAssertion(mAnchorRE); 198 } else { 199 return makeAlt({makeSOT(), makeLookBehindAssertion(mAnchorRE)}); 200 } 201 } else if (isa<End>(r)) { 202 if (mIsNegated) { 203 return makeNegativeLookAheadAssertion(mAnchorRE); 204 } else { 205 return makeAlt({makeEOT(), makeLookAheadAssertion(mAnchorRE)}); 206 } 207 } 208 } 209 return r; 210 } 211 212 RE * resolveAnchors(RE * r, RE * breakRE) { 213 AnchorResolution a; 73 74 AnchorResolution::AnchorResolution(RE * breakRE) 75 : RE_Transformer() { 214 76 if (const CC * cc = dyn_cast<CC>(breakRE)) { 215 a.mIsNegated = true;77 mIsNegated = true; 216 78 if (cc->getAlphabet() == &cc::Unicode) { 217 a.mAnchorRE = makeDiff(makeCC(0, 0x10FFFF), breakRE);79 mAnchorRE = makeDiff(makeCC(0, 0x10FFFF), breakRE); 218 80 } else if (cc->getAlphabet() == &cc::Byte) { 219 a.mAnchorRE = makeDiff(makeByte(0, 0xFF), breakRE);81 mAnchorRE = makeDiff(makeByte(0, 0xFF), breakRE); 220 82 } else { 221 83 llvm::report_fatal_error("resolveAnchors: unexpected alphabet " + cc->getAlphabet()->getName()); 222 84 } 223 85 } else { 224 a.mIsNegated = false;225 a.mAnchorRE = breakRE;86 mIsNegated = false; 87 mAnchorRE = breakRE; 226 88 } 227 return a.resolve(r); 89 } 90 91 RE * 
AnchorResolution::transformStart(Start * s) { 92 if (mIsNegated) return makeNegativeLookBehindAssertion(mAnchorRE); 93 return makeAlt({makeSOT(), makeLookBehindAssertion(mAnchorRE)}); 94 } 95 96 RE * AnchorResolution::transformEnd(End * e) { 97 if (mIsNegated) return makeNegativeLookAheadAssertion(mAnchorRE); 98 return makeAlt({makeEOT(), makeLookAheadAssertion(mAnchorRE)}); 99 } 100 101 RE * resolveAnchors(RE * r, RE * breakRE) { 102 return AnchorResolution(breakRE).transform(r); 228 103 } 229 104 -
icGREP/icgrep-devel/icgrep/re/re_name_resolve.h
r5872 → r6161  (columns: old line, new line, change marker)
  7   7     class Name;
  8   8
  9      -  RE * resolveUnicodeProperties(RE * re);
 10      -  RE * resolveNames(RE * re);
      9  +  RE * resolveUnicodeNames(RE * re);
 11  10     RE * resolveAnchors(RE * r, RE * breakRE);
 12  11
-
icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp
r6160 → r6161  (columns: old line, new line, change marker)
 71  71             errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
 72  72         }
 73      -      r = re::resolveUnicodeProperties(r);
     73  +      r = re::resolveUnicodeNames(r);
 74  74         if (PrintOptions.isSet(ShowAllREs)) {
 75      -          errs() << "resolveUnicodeProperties:\n" << Printer_RE::PrintRE(r) << '\n';
     75  +          errs() << "resolveUnicodeNames:\n" << Printer_RE::PrintRE(r) << '\n';
 76  76         }
 77  77         r = resolveCaseInsensitiveMode(r, globallyCaseInsensitive);
  …   …
106 106             errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
107 107         }
108      -      r = re::resolveNames(r);
109      -      if (PrintOptions.isSet(ShowAllREs)) {
110      -          errs() << "Resolve Names:\n" << Printer_RE::PrintRE(r) << '\n';
111      -      }
112 108         if (codegen::OptLevel > 1) {
113 109             r = RE_Minimizer::minimize(r);
Note: See TracChangeset for help on using the changeset viewer.