Changeset 4819
- Timestamp:
- Oct 4, 2015, 4:59:28 PM (3 years ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 1 deleted
- 8 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp
r4814 r4819 2 2 #include <cc/cc_compiler.h> 3 3 #include <UCD/unicode_set.h> 4 #include <re/re_name.h> 4 5 #include <utf8_encoder.h> 5 6 -
icGREP/icgrep-devel/icgrep/cc/cc_namemap.cpp
r4673 r4819 9 9 #include <re/re_assertion.h> 10 10 #include <UCD/resolve_properties.h> 11 #include <re/printer_re.h> 11 12 12 13 using namespace re; … … 61 62 return f->second; 62 63 } 63 return insert(std::move(classname), (type == ByteClass) ? makeByteName(classname, cc) :makeName(classname, cc));64 return insert(std::move(classname), makeName(classname, cc)); 64 65 } 65 66 return re; -
icGREP/icgrep-devel/icgrep/re/re_compiler.cpp
r4818 r4819 18 18 #include <re/re_assertion.h> 19 19 #include <re/re_analysis.h> 20 #include <re/printer_re.h> 20 21 #include <cc/cc_namemap.hpp> 21 22 #include <pablo/codegenstate.h> -
icGREP/icgrep-devel/icgrep/re/re_memoizer.hpp
r4818 r4819 2 2 #define RE_NAMEDICTIONARY_H 3 3 4 #include <re/re_name.h> 5 #include <set> 6 4 7 namespace re { 5 8 6 class NameDictionary 7 { 8 public: 9 NameDictionary(); 9 namespace { 10 11 struct MemoizerComparator { 12 inline bool operator() (const RE * lh, const RE * rh) const{ 13 if (LLVM_LIKELY(isa<Name>(lh) && isa<Name>(rh))) { 14 return *cast<Name>(lh) < *cast<Name>(rh); 15 } else if (isa<Name>(lh)) { 16 return *cast<Name>(lh) < *cast<CC>(rh); 17 } 18 return !(*cast<Name>(rh) < *cast<CC>(lh)); 19 } 20 }; 21 22 } 23 24 struct Memoizer : private std::set<RE *, MemoizerComparator> { 25 26 inline Name * memoize(CC * cc) { 27 auto f = find(cc); 28 if (f != end()) { 29 return cast<Name>(*f); 30 } else { 31 Name * name = makeName(cc->canonicalName(CC_type::UnicodeClass), cc); 32 insert(name); 33 return name; 34 } 35 } 36 37 inline Name * memoize(Name * name) { 38 return cast<Name>(*insert(name).first); 39 } 10 40 }; 11 41 -
icGREP/icgrep-devel/icgrep/re/re_name.h
r4818 r4819 5 5 #include <re/re_cc.h> 6 6 #include <string> 7 #include <re/printer_re.h>8 7 9 8 namespace pablo { … … 43 42 mCompiled = var; 44 43 } 44 bool operator<(const Name & other) const; 45 bool operator<(const CC & other) const; 45 46 void setDefinition(RE * definition); 46 47 virtual ~Name() {} 47 48 protected: 48 49 friend Name * makeName(const std::string &, RE *); 49 friend Name * makeByteName(const std::string &, RE *);50 50 friend Name * makeName(const std::string &, const Type); 51 51 friend Name * makeName(const std::string &, const std::string &, const Type); … … 106 106 } 107 107 108 inline bool Name::operator < (const Name & other) const { 109 if (mDefinition && other.mDefinition && isa<CC>(mDefinition) && isa<CC>(other.mDefinition)) { 110 return *cast<CC>(mDefinition) < *cast<CC>(other.mDefinition); 111 } else if (mNamespaceLength < other.mNamespaceLength) { 112 return true; 113 } else if (mNamespaceLength > other.mNamespaceLength) { 114 return false; 115 } else if (mNameLength < other.mNameLength) { 116 return true; 117 } else if (mNameLength > other.mNameLength) { 118 return false; 119 } 120 const auto diff = std::memcmp(mNamespace, other.mNamespace, mNamespaceLength); 121 if (diff < 0) { 122 return true; 123 } else if (diff > 0) { 124 return false; 125 } 126 return (std::memcmp(mName, other.mName, mNameLength) < 0); 127 } 128 129 inline bool Name::operator < (const CC & other) const { 130 if (mDefinition && isa<CC>(mDefinition)) { 131 return *cast<CC>(mDefinition) < other; 132 } 133 return false; 134 } 135 108 136 inline Name * makeName(const std::string & name, const Name::Type type) { 109 137 return new Name(nullptr, 0, name.c_str(), name.length(), type, nullptr); … … 125 153 } 126 154 127 inline Name * makeByteName(const std::string & name, RE * cc) {128 if (isa<Name>(cc)) {129 return cast<Name>(cc);130 }131 else {132 return new Name(nullptr, 0, name.c_str(), name.length(), Name::Type::Byte, cc);133 }134 }135 136 155 } 137 156 -
icGREP/icgrep-devel/icgrep/re/re_parser.cpp
r4812 r4819 137 137 case ']': 138 138 if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) { 139 return build_CC(parse_utf8_codepoint());139 return createCC(parse_utf8_codepoint()); 140 140 } 141 141 else throw ParseFailure("Use \\] for literal ]."); … … 145 145 } 146 146 else if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) { 147 return build_CC(parse_utf8_codepoint());147 return createCC(parse_utf8_codepoint()); 148 148 } 149 149 else throw ParseFailure("Use \\} for literal }."); … … 158 158 return parse_escaped(); 159 159 default: 160 return build_CC(parse_utf8_codepoint());160 return createCC(parse_utf8_codepoint()); 161 161 } 162 162 } … … 383 383 return parseEscapedSet(); 384 384 else 385 return build_CC(parse_escaped_codepoint());385 return createCC(parse_escaped_codepoint()); 386 386 } 387 387 … … 495 495 } 496 496 497 Name* RE_Parser::parsePropertyExpression() {497 RE * RE_Parser::parsePropertyExpression() { 498 498 const cursor_t start = _cursor; 499 499 while (_cursor != _end && *_cursor != '}' and *_cursor != ':' and *_cursor != '=') { … … 513 513 } 514 514 515 Name * RE_Parser::createName(const std::string value) { 516 517 auto key = std::make_pair("", value); 518 auto f = mNameMap.find(key); 519 if (f != mNameMap.end()) { 520 return f->second; 521 } 522 Name * property = makeName(value, Name::Type::UnicodeProperty); 523 mNameMap.insert(std::make_pair(std::move(key), property)); 524 return property; 525 } 526 527 Name * RE_Parser::createName(const std::string prop, const std::string value) { 528 auto key = std::make_pair(prop, value); 529 auto f = mNameMap.find(key); 530 if (f != mNameMap.end()) { 531 return f->second; 532 } 533 Name * property = makeName(prop, value, Name::Type::UnicodeProperty); 534 mNameMap.insert(std::make_pair(std::move(key), property)); 535 return property; 536 } 537 538 CC * RE_Parser::parseNamePatternExpression(){ 539 540 re::ModeFlagSet outerFlags = fModeFlagSet; 515 RE * RE_Parser::parseNamePatternExpression(){ 516 517 ModeFlagSet outerFlags = fModeFlagSet; 541 518 fModeFlagSet = 1; 542 519 … … 554 531 555 532 // Embed the nameRE in ";.*$nameRE" to skip the codepoint field of Uname.txt 556 RE * embedded = makeSeq({ re::makeCC(0x3B), re::makeRep(re::makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});533 RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE}); 557 534 Encoding encoding(Encoding::Type::UTF_8, 8); 558 535 embedded = regular_expression_passes(encoding, embedded); … … 571 548 catch (std::runtime_error e) { 572 549 releaseSlabAllocatorMemory(); 573 std::cerr << "Runtime error: " << e.what() << std::endl; 574 exit(1); 550 throw e; 575 551 } 576 552 #ifndef NDEBUG … … 586 562 void * icgrep_MCptr = engine->getPointerToFunction(nameSearchIR); 587 563 588 CC * nameSearchResult = nullptr;564 CC * result = nullptr; 589 565 if (icgrep_MCptr) { 590 566 GrepExecutor grepEngine(icgrep_MCptr); 591 567 grepEngine.setParseCodepointsOption(); 592 568 grepEngine.doGrep("../Uname.txt"); 593 nameSearchResult = grepEngine.getParsedCodepoints(); 594 } 595 596 //engine->freeMachineCodeForFunction(nameSearchIR); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6. 569 result = grepEngine.getParsedCodepoints(); 570 } 597 571 delete engine; 598 return nameSearchResult;572 return mMemoizer.memoize(result); 599 573 } 600 574 … … 672 646 // if and only if it appears immediately after the opening [ or [^ 673 647 if ( *_cursor == ']' && LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) { 674 cc->insert(']');648 insert(cc, ']'); 675 649 lastItemKind = CodepointItem; 676 650 lastCodepointItem = static_cast<codepoint_t> (']'); … … 679 653 else if ( *_cursor == '-' && LEGACY_UNESCAPED_HYPHEN_ALLOWED) { 680 654 ++_cursor; 681 cc->insert('-');655 insert(cc, '-'); 682 656 lastItemKind = CodepointItem; 683 657 lastCodepointItem = static_cast<codepoint_t> ('-'); … … 694 668 throw ParseFailure("Set operator has no left operand."); 695 669 } 696 if ( cc->begin() != cc->end()) {697 subexprs.push_back( cc);670 if (!cc->empty()) { 671 subexprs.push_back(mMemoizer.memoize(cc)); 698 672 } 699 673 RE * newOperand = makeAlt(subexprs.begin(), subexprs.end()); … … 720 694 throw ParseFailure("Set operator has no right operand."); 721 695 } 722 if ( cc->begin() != cc->end()) {723 subexprs.push_back( cc);696 if (!cc->empty()) { 697 subexprs.push_back(mMemoizer.memoize(cc)); 724 698 } 725 699 RE * newOperand = makeAlt(subexprs.begin(), subexprs.end()); … … 742 716 case posixPropertyOpener: { 743 717 if (lastItemKind != NoItem) { 744 if (cc->begin() != cc->end()) subexprs.push_back(cc); 718 if (!cc->empty()) { 719 subexprs.push_back(mMemoizer.memoize(cc)); 720 } 745 721 RE * newOperand = makeAlt(subexprs.begin(), subexprs.end()); 746 722 subexprs.clear(); … … 783 759 throw ParseFailure("Range operator - has illegal left operand."); 784 760 } 785 cc->insert_range(lastCodepointItem, parse_codepoint());761 insert_range(cc, lastCodepointItem, parse_codepoint()); 786 762 lastItemKind = RangeItem; 787 763 break; 788 764 case hyphenChar: 789 cc->insert('-');765 insert(cc, '-'); 790 766 lastItemKind = CodepointItem; 791 767 lastCodepointItem = static_cast<codepoint_t> ('-'); 792 768 break; 793 769 case ampChar: 794 cc->insert('&');770 insert(cc, '&'); 795 771 lastItemKind = CodepointItem; 796 772 lastCodepointItem = static_cast<codepoint_t> ('&'); … … 804 780 else { 805 781 lastCodepointItem = parse_escaped_codepoint(); 806 cc->insert(lastCodepointItem);782 insert(cc, lastCodepointItem); 807 783 lastItemKind = CodepointItem; 808 784 } … … 810 786 case emptyOperator: 811 787 lastCodepointItem = parse_utf8_codepoint(); 812 cc->insert(lastCodepointItem);788 insert(cc, lastCodepointItem); 813 789 lastItemKind = CodepointItem; 814 790 break; … … 957 933 } 958 934 959 CC * RE_Parser::build_CC(codepoint_t cp) { 960 CC * cc = makeCC(); 961 CC_add_codepoint(cc, cp); 962 return cc; 963 } 964 965 void RE_Parser::CC_add_codepoint(CC * cc, codepoint_t cp) { 935 inline Name * RE_Parser::createCC(const codepoint_t cp) { 936 CC * cc = nullptr; 937 if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) { 938 cc = makeCC(); 939 caseInsensitiveInsert(cc, cp); 940 } else { 941 cc = makeCC(cp); 942 } 943 return mMemoizer.memoize(cc); 944 } 945 946 inline void RE_Parser::insert(CC * cc, const codepoint_t cp) { 966 947 if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) { 967 948 caseInsensitiveInsert(cc, cp); … … 971 952 } 972 953 973 void RE_Parser::CC_add_range(CC * cc, codepoint_t lo,codepoint_t hi) {954 inline void RE_Parser::insert_range(CC * cc, const codepoint_t lo, const codepoint_t hi) { 974 955 if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) { 975 956 caseInsensitiveInsertRange(cc, lo, hi); … … 995 976 } 996 977 997 inline Name* RE_Parser::makeDigitSet() {978 inline RE * RE_Parser::makeDigitSet() { 998 979 return createName("nd"); 999 980 } 1000 981 1001 inline Name* RE_Parser::makeAlphaNumeric() {982 inline RE * RE_Parser::makeAlphaNumeric() { 1002 983 return createName("alnum"); 1003 984 } 1004 985 1005 inline Name* RE_Parser::makeWhitespaceSet() {986 inline RE * RE_Parser::makeWhitespaceSet() { 1006 987 return createName("whitespace"); 1007 988 } 1008 989 1009 inline Name* RE_Parser::makeWordSet() {990 inline RE * RE_Parser::makeWordSet() { 1010 991 return createName("word"); 1011 992 } 1012 993 1013 } 994 RE * RE_Parser::createName(const std::string value) { 995 auto key = std::make_pair("", value); 996 auto f = mNameMap.find(key); 997 if (f != mNameMap.end()) { 998 return f->second; 999 } 1000 RE * const property = mMemoizer.memoize(makeName(value, Name::Type::UnicodeProperty)); 1001 mNameMap.insert(std::make_pair(std::move(key), property)); 1002 return property; 1003 } 1004 1005 RE * RE_Parser::createName(const std::string prop, const std::string value) { 1006 auto key = std::make_pair(prop, value); 1007 auto f = mNameMap.find(key); 1008 if (f != mNameMap.end()) { 1009 return f->second; 1010 } 1011 RE * const property = mMemoizer.memoize(makeName(prop, value, Name::Type::UnicodeProperty)); 1012 mNameMap.insert(std::make_pair(std::move(key), property)); 1013 return property; 1014 } 1015 1016 } -
icGREP/icgrep-devel/icgrep/re/re_parser.h
r4809 r4819 16 16 #include <memory> 17 17 #include <map> 18 18 #include <re/re_memoizer.hpp> 19 19 20 20 namespace re { … … 43 43 private: 44 44 45 using NameMap = std::map<std::pair<std::string, std::string>, re:: Name*>;45 using NameMap = std::map<std::pair<std::string, std::string>, re::RE *>; 46 46 47 typedef std::string::const_iterator cursor_t;47 using cursor_t = std::string::const_iterator; 48 48 49 49 RE_Parser(const std::string & regular_expression); … … 73 73 codepoint_t parse_utf8_codepoint(); 74 74 75 Name* parsePropertyExpression();75 RE * parsePropertyExpression(); 76 76 77 CC* parseNamePatternExpression();77 RE * parseNamePatternExpression(); 78 78 79 79 RE * makeComplement(RE * s); 80 80 RE * makeWordBoundary(); 81 81 RE * makeWordNonBoundary(); 82 Name* makeDigitSet();83 Name* makeAlphaNumeric();84 Name* makeWhitespaceSet();85 Name* makeWordSet();82 RE * makeDigitSet(); 83 RE * makeAlphaNumeric(); 84 RE * makeWhitespaceSet(); 85 RE * makeWordSet(); 86 86 87 Name* createName(const std::string value);88 Name* createName(const std::string prop, const std::string value);87 RE * createName(const std::string value); 88 RE * createName(const std::string prop, const std::string value); 89 89 90 90 CharsetOperatorKind getCharsetOperator(); … … 103 103 104 104 // CC insertion dependent on case-insensitive flag. 105 CC * build_CC(codepoint_t cp); 106 107 void CC_add_codepoint(CC * cc, codepoint_t cp); 108 109 void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi); 105 Name * createCC(const codepoint_t cp); 106 void insert(CC * cc, const codepoint_t cp); 107 void insert_range(CC * cc, const codepoint_t lo, const codepoint_t hi); 110 108 111 109 static std::string canonicalize(const cursor_t begin, const cursor_t end); … … 118 116 bool fNested; 119 117 NameMap mNameMap; 118 Memoizer mMemoizer; 120 119 }; 121 120 -
icGREP/icgrep-devel/icgrep/toolchain.cpp
r4815 r4819 100 100 std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 101 101 } 102 103 cc::CC_NameMap nameMap;104 re_ast = nameMap.process(re_ast, re::UnicodeClass);105 106 // std::cerr << "-----------------------------" << std::endl;107 108 if (PrintAllREs || PrintNamedREs) {109 std::cerr << "Namer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;110 std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;111 }112 102 113 103 re_ast = re::RE_Simplifier::simplify(re_ast); -
icGREP/icgrep-devel/icgrep/utf8_encoder.h
r4814 r4819 8 8 #define UTF8_ENCODER_H 9 9 10 //Regular Expressions11 10 #include <re/re_cc.h> 12 #include <cc/cc_namemap.hpp>13 11 14 12 namespace cc { 15 16 class CC_NameMap;17 13 18 14 struct UTF8_Encoder {
Note: See TracChangeset
for help on using the changeset viewer.