Ignore:
Timestamp:
Jan 15, 2018, 4:48:02 PM (18 months ago)
Author:
nmedfort
Message:

Revised RE_Minimizer to use alphabets + minor optimizations to RE functions

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5816 r5835  
    66 66    RE * re = parser->parse_RE();
    67 67    if (re == nullptr) {
    68         ParseFailure("An unexpected parsing error occurred!");
     68        parser->ParseFailure("An unexpected parsing error occurred!");
    69 69    }
    70 70    return re;
    71 71}
    72 72
    73 RE_Parser::RE_Parser(const std::string & regular_expression)
    74 : fByteMode(false)
    75 , fModeFlagSet(MULTILINE_MODE_FLAG)
    76 , fNested(false)
    77 , mGroupsOpen(0)
    78 , mCursor(regular_expression)
    79 , mCaptureGroupCount(0)
    80 , mReSyntax(RE_Syntax::PCRE)
    81 {
    82 
    83 }
    84 
    85 RE * makeAtomicGroup(RE * r) {
    86     RE_Parser::ParseFailure("Atomic grouping not supported.");
    87 }
    88 
    89 RE * makeBranchResetGroup(RE * r) {
     73RE * RE_Parser::makeAtomicGroup(RE * r) {
     74    ParseFailure("Atomic grouping not supported.");
     75}
     76
     77RE * RE_Parser::makeBranchResetGroup(RE * r) {
    90 78    // Branch reset groups only affect submatch numbering, but
    91 79    // this has no effect in icgrep.
    92     RE_Parser::ParseFailure("Branch reset groups not supported.");
     80    ParseFailure("Branch reset groups not supported.");
    93 81}
    94 82
     
    457 445    }
    458 446}
    459    
    460 void InvalidUTF8Encoding() {
    461     RE_Parser::ParseFailure("Invalid UTF-8 encoding!");
    462 }
    463 447
    464 448codepoint_t RE_Parser::parse_literal_codepoint() {
     
    471 455codepoint_t RE_Parser::parse_utf8_codepoint() {
    472 456    // Must cast to unsigned char to avoid sign extension.
    473     unsigned char pfx = static_cast<unsigned char>(*mCursor++);
     457    const unsigned char pfx = static_cast<unsigned char>(*mCursor++);
    474 458    codepoint_t cp = pfx;
    475 459    if (pfx < 0x80) return cp;
     
    492 476            InvalidUTF8Encoding();
    493 477        }
    494         char_t sfx = *mCursor++;
     478        const char_t sfx = *mCursor++;
    495 479        if ((sfx & 0xC0) != 0x80) {
    496 480            InvalidUTF8Encoding();
     
    892 876}
    893 877
    894 LLVM_ATTRIBUTE_NORETURN void RE_Parser::ParseFailure(std::string errmsg) {
     878RE_Parser::RE_Parser(const std::string & regular_expression)
     879: fByteMode(false)
     880, fModeFlagSet(MULTILINE_MODE_FLAG)
     881, fNested(false)
     882, mGroupsOpen(0)
     883, mCursor(regular_expression)
     884, mCaptureGroupCount(0)
     885, mReSyntax(RE_Syntax::PCRE) {
     886
     887}
     888
     889LLVM_ATTRIBUTE_NORETURN void RE_Parser::InvalidUTF8Encoding() {
     890    ParseFailure("Invalid UTF-8 encoding!");
     891}
     892
     893LLVM_ATTRIBUTE_NORETURN void RE_Parser::Cursor::IncompleteRegularExpression() {
     894    ParseFailure("Incomplete regular expression!");
     895}
     896
     897LLVM_ATTRIBUTE_NORETURN void RE_Parser::Cursor::ParseFailure(const std::string & errmsg) {
     898#if 0
     899    // TODO: this ought to check if the cursor position is on a UTF-8 character
     900    raw_fd_ostream out(STDERR_FILENO, false);
     901    out.changeColor(raw_string_ostream::WHITE);
     902    out.write(mStart.base(), mCursor - mStart);
     903    out.changeColor(raw_string_ostream::BLUE, true);
     904    out << *mCursor;
     905    out.changeColor(raw_string_ostream::WHITE);
     906    out.write(mCursor.base() + 1, mEnd - mCursor - 1);
     907    out << "\n\n";
     908#endif
    895 909    llvm::report_fatal_error(errmsg);
    896 910}
Note: See TracChangeset for help on using the changeset viewer.