Ignore:
Timestamp:
Jul 7, 2017, 2:27:56 PM (2 years ago)
Author:
cameron
Message:

-enable-byte-mode initial check-in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5537 r5554
       42    42   }
       43    43
       44       - RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags, RE_Syntax syntax) {
             44 + RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags, RE_Syntax syntax, bool ByteMode) {
       45    45       std::unique_ptr<RE_Parser> parser = nullptr;
       46    46       switch (syntax) {
     
       62    62               break;
       63    63       }
             64 +     parser->fByteMode = ByteMode;
       64    65       parser->fModeFlagSet = initialFlags;
       65    66       parser->fNested = false;
     
       74    75
       75    76   RE_Parser::RE_Parser(const std::string & regular_expression)
       76       - : fModeFlagSet(0)
             77 + : fByteMode(false)
             78 + , fModeFlagSet(0)
       77    79   , fNested(false)
       78    80   , fGraphemeBoundaryPending(false)
     
      164   166               case ']':
      165   167                   if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) {
      166       -                     return createCC(parse_utf8_codepoint());
            168 +                     return createCC(parse_literal_codepoint());
      167   169                   }
      168   170                   ParseFailure("Use  \\] for literal ].");
     
      171   173                       break;  //  a recursive invocation for a regexp in \N{...}
      172   174                   } else if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) {
      173       -                     return createCC(parse_utf8_codepoint());
            175 +                     return createCC(parse_literal_codepoint());
      174   176                   }
      175   177                   ParseFailure("Use \\} for literal }.");
     
      188   190                   return parse_escaped();
      189   191               default:
      190       -                 re = createCC(parse_utf8_codepoint());
            192 +                 re = createCC(parse_literal_codepoint());
      191   193                   if ((fModeFlagSet & ModeFlagType::GRAPHEME_CLUSTER_MODE) != 0) {
      192   194                       fGraphemeBoundaryPending = true;
     
      534   536   void InvalidUTF8Encoding() {
      535   537       RE_Parser::ParseFailure("Invalid UTF-8 encoding!");
            538 + }
            539 +
            540 + codepoint_t RE_Parser::parse_literal_codepoint() {
            541 +     if (fByteMode) {
            542 +        return static_cast<uint8_t>(*mCursor++);
            543 +     }
            544 +     else return parse_utf8_codepoint();
      536   545   }
      537   546
     
      909   918                   break;
      910   919               case emptyOperator:
      911       -                 lastCodepointItem = parse_utf8_codepoint();
            920 +                 lastCodepointItem = parse_literal_codepoint();
      912   921                   insert(cc, lastCodepointItem);
      913   922                   lastItemKind = CodepointItem;
     
      924   933           return parse_escaped_codepoint();
      925   934       } else {
      926       -         return parse_utf8_codepoint();
            935 +         return parse_literal_codepoint();
      927   936       }
      928   937   }
     
     1002  1011               // Escaped letters should be reserved for special functions.
     1003  1012               if (((*mCursor >= 'A') && (*mCursor <= 'Z')) || ((*mCursor >= 'a') && (*mCursor <= 'z'))){
     1004       -                 //Escape unknow letter will be parse as normal letter
     1005       -                 return parse_utf8_codepoint();
           1013 +                 //Escape unknown letter will be parse as normal letter
           1014 +                 return parse_literal_codepoint();
     1006  1015                   //ParseFailure("Undefined or unsupported escape sequence");
     1007  1016               }
Note: See TracChangeset for help on using the changeset viewer.