Ignore:
Timestamp:
Oct 4, 2016, 3:26:12 PM (3 years ago)
Author:
xwa163
Message:

Support BRE and ERE for regex syntax.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5161 r5180  
    66
    77#include <re/re_parser.h>
     8#include <re/re_parser_helper.h>
     9#include <re/re_parser_pcre.h>
     10#include <re/re_parser_ere.h>
     11#include <re/re_parser_bre.h>
    812#include <re/re_name.h>
    913#include <re/re_alt.h>
     
    2327#include <algorithm>
    2428
    25 // It would probably be best to enforce that {}, [], () must always
    26 // be balanced.   But legacy syntax allows } and ] to occur as literals
    27 // in certain contexts (no opening { or [, or immediately after [ or [^ ).
    28 // Perhaps this define should become a parameter.
    29 #define LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED true
    30 #define LEGACY_UNESCAPED_HYPHEN_ALLOWED true
    31 
    32 
    33 
    34 
    3529namespace re {
    3630   
    3731
    38 RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags) {
    39     RE_Parser parser(regular_expression);
    40     parser.fModeFlagSet = initialFlags;
    41     parser.fNested = false;
    42     parser.fGraphemeBoundaryPending = false;
    43     parser.mCaptureGroupCount = 0;
    44     RE * re = parser.parse_RE();
     32RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags, RE_Syntax syntax) {
     33    std::unique_ptr<RE_Parser> parser = nullptr;
     34
     35    switch (syntax) {
     36        case RE_Syntax::PCRE:
     37            parser = llvm::make_unique<RE_Parser_PCRE>(regular_expression);
     38            break;
     39        case RE_Syntax::ERE:
     40            parser = llvm::make_unique<RE_Parser_ERE>(regular_expression);
     41            break;
     42        case RE_Syntax ::BRE:
     43            parser = llvm::make_unique<RE_Parser_BRE>(regular_expression);
     44            break;
     45        default:
     46            //TODO handle FixString
     47            ParseFailure("Unsupport RE syntax!");
     48            break;
     49    }
     50
     51
     52    parser->fModeFlagSet = initialFlags;
     53    parser->fNested = false;
     54    parser->fGraphemeBoundaryPending = false;
     55    parser->mCaptureGroupCount = 0;
     56    RE * re = parser->parse_RE();
    4557    if (re == nullptr) {
    4658        ParseFailure("An unexpected parsing error occurred!");
     
    5365    , fNested(false)
    5466    , fGraphemeBoundaryPending(false)
     67    , fSupportNonCaptureGroup(false)
    5568    , mCursor(regular_expression)
    5669    , mCaptureGroupCount(0)
     
    164177    const ModeFlagSet modeFlagSet = fModeFlagSet;
    165178    RE * group_expr = nullptr;
    166     if (*mCursor == '?') {
     179    if (*mCursor == '?' && fSupportNonCaptureGroup) {
    167180        switch (*++mCursor) {
    168181            case '#':  // comment
     
    302315inline std::pair<int, int> RE_Parser::parse_range_bound() {
    303316    int lower_bound = 0, upper_bound = 0;
    304     if (*++mCursor == ',') {
    305         ++mCursor;
    306     } else {
     317    if (*++mCursor != ',') {
    307318        lower_bound = parse_int();
    308319    }
     
    332343
    333344
    334 #define bit3C(x) (1ULL << ((x) - 0x3C))
    335345const uint64_t setEscapeCharacters = bit3C('b') | bit3C('p') | bit3C('q') | bit3C('d') | bit3C('w') | bit3C('s') | bit3C('<') | bit3C('>') |
    336346                                     bit3C('B') | bit3C('P') | bit3C('Q') | bit3C('D') | bit3C('W') | bit3C('S') | bit3C('N') | bit3C('X');
    337347
    338 inline bool isSetEscapeChar(char c) {
     348inline bool RE_Parser::isSetEscapeChar(char c) {
    339349    return c >= 0x3C && c <= 0x7B && ((setEscapeCharacters >> (c - 0x3C)) & 1) == 1;
    340350}
     
    582592}
    583593
     594inline bool RE_Parser::isUnsupportChartsetOperator(char c) {
     595    return false;
     596}
     597
    584598CharsetOperatorKind RE_Parser::getCharsetOperator() {
     599    if (isUnsupportChartsetOperator(*mCursor)) {
     600        return emptyOperator;
     601    }
    585602    switch (*mCursor) {
    586603        case '&':
     
    876893        default:
    877894            // Escaped letters should be reserved for special functions.
    878             if (((*mCursor >= 'A') && (*mCursor <= 'Z')) || ((*mCursor >= 'a') && (*mCursor <= 'z')))
    879                 ParseFailure("Undefined or unsupported escape sequence");
     895            if (((*mCursor >= 'A') && (*mCursor <= 'Z')) || ((*mCursor >= 'a') && (*mCursor <= 'z'))){
     896                // An escaped unknown letter is parsed as a normal letter.
     897                return parse_utf8_codepoint();
     898                //ParseFailure("Undefined or unsupported escape sequence");
     899            }
    880900            else if ((*mCursor < 0x20) || (*mCursor >= 0x7F))
    881901                ParseFailure("Illegal escape sequence");
Note: See TracChangeset for help on using the changeset viewer.