Ignore:
Timestamp:
Sep 23, 2014, 3:15:47 PM (5 years ago)
Author:
nmedfort
Message:

Refactored the RE CC class and the CC Compiler; moved the RE code into the re subdirectory.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
1 added
1 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4182 r4187  
    3131}
    3232
     33template<class T>
     34inline static RE * simplify_vector(T & vec) {
     35    RE * re;
     36    if (vec->size() == 1) {
     37        re = vec->back();
     38        vec->pop_back();
     39    }
     40    else {
     41        re = vec.release();
     42    }
     43    return re;
     44}
     45
    3346RE * RE_Parser::parse_alt(const bool subexpression) {
    3447    std::unique_ptr<Alt> alt(new Alt());
     
    5366        throw ParseFailure("Cannot fully parse statement!");
    5467    }
    55 
    56     RE * re;
    57     if (alt->size() == 1) {
    58         re = alt->back();
    59         alt->pop_back();
    60     }
    61     else {
    62         re = alt.release();
    63     }
    64     return re;
     68    return simplify_vector(alt);
    6569}
    6670
     
    7882        throw NoRegularExpressionFound();
    7983    }
    80 
    81     RE * re;
    82     if (seq->size() == 1) {
    83         re = seq->back();
    84         seq->pop_back();
    85     }
    86     else {
    87         re = seq.release();
    88     }
    89     return re;
     84    return simplify_vector(seq);
    9085}
    9186
     
    139134        case '*':
    140135            ++_cursor; // skip past the '*'
    141             re = new Rep(re, 0, UNBOUNDED_REP);
     136            re = new Rep(re, 0, Rep::UNBOUNDED_REP);
    142137            break;
    143138        case '?':
     
    147142        case '+':
    148143            ++_cursor; // skip past the '+'
    149             re = new Rep(re, 1, UNBOUNDED_REP);
     144            re = new Rep(re, 1, Rep::UNBOUNDED_REP);
    150145            break;
    151146        case '{':
     
    182177        throw_incomplete_expression_error_if_end_of_stream();
    183178        if (*_cursor == '}') {
    184             rep = new Rep(re, lower_bound, UNBOUNDED_REP);
     179            rep = new Rep(re, lower_bound, Rep::UNBOUNDED_REP);
    185180        }
    186181        else {
     
    273268        }
    274269        name->setName(std::string(start, _cursor));
    275         if (isValidUnicodeCategoryName(name)) {
    276             ++_cursor;
    277             return name.release();
    278         }
     270        ++_cursor;
     271        return name.release();
    279272    }
    280273    throw ParseFailure("Incorrect Unicode character class format!");
     
    301294                // close the bracket expression.
    302295                if (start == _cursor) {
    303                     cc->insert1(']');
     296                    cc->insert(']');
    304297                    ++_cursor;
    305298                    included_closing_square_bracket = true;
     
    323316                    if ((start == _cursor) ? (*next != '-') : (*next == ']')) {
    324317                        _cursor = next;
    325                         cc->insert1('-');
     318                        cc->insert('-');
    326319                        break;
    327320                    }
     
    351344                }
    352345            }
    353             cc->insert1(low);
     346            cc->insert(low);
    354347        }
    355348    }
     
    369362    if (*_cursor == '\\') {
    370363        if (++_cursor == _end) {
    371             return false;
     364            throw ParseFailure("Unknown charset escape!");
    372365        }
    373366        switch (*_cursor) {
     
    431424
    432425inline void RE_Parser::negate_cc(std::unique_ptr<CC> & cc) {
    433     cc->negate_class();
    434     cc->remove1(10);
    435 }
    436 
    437 bool RE_Parser::isValidUnicodeCategoryName(const std::unique_ptr<Name> & name) {
    438     static const char * SET_OF_VALID_CATEGORIES[] = {
    439         "C", "Cc", "Cf", "Cn", "Co", "Cs",
    440         "L", "L&", "Lc", "Ll", "Lm", "Lo", "Lt", "Lu",
    441         "M", "Mc", "Me", "Mn",
    442         "N", "Nd", "Nl", "No",
    443         "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps",
    444         "S", "Sc", "Sk", "Sm", "So",
    445         "Z", "Zl", "Zp", "Zs"
    446     };
    447     // NOTE: this method isn't as friendly as using an unordered_set for VALID_CATEGORIES since it requires
    448     // that the set is in ALPHABETICAL ORDER; however it ought to have less memory overhead than an
    449     // unordered_set and roughly equivalent speed.
    450     return std::binary_search(std::begin(SET_OF_VALID_CATEGORIES), std::end(SET_OF_VALID_CATEGORIES), name->getName());
     426    cc->negate();
     427    cc->remove(10);
    451428}
    452429
Note: See TracChangeset for help on using the changeset viewer.