source: icGREP/icgrep-devel/icgrep/re/re_parser_ere.cpp

Last change on this file was 5789, checked in by cameron, 4 weeks ago

Further parser bug fixes and restructuring

File size: 2.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <re/re_parser_ere.h>
8#include <re/re_start.h>
9#include <re/re_end.h>
10#include <re/re_any.h>
11#include <re/re_alt.h>
12#include <re/re_seq.h>
13
14namespace re {
15
16
17RE * RE_Parser_ERE::parse_next_item() {
18    if (mCursor.noMore() || atany("*?+{|")) return nullptr;
19    else if ((mGroupsOpen > 0) && at(')')) return nullptr;
20    else if (accept('^')) return makeStart();
21    else if (accept('$')) return makeEnd();
22    else if (accept('.')) return makeAny();
23    else if (accept('(')) return parse_group();
24    else if (accept('[')) return parse_bracket_expr();
25    else if (accept('\\')) return parse_escaped();
26    else return createCC(parse_literal_codepoint());
27}
28
29// A parenthesized capture group.  Input precondition: the opening ( has been consumed
30RE * RE_Parser_ERE::parse_group() {
31    mGroupsOpen++;
32    RE * captured = parse_capture_body();
33    require(')');
34    mGroupsOpen--;
35    return captured;
36}
37
38RE * RE_Parser_ERE::parse_escaped() {
39    if (accept('b')) return makeWordBoundary();
40    if (accept('B')) return makeWordNonBoundary();
41    if (accept('s')) return makeWhitespaceSet();
42    if (accept('S')) return makeComplement(makeWhitespaceSet());
43    if (accept('<')) return makeWordBegin();
44    if (accept('>')) return makeWordEnd();
45    if (isdigit(*mCursor)) return parse_back_reference();
46    else {
47        return createCC(parse_literal_codepoint());
48    }
49}
50
51
52// Parsing items within a bracket expression.
53// Items represent individual characters or sets of characters.
54// Ranges may be formed by individual character items separated by '-'.
55// Note that there are no backslash escapes for ERE or BRE bracket expressions.
56RE * RE_Parser_ERE::parse_bracket_expr () {
57    bool negated = accept('^');
58    std::vector<RE *> items;
59    do {
60        if (accept('[')) {
61            if (accept('=')) items.push_back(parse_equivalence_class());
62            else if (accept('.')) items.push_back(range_extend(parse_collation_element()));
63            else if (accept(':')) items.push_back(parse_Posix_class());
64            else items.push_back(parse_bracket_expr());
65        } else {
66            items.push_back(range_extend(makeCC(parse_literal_codepoint())));
67        }
68    } while (mCursor.more() && !at(']'));
69    RE * t = makeAlt(items.begin(), items.end());
70    require(']');
71    if (negated) return makeComplement(t);
72    else return t;
73}
74}
Note: See TracBrowser for help on using the repository browser.