source: icGREP/icgrep-devel/icgrep/re/re_parser_ere.cpp @ 5787

Last change on this file since 5787 was 5787, checked in by cameron, 16 months ago

RE parser restructuring; parsing symbolic ranges, collation and equivalence exprs

File size: 3.2 KB
RevLine 
[5180]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <re/re_parser_ere.h>
[5787]8#include <re/re_start.h>
9#include <re/re_end.h>
10#include <re/re_any.h>
11#include <re/re_alt.h>
12#include <re/re_seq.h>
[5180]13
[5787]14namespace re {
[5180]15
16
[5787]17RE * RE_Parser_ERE::parse_next_item() {
18    if (mCursor.noMore() || atany("*?+{|")) return nullptr;
19    else if ((mGroupsOpen > 0) && at(')')) return nullptr;
20    else if (accept('^')) return makeStart();
21    else if (accept('$')) return makeEnd();
22    else if (accept('.')) return makeAny();
23    else if (accept('(')) return parse_group();
24    else if (accept('[')) return parse_bracket_expr();
25    else if (accept('\\')) return parse_escaped();
26    else return createCC(parse_literal_codepoint());
27}
28
29// A parenthesized group.  Input precondition: the opening ( has been consumed
30RE * RE_Parser_ERE::parse_group() {
31    // Capturing paren group.
32    mGroupsOpen++;
33    RE * captured = parse_alt();
34    mCaptureGroupCount++;
35    std::string captureName = "\\" + std::to_string(mCaptureGroupCount);
36    Name * const capture  = mMemoizer.memoize(makeCapture(captureName, captured));
37    auto key = std::make_pair("", captureName);
38    mNameMap.insert(std::make_pair(std::move(key), capture));
39    if (!accept(')')) ParseFailure("Closing parenthesis required.");
40    mGroupsOpen--;
41    return capture;
42}
43
44RE * RE_Parser_ERE::parse_escaped() {
45    if (accept('b')) return makeWordBoundary();
46    if (accept('B')) return makeWordNonBoundary();
47    if (accept('s')) return makeWhitespaceSet();
48    if (accept('S')) return makeComplement(makeWhitespaceSet());
49    if (accept('<')) return makeWordBegin();
50    if (accept('>')) return makeWordEnd();
51    if (isdigit(*mCursor)) {
52        mCursor++;
53        std::string backref = std::string(mCursor.pos()-2, mCursor.pos());
54        auto key = std::make_pair("", backref);
55        auto f = mNameMap.find(key);
56        if (f != mNameMap.end()) {
57            return makeReference(backref, f->second);
58        }
59        else {
60            ParseFailure("Back reference " + backref + " without prior capture group.");
61        }
[5180]62    }
[5787]63    else {
64        return createCC(parse_literal_codepoint());
65    }
66}
[5180]67
[5787]68
69// Parsing items within a bracket expression.
70// Items represent individual characters or sets of characters.
71// Ranges may be formed by individual character items separated by '-'.
72RE * RE_Parser_ERE::parse_bracket_expr () {
73    bool negated = accept('^');
74    std::vector<RE *> items;
75    do {
76        if (accept('[')) {
77            if (accept('=')) items.push_back(parse_equivalence_class());
78            else if (accept('.')) items.push_back(range_extend(parse_collation_element()));
79            else if (accept(':')) items.push_back(parse_Posix_class());
80            else items.push_back(parse_bracket_expr());
81        } else {
82            items.push_back(range_extend(makeCC(parse_literal_codepoint())));
[5180]83        }
[5787]84    } while (mCursor.more() && !at(']'));
85    RE * t = makeAlt(items.begin(), items.end());
86    if (!accept(']')) ParseFailure("Expecting ]");
87    if (negated) return makeComplement(t);
88    else return t;
89}
90}
Note: See TracBrowser for help on using the repository browser.