source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4305

Last change on this file since 4305 was 4305, checked in by cameron, 5 years ago

Support for ICU, Perl backslash escape codepoint sequences; non-codepoint escapes to follow

File size: 1.5 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include "re_re.h"
11#include "re_any.h"
12#include "re_name.h"
13
14#include <string>
15#include <list>
16#include <memory>
17
18namespace re {
19
20class RE_Parser
21{
22public:
23
24    static RE * parse(const std::string &intput_string, const bool allow_escapes_within_charset = false);
25
26private:
27
28    typedef std::string::const_iterator cursor_t;
29
30    RE_Parser(const std::string & regular_expression, const bool allow_escapes_within_charset);
31
32    RE * parse_alt(const bool subexpression);
33
34    RE * parse_seq();
35
36    RE * parse_next_token();
37
38    Any * parse_any_character();
39
40    RE * extend_item(RE * re);
41
42    RE * parse_range_bound(RE * re);
43
44    RE * parse_literal();
45
46    RE * parse_escaped_metacharacter();
47
48    unsigned parse_utf8_codepoint();
49
50    Name * parse_unicode_category();
51
52    RE * parse_charset();
53
54    bool parse_charset_literal(unsigned & literal);
55
56    unsigned parse_escaped_codepoint();
57
58    unsigned parse_hex_codepoint(int mindigits, int maxdigits);
59
60    unsigned parse_octal_codepoint(int mindigits, int maxdigits);
61
62    unsigned parse_int();
63
64    inline void throw_incomplete_expression_error_if_end_of_stream() const;
65
66private:
67
68    cursor_t                    _cursor;
69    const cursor_t              _end;
70    const bool                  _allow_escapes_within_charset;
71};
72
73}
74
75#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.