source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4532

Last change on this file since 4532 was 4429, checked in by cameron, 5 years ago

Throw exception for unsupported mode flags and repetition bounds > limits

File size: 2.4 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include "re_re.h"
11#include "re_any.h"
12#include "re_name.h"
13
14#include <string>
15#include <list>
16#include <memory>
17
18namespace re {
19       
/// Token kinds reported by RE_Parser::getCharsetOperator().
///
/// The enumerators keep their implicit values (intersectOp == 0 through
/// emptyOperator == 9) in the original declaration order.
/// NOTE(review): judging by the names, these classify the operators and
/// delimiters met while reading a bracketed character-set expression; confirm
/// the exact lexemes against the parser implementation.
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
22
/// Integer type used for the code point values that the parser's
/// parse_*codepoint() helpers return and that build_CC(), CC_add_codepoint()
/// and CC_add_range() accept.
typedef unsigned codepoint_t;
24
/// Individual regular-expression mode flags.
///
/// Each enumerator is a distinct power of two, so several flags can be OR-ed
/// together into a single unsigned value (see ModeFlagSet).
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1,
    MULTILINE_MODE_FLAG = 2,      // not currently implemented
    DOTALL_MODE_FLAG = 4,         // not currently implemented
    IGNORE_SPACE_MODE_FLAG = 8,   // not currently implemented
    UNIX_LINES_MODE_FLAG = 16     // not currently implemented
};
31   
/// Largest lower bound accepted in a bounded repetition.
/// NOTE(review): presumably enforced by the parser when it reads a repetition
/// range (see RE_Parser::parse_range_bound); confirm in the implementation.
const int MAX_REPETITION_LOWER_BOUND = 1024;

/// Largest upper bound accepted in a bounded repetition (same caveat as above).
const int MAX_REPETITION_UPPER_BOUND = 2048;
34
/// Unsigned integer holding a set of mode flags; this is the type of the
/// initialFlags argument to RE_Parser::parse() and of the parser's stored
/// flag state.
/// NOTE(review): presumably a bitwise OR of ModeFlagType values; confirm
/// against callers.
typedef unsigned ModeFlagSet;
36   
37class RE_Parser
38{
39public:
40
41    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
42
43private:
44
45    typedef std::string::const_iterator cursor_t;
46
47    RE_Parser(const std::string & regular_expression);
48   
49    RE_Parser(const std::string & regular_expression, ModeFlagSet initialFlags);
50
51    RE * parse_RE();
52   
53    RE * parse_alt();
54   
55    RE * parse_seq();
56
57    RE * parse_next_item();
58   
59    RE * parse_group();
60   
61    RE * extend_item(RE * re);
62
63    void parse_range_bound(int & lower_bound, int & upper_bound);
64
65    unsigned parse_int();
66   
67    RE * parse_escaped();
68
69    RE * parse_escaped_set();
70
71    codepoint_t parse_utf8_codepoint();
72
73    Name * parse_property_expression();
74       
75        CharsetOperatorKind getCharsetOperator();
76
77    RE * parse_charset();
78
79    codepoint_t parse_codepoint();
80
81    codepoint_t parse_escaped_codepoint();
82
83    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
84
85    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
86
87    inline void throw_incomplete_expression_error_if_end_of_stream() const;
88   
89    // CC insertion dependent on case-insensitive flag.
90    CC * build_CC(codepoint_t cp);
91   
92    void CC_add_codepoint(CC * cc, codepoint_t cp);
93   
94    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
95
96private:
97
98    cursor_t                    _cursor;
99    const cursor_t              _end;
100    ModeFlagSet fModeFlagSet;
101};
102
103}
104
105#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.