source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4316

Last change on this file since 4316 was 4316, checked in by cameron, 5 years ago

Case insensitive matching - initial check in.

File size: 2.1 KB
Line 
/*
 *  Copyright (c) 2014 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include "re_re.h"
11#include "re_any.h"
12#include "re_name.h"
13
14#include <string>
15#include <list>
16#include <memory>
17
18namespace re {
19       
// Result type of RE_Parser::getCharsetOperator().
// The enumerators keep their default values (0 through 9, in declaration
// order), so the order below must not be changed.
// NOTE(review): the exact input that maps to each kind is decided by the
// parser implementation, which is not visible in this header -- confirm there.
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
22
// Integer type used for the code point values passed to and returned from
// the RE_Parser codepoint helpers (parse_codepoint(), build_CC(), ...).
typedef unsigned codepoint_t;
24
// Individual parser mode flags. Each enumerator is a distinct single bit so
// that several flags can be OR-ed together into one ModeFlagSet.
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1,
    MULTILINE_MODE_FLAG        = 2,
    DOTALL_MODE_FLAG           = 4,
    IGNORE_SPACE_MODE_FLAG     = 8,
    UNIX_LINES_MODE_FLAG       = 16
};

// A bitwise combination of ModeFlagType values.
typedef unsigned ModeFlagSet;
32   
33class RE_Parser
34{
35public:
36
37    static RE * parse(const std::string &input_string);
38
39private:
40
41    typedef std::string::const_iterator cursor_t;
42
43    RE_Parser(const std::string & regular_expression);
44
45    RE * parse_RE();
46   
47    RE * parse_alt();
48   
49    RE * parse_seq();
50
51    RE * parse_next_item();
52   
53    RE * parse_group();
54   
55    RE * extend_item(RE * re);
56
57    void parse_range_bound(int & lower_bound, int & upper_bound);
58
59    unsigned parse_int();
60   
61    RE * parse_escaped();
62
63    RE * parse_escaped_set();
64
65    codepoint_t parse_utf8_codepoint();
66
67    Name * parse_property_expression();
68       
69        CharsetOperatorKind getCharsetOperator();
70
71    RE * parse_charset();
72
73    codepoint_t parse_codepoint();
74
75    codepoint_t parse_escaped_codepoint();
76
77    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
78
79    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
80
81    inline void throw_incomplete_expression_error_if_end_of_stream() const;
82   
83    // CC insertion dependent on case-insensitive flag.
84    CC * build_CC(codepoint_t cp);
85   
86    void CC_add_codepoint(CC * cc, codepoint_t cp);
87   
88    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
89
90private:
91
92    cursor_t                    _cursor;
93    const cursor_t              _end;
94    ModeFlagSet fModeFlagSet;
95};
96
97}
98
99#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.