source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4412

Last change on this file since 4412 was 4412, checked in by cameron, 4 years ago

Command line parameter -i for case-insensitive matching

File size: 2.3 KB
Line 
/*
 *  Copyright (c) 2014 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include "re_re.h"
11#include "re_any.h"
12#include "re_name.h"
13
14#include <string>
15#include <list>
16#include <memory>
17
18namespace re {
19       
// Kinds of operator/token reported by RE_Parser::getCharsetOperator().
// NOTE(review): the meaning of each enumerator is implied only by its name
// here; confirm the exact input each one corresponds to in the parser
// implementation.
// The enumerators keep their implicit values (0..9) in declaration order.
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
22
// Unsigned integer type used for code point values, e.g. as the return type
// of the parser's parse_*_codepoint() routines.
typedef unsigned codepoint_t;
24
// Individual parser mode flags. Each enumerator is a distinct bit so that
// several flags can be combined into a single ModeFlagSet value.
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1,
    MULTILINE_MODE_FLAG = 2,      // not currently implemented
    DOTALL_MODE_FLAG = 4,         // not currently implemented
    IGNORE_SPACE_MODE_FLAG = 8,   // not currently implemented
    UNIX_LINES_MODE_FLAG = 16     // not currently implemented
};
// Unsigned integer holding a set of mode flags; passed to RE_Parser::parse()
// as the initial flags. Intended to hold OR-ed ModeFlagType values, whose
// enumerators are distinct bits.
typedef unsigned ModeFlagSet;
32   
33class RE_Parser
34{
35public:
36
37    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
38
39private:
40
41    typedef std::string::const_iterator cursor_t;
42
43    RE_Parser(const std::string & regular_expression);
44   
45    RE_Parser(const std::string & regular_expression, ModeFlagSet initialFlags);
46
47    RE * parse_RE();
48   
49    RE * parse_alt();
50   
51    RE * parse_seq();
52
53    RE * parse_next_item();
54   
55    RE * parse_group();
56   
57    RE * extend_item(RE * re);
58
59    void parse_range_bound(int & lower_bound, int & upper_bound);
60
61    unsigned parse_int();
62   
63    RE * parse_escaped();
64
65    RE * parse_escaped_set();
66
67    codepoint_t parse_utf8_codepoint();
68
69    Name * parse_property_expression();
70       
71        CharsetOperatorKind getCharsetOperator();
72
73    RE * parse_charset();
74
75    codepoint_t parse_codepoint();
76
77    codepoint_t parse_escaped_codepoint();
78
79    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
80
81    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
82
83    inline void throw_incomplete_expression_error_if_end_of_stream() const;
84   
85    // CC insertion dependent on case-insensitive flag.
86    CC * build_CC(codepoint_t cp);
87   
88    void CC_add_codepoint(CC * cc, codepoint_t cp);
89   
90    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
91
92private:
93
94    cursor_t                    _cursor;
95    const cursor_t              _end;
96    ModeFlagSet fModeFlagSet;
97};
98
99}
100
101#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.