source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4643

Last change on this file since 4643 was 4614, checked in by nmedfort, 4 years ago

Replaced CharSetItem with a std::pair.

File size: 2.4 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include "re_re.h"
11#include "re_any.h"
12#include "re_name.h"
13
14#include <string>
15#include <list>
16#include <memory>
17
18namespace re {
19       
// Token kinds returned by RE_Parser::getCharsetOperator().
// Enumerators are unscoped and take the default values 0..9 in the order listed.
// NOTE(review): the exact input syntax each kind corresponds to is defined in the
// parser implementation, not in this header -- confirm there before relying on it.
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
22
// Parser mode flags. Each enumerator is a distinct single bit.
// NOTE(review): presumably these are OR-ed together into a ModeFlagSet -- confirm
// against the parser implementation.
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1,
    MULTILINE_MODE_FLAG = 2,        // not currently implemented
    DOTALL_MODE_FLAG = 4,           // not currently implemented
    IGNORE_SPACE_MODE_FLAG = 8,     // not currently implemented
    UNIX_LINES_MODE_FLAG = 16       // not currently implemented
};
29   
// Limits for repetition bounds.
// NOTE(review): where and how these limits are enforced is not visible in this
// header -- confirm in the parser implementation.
const int MAX_REPETITION_LOWER_BOUND = 1024;
const int MAX_REPETITION_UPPER_BOUND = 2048;
32
// Integer type used to carry parser mode flags (see ModeFlagType).
typedef unsigned ModeFlagSet;
34   
35class RE_Parser
36{
37public:
38
39    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
40
41private:
42
43    typedef std::string::const_iterator cursor_t;
44
45    RE_Parser(const std::string & regular_expression);
46   
47    RE_Parser(const std::string & regular_expression, ModeFlagSet initialFlags);
48
49    RE * parse_RE();
50   
51    RE * parse_alt();
52   
53    RE * parse_seq();
54
55    RE * parse_next_item();
56   
57    RE * parse_group();
58   
59    RE * extend_item(RE * re);
60
61    void parse_range_bound(int & lo_codepoint, int & hi_codepoint);
62
63    unsigned parse_int();
64   
65    RE * parse_escaped();
66
67    RE * parse_escaped_set();
68
69    codepoint_t parse_utf8_codepoint();
70
71    Name * parse_property_expression();
72       
73        CharsetOperatorKind getCharsetOperator();
74
75    RE * parse_charset();
76
77    codepoint_t parse_codepoint();
78
79    codepoint_t parse_escaped_codepoint();
80
81    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
82
83    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
84
85    inline void throw_incomplete_expression_error_if_end_of_stream() const;
86   
87    // CC insertion dependent on case-insensitive flag.
88    CC * build_CC(codepoint_t cp);
89   
90    void CC_add_codepoint(CC * cc, codepoint_t cp);
91   
92    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
93
94private:
95
96    cursor_t                    _cursor;
97    const cursor_t              _end;
98    ModeFlagSet fModeFlagSet;
99};
100
101}
102
103#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.