source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4673

Last change on this file since 4673 was 4673, checked in by nmedfort, 4 years ago

Moved resolveProperty responsibilities out of RE_Parser but kept expansion of Name objects with definitions in it.

File size: 3.1 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include <re/re_re.h>
11#include <re/re_any.h>
12#include <re/re_name.h>
13#include <UCD/resolve_properties.h>
14#include <string>
15#include <list>
16#include <memory>
17#include <map>
18
19
20namespace re {
21
// Result type of RE_Parser::getCharsetOperator().
// NOTE(review): judging by the names, each enumerator labels one kind of
// syntactic element met while scanning a character-set expression; the exact
// input text behind each one is decided in the parser implementation, not here.
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
24
// Individual parser mode flags. Every flag has a distinct single-bit value;
// presumably they are OR-ed together into a ModeFlagSet (verify against callers).
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1 << 0,
    MULTILINE_MODE_FLAG        = 1 << 1,  // not currently implemented
    DOTALL_MODE_FLAG           = 1 << 2,  // not currently implemented
    IGNORE_SPACE_MODE_FLAG     = 1 << 3,  // not currently implemented
    UNIX_LINES_MODE_FLAG       = 1 << 4   // not currently implemented
};
31   
// Caps on repetition bounds.
// NOTE(review): presumably the largest lower/upper counts the parser accepts
// in a bounded repetition; the check itself is not part of this header.
constexpr int MAX_REPETITION_LOWER_BOUND = 1024;
constexpr int MAX_REPETITION_UPPER_BOUND = 2048;
34
// Unsigned integer type used to carry parser mode flags (see ModeFlagType).
using ModeFlagSet = unsigned;
36   
37class RE_Parser
38{
39public:
40
41    friend Name * UCD::resolveProperty(const std::string, RE_Parser *);
42    friend Name * UCD::resolveProperty(const std::string, const std::string, RE_Parser *);
43
44    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
45
46private:
47
48    using NameMap = std::map<std::pair<std::string, std::string>, re::Name *>;
49
50    typedef std::string::const_iterator cursor_t;
51
52    RE_Parser(const std::string & regular_expression);
53   
54    RE_Parser(const std::string & regular_expression, ModeFlagSet initialFlags);
55
56    RE * parse_RE();
57   
58    RE * parse_alt();
59   
60    RE * parse_seq();
61
62    RE * parse_next_item();
63   
64    RE * parse_group();
65   
66    RE * extend_item(RE * re);
67
68    void parse_range_bound(int & lo_codepoint, int & hi_codepoint);
69
70    unsigned parse_int();
71   
72    RE * parse_escaped();
73
74    RE * parseEscapedSet();
75
76    codepoint_t parse_utf8_codepoint();
77
78    Name * parsePropertyExpression();
79       
80    RE * makeComplement(RE * s);
81    RE * makeWordBoundary();
82    RE * makeWordNonBoundary();
83    Name * makeDigitSet();
84    Name * makeAlphaNumeric();
85    Name * makeWhitespaceSet();
86    Name * makeWordSet();
87
88    Name * createName(const std::string value);
89    Name * createName(const std::string prop, const std::string value);
90
91        CharsetOperatorKind getCharsetOperator();
92
93    RE * parse_charset();
94
95    codepoint_t parse_codepoint();
96
97    codepoint_t parse_escaped_codepoint();
98
99    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
100
101    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
102
103    inline void throw_incomplete_expression_error_if_end_of_stream() const;
104   
105    // CC insertion dependent on case-insensitive flag.
106    CC * build_CC(codepoint_t cp);
107   
108    void CC_add_codepoint(CC * cc, codepoint_t cp);
109   
110    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
111
112    static std::string canonicalize(const cursor_t begin, const cursor_t end);
113
114private:
115
116    cursor_t                    _cursor;
117    const cursor_t              _end;
118    ModeFlagSet                 fModeFlagSet;
119    NameMap                     mNameMap;
120};
121
122}
123
124#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.