source: icGREP/icgrep-devel/icgrep/re/re_parser.h @ 4796

Last change on this file since 4796 was 4796, checked in by cameron, 4 years ago

Parsing of name property expressions

File size: 3.2 KB
Line 
/*
 *  Copyright (c) 2014 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
7#ifndef RE_PARSER_H
8#define RE_PARSER_H
9
10#include <re/re_re.h>
11#include <re/re_any.h>
12#include <re/re_name.h>
13#include <UCD/resolve_properties.h>
14#include <string>
15#include <list>
16#include <memory>
17#include <map>
18
19
20namespace re {
21
/// Result type of RE_Parser::getCharsetOperator().
/// NOTE(review): judging by the enumerator names these classify the operator and
/// delimiter tokens met while parsing a character-set expression -- confirm against
/// the implementation.
/// The enum is deliberately unscoped and the enumerator order is significant:
/// the implicit values run from 0 (intersectOp) to 9 (emptyOperator).
enum CharsetOperatorKind {
    intersectOp,
    setDiffOp,
    ampChar,
    hyphenChar,
    rangeHyphen,
    posixPropertyOpener,
    setOpener,
    setCloser,
    backSlash,
    emptyOperator
};
24
/// Mode flags for regular-expression parsing.
/// Every enumerator is a distinct single bit (1, 2, 4, 8, 16), so the flags can be
/// combined with bitwise OR; NOTE(review): presumably the combination is what a
/// ModeFlagSet holds -- confirm against the parser implementation.
enum ModeFlagType {
    CASE_INSENSITIVE_MODE_FLAG = 1 << 0,   // == 1
    MULTILINE_MODE_FLAG        = 1 << 1,   // == 2,  not currently implemented
    DOTALL_MODE_FLAG           = 1 << 2,   // == 4,  not currently implemented
    IGNORE_SPACE_MODE_FLAG     = 1 << 3,   // == 8,  not currently implemented
    UNIX_LINES_MODE_FLAG       = 1 << 4    // == 16, not currently implemented
};
31   
/// Compile-time limits named for the lower and upper bound of a repetition.
/// NOTE(review): where and how these limits are enforced is not visible in this
/// header -- see the parser implementation.
constexpr int MAX_REPETITION_LOWER_BOUND = 1024;
constexpr int MAX_REPETITION_UPPER_BOUND = 2048;
34
/// Unsigned integer type used for the initial mode flags passed to
/// RE_Parser::parse() and for RE_Parser's fModeFlagSet member.
using ModeFlagSet = unsigned;
36   
37class RE_Parser
38{
39public:
40
41    friend Name * UCD::resolveProperty(const std::string, RE_Parser *);
42    friend Name * UCD::resolveProperty(const std::string, const std::string, RE_Parser *);
43
44    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
45
46private:
47
48    using NameMap = std::map<std::pair<std::string, std::string>, re::Name *>;
49
50    typedef std::string::const_iterator cursor_t;
51
52    RE_Parser(const std::string & regular_expression);
53   
54    RE_Parser(const std::string & regular_expression, ModeFlagSet initialFlags);
55
56    RE * parse_RE();
57   
58    RE * parse_alt();
59   
60    RE * parse_seq();
61
62    RE * parse_next_item();
63   
64    RE * parse_group();
65   
66    RE * extend_item(RE * re);
67
68    void parse_range_bound(int & lo_codepoint, int & hi_codepoint);
69
70    unsigned parse_int();
71   
72    RE * parse_escaped();
73
74    RE * parseEscapedSet();
75
76    codepoint_t parse_utf8_codepoint();
77
78    Name * parsePropertyExpression();
79   
80    CC * parseNamePatternExpression();
81   
82    RE * makeComplement(RE * s);
83    RE * makeWordBoundary();
84    RE * makeWordNonBoundary();
85    Name * makeDigitSet();
86    Name * makeAlphaNumeric();
87    Name * makeWhitespaceSet();
88    Name * makeWordSet();
89
90    Name * createName(const std::string value);
91    Name * createName(const std::string prop, const std::string value);
92
93        CharsetOperatorKind getCharsetOperator();
94
95    RE * parse_charset();
96
97    codepoint_t parse_codepoint();
98
99    codepoint_t parse_escaped_codepoint();
100
101    codepoint_t parse_hex_codepoint(int mindigits, int maxdigits);
102
103    codepoint_t parse_octal_codepoint(int mindigits, int maxdigits);
104
105    inline void throw_incomplete_expression_error_if_end_of_stream() const;
106   
107    // CC insertion dependent on case-insensitive flag.
108    CC * build_CC(codepoint_t cp);
109   
110    void CC_add_codepoint(CC * cc, codepoint_t cp);
111   
112    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
113
114    static std::string canonicalize(const cursor_t begin, const cursor_t end);
115
116private:
117
118    cursor_t                    _cursor;
119    const cursor_t              _end;
120    ModeFlagSet                 fModeFlagSet;
121    bool                        fNested;
122    NameMap                     mNameMap;
123};
124
125}
126
127#endif // RE_PARSER_H
Note: See TracBrowser for help on using the repository browser.