source: icGREP/icgrep-devel/icgrep/re/re_compiler.h @ 4405

Last change on this file since 4405 was 4405, checked in by cameron, 5 years ago

AST support for Lookahead/Lookbehind assertions

File size: 3.6 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_TO_PABLO_COMPILER_H
8#define RE_TO_PABLO_COMPILER_H
9
10#include <re/re_re.h>
11#include <cc/cc_compiler.h>
12
13#include <string>
14#include <list>
15#include <map>
16
// Forward declaration: this header refers to CC_NameMap only by reference
// (see the RE_Compiler constructor), so an incomplete type is sufficient.
namespace cc {
class CC_NameMap;
}
20
// Forward declarations of the Pablo types that this header uses only through
// pointers and references; their full definitions are not needed here.
namespace pablo {
class PabloBlock;
class PabloAST;
class Assign;
class Var;
}
27
/*  Marker streams represent the results of matching steps.
    Two types of marker streams are used internally.
    FinalByte markers are used for character classes and
    other strings, and mark a match with a 1 bit at its final position.
    PostPosition markers are used to mark matches with
    a 1 bit immediately after a match.   PostPosition markers
    are generally required whenever a regular expression element
    can match the empty string (e.g., * and ? repeated items).
*/
37   
38namespace re {
39
// Position convention of a marker stream:
//   FinalByte    - the 1 bit sits on the final position of the match.
//   PostPosition - the 1 bit sits immediately after the match.
enum MarkerPosition {FinalByte, PostPosition};
41
42struct MarkerType { 
43    MarkerPosition pos;
44    pablo::Assign * stream;
45};
46
47inline bool isPostPositionMarker(MarkerType m) {
48    return m.pos == PostPosition;
49}
50
51inline bool isFinalPositionMarker(MarkerType m) {
52    return m.pos == FinalByte;
53}
54
55MarkerType makePostPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
56
57MarkerType makeFinalPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
58
59pablo::Assign * markerStream(MarkerType m, pablo::PabloBlock & pb);
60
61pablo::Var * markerVar(MarkerType m, pablo::PabloBlock & pb);
62
63pablo::Var * postPositionVar(MarkerType m, pablo::PabloBlock & pb);
64
65class RE_Compiler {
66public:
67
68    RE_Compiler(pablo::PabloBlock & baseCG, const cc::CC_NameMap & nameMap);
69    void initializeRequiredStreams(cc::CC_Compiler & ccc);
70    void finalizeMatchResult(MarkerType match_result);
71    MarkerType compile(RE * re) {
72        return compile(re, mCG);
73    }
74
75private:
76
77    MarkerType compile(RE * re, pablo::PabloBlock & cg);
78
79    pablo::PabloAST * character_class_strm(Name * name, pablo::PabloBlock & pb);
80    pablo::PabloAST * nextUnicodePosition(MarkerType m, pablo::PabloBlock & pb);
81    MarkerType process(RE * re, MarkerType marker, pablo::PabloBlock & pb);
82    MarkerType process(Name * name, MarkerType marker, pablo::PabloBlock & pb);
83    MarkerType process(Seq * seq, MarkerType marker, pablo::PabloBlock & pb);
84    MarkerType process(Alt * alt, MarkerType marker, pablo::PabloBlock & pb);
85    MarkerType process(Assertion * a, MarkerType marker, pablo::PabloBlock & pb);
86    MarkerType process(Rep * rep, MarkerType marker, pablo::PabloBlock & pb);
87    MarkerType process(Diff * diff, MarkerType marker, pablo::PabloBlock & cg);
88    MarkerType process(Intersect * x, MarkerType marker, pablo::PabloBlock & cg);
89    pablo::Assign * consecutive(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
90    static bool isFixedLength(RE * regexp);
91    MarkerType processLowerBound(RE * repeated,  int lb, MarkerType marker, pablo::PabloBlock & pb);
92    MarkerType processUnboundedRep(RE * repeated, MarkerType marker, pablo::PabloBlock & pb);
93    MarkerType processBoundedRep(RE * repeated, int ub, MarkerType marker, pablo::PabloBlock & pb);
94
95    pablo::PabloBlock &                             mCG;
96    pablo::Var *                                    mLineFeed;
97    pablo::PabloAST *                               mCRLF;
98    pablo::PabloAST *                               mUnicodeLineBreak;
99    pablo::PabloAST *                               mInitial;
100    pablo::PabloAST *                               mNonFinal;   
101};
102
103}
104
#endif // RE_TO_PABLO_COMPILER_H
Note: See TracBrowser for help on using the repository browser.