source: icGREP/icgrep-devel/icgrep/re/re_compiler.h @ 4415

Last change on this file since 4415 was 4411, checked in by cameron, 5 years ago

Support for single Unicode position lookahead assertions, \b

File size: 3.6 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef RE_TO_PABLO_COMPILER_H
8#define RE_TO_PABLO_COMPILER_H
9
10#include <re/re_re.h>
11#include <cc/cc_compiler.h>
12
13#include <string>
14#include <list>
15#include <map>
16
// Forward declaration: this header only refers to cc::CC_NameMap by reference.
namespace cc { class CC_NameMap; }
20
// Forward declarations of types from namespace pablo. Where this header uses
// them, it does so only behind pointers or references, so no full definition
// is needed here.
namespace pablo {
class Assign;
class PabloAST;
class PabloBlock;
class Var;
}
27
28/*   Marker streams represent the results of matching steps.
29     Three types of marker streams are used internally.
30     FinalMatchByte markers are used for character classes and
31     other strings identified by a one bit at their final position.
32     InitialPostPositionByte markers are used to mark matches with
33     a 1 bit immediately after a match.   InitialPostPositionByte markers
34     are generally required whenever a regular expression element
35     can match the empty string (e.g., * and ? repeated items).
36     FinalPostPositionByte markers are used for single code unit
37     lookahead assertions. 
38*/
39
40namespace re {
41
// Convention describing where a marker stream places its bits relative to a match.
enum MarkerPosition {
    FinalMatchByte,           // one bit at the final position of the match
    InitialPostPositionByte,  // one bit immediately after the match
    FinalPostPositionByte     // used for single code unit lookahead assertions
};
43
44struct MarkerType { 
45    MarkerPosition pos;
46    pablo::Assign * stream;
47};
48
49inline MarkerPosition markerPos(MarkerType m) {return m.pos;}
50
51inline pablo::Assign * markerVar(MarkerType m) {return m.stream;}
52   
53inline MarkerType makeMarker(MarkerPosition newpos, pablo::Assign * strm) {return {newpos, strm};}
54
55
56class RE_Compiler {
57public:
58
59    RE_Compiler(pablo::PabloBlock & baseCG, const cc::CC_NameMap & nameMap);
60    void initializeRequiredStreams(cc::CC_Compiler & ccc);
61    void finalizeMatchResult(MarkerType match_result);
62    MarkerType compile(RE * re) {
63        return compile(re, mCG);
64    }
65
66private:
67
68    MarkerType compile(RE * re, pablo::PabloBlock & cg);
69    MarkerType AdvanceMarker(MarkerType m, MarkerPosition newpos, pablo::PabloBlock & pb);
70   
71    void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBlock & pb);
72   
73    pablo::PabloAST * character_class_strm(Name * name, pablo::PabloBlock & pb);
74    pablo::PabloAST * nextUnicodePosition(MarkerType m, pablo::PabloBlock & pb);
75    MarkerType process(RE * re, MarkerType marker, pablo::PabloBlock & pb);
76    MarkerType process(Name * name, MarkerType marker, pablo::PabloBlock & pb);
77    MarkerType process(Seq * seq, MarkerType marker, pablo::PabloBlock & pb);
78    MarkerType process(Alt * alt, MarkerType marker, pablo::PabloBlock & pb);
79    MarkerType process(Assertion * a, MarkerType marker, pablo::PabloBlock & pb);
80    MarkerType process(Rep * rep, MarkerType marker, pablo::PabloBlock & pb);
81    MarkerType process(Diff * diff, MarkerType marker, pablo::PabloBlock & cg);
82    MarkerType process(Intersect * x, MarkerType marker, pablo::PabloBlock & cg);
83    pablo::Assign * consecutive(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
84    static bool isFixedLength(RE * regexp);
85    MarkerType processLowerBound(RE * repeated,  int lb, MarkerType marker, pablo::PabloBlock & pb);
86    MarkerType processUnboundedRep(RE * repeated, MarkerType marker, pablo::PabloBlock & pb);
87    MarkerType processBoundedRep(RE * repeated, int ub, MarkerType marker, pablo::PabloBlock & pb);
88
89    pablo::PabloBlock &                             mCG;
90    pablo::Assign *                                 mLineFeed;
91    pablo::PabloAST *                               mCRLF;
92    pablo::PabloAST *                               mUnicodeLineBreak;
93    pablo::PabloAST *                               mInitial;
94    pablo::PabloAST *                               mNonFinal;   
95};
96
97}
98
99#endif // RE_TO_PABLO_COMPILER_H
Note: See TracBrowser for help on using the repository browser.