Ignore:
Timestamp:
Jan 7, 2015, 8:34:23 PM (5 years ago)
Author:
cameron
Message:

Support for single-Unicode-position lookahead assertions, \b

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4410 r4411  
    2626}
    2727
    28 /*  Marker streams represent the results of matching steps.
    29     Two types of marker streams are used internally.
    30     FinalByte markers are used for character classes and
    31     other strings by a one bit at their final position.
    32     PostPosition markers are used to mark matches with
    33     a 1 bit immediately after a match.   PostPosition markers
    34     are generally required whenever a regular expression element
    35     can match the empty string (e.g., * and ? repeated items).
     28/*   Marker streams represent the results of matching steps.
     29     Three types of marker streams are used internally.
     30     FinalMatchByte markers are used for character classes and
     31     other strings identified by a one bit at their final position.
     32     InitialPostPositionByte markers are used to mark matches with
     33     a 1 bit immediately after a match.   InitialPostPositionByte markers
     34     are generally required whenever a regular expression element
     35     can match the empty string (e.g., * and ? repeated items).
     36     FinalPostPositionByte markers are used for single-code-unit
     37     lookahead assertions.
    3638*/
    37    
     39
    3840namespace re {
    3941
    40 enum MarkerPosition {FinalByte, PostPosition};
     42enum MarkerPosition {FinalMatchByte, InitialPostPositionByte, FinalPostPositionByte};
    4143
    4244struct MarkerType {
     
    4547};
    4648
    47 inline bool isPostPositionMarker(MarkerType m) {
    48     return m.pos == PostPosition;
    49 }
     49inline MarkerPosition markerPos(MarkerType m) {return m.pos;}
    5050
    51 inline bool isFinalPositionMarker(MarkerType m) {
    52     return m.pos == FinalByte;
    53 }
     51inline pablo::Assign * markerVar(MarkerType m) {return m.stream;}
     52   
     53inline MarkerType makeMarker(MarkerPosition newpos, pablo::Assign * strm) {return {newpos, strm};}
    5454
    55 MarkerType makePostPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
    56 
    57 MarkerType makeFinalPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
    58 
    59 pablo::Assign * markerStream(MarkerType m, pablo::PabloBlock &);
    60 
    61 pablo::Assign * markerVar(MarkerType m, pablo::PabloBlock & pb);
    62 
    63 pablo::Assign * postPositionVar(MarkerType m, pablo::PabloBlock & pb);
    6455
    6556class RE_Compiler {
     
    7667
    7768    MarkerType compile(RE * re, pablo::PabloBlock & cg);
    78 
     69    MarkerType AdvanceMarker(MarkerType m, MarkerPosition newpos, pablo::PabloBlock & pb);
     70   
     71    void AlignMarkers(MarkerType & m1, MarkerType & m2, pablo::PabloBlock & pb);
     72   
    7973    pablo::PabloAST * character_class_strm(Name * name, pablo::PabloBlock & pb);
    8074    pablo::PabloAST * nextUnicodePosition(MarkerType m, pablo::PabloBlock & pb);
Note: See TracChangeset for help on using the changeset viewer.