Ignore:
Timestamp:
Jul 16, 2016, 12:42:28 PM (3 years ago)
Author:
xuedongx
Message:

delete GCB as a separate type.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp

    r5083 r5091  
    1010#include <re/re_intersect.h>
    1111#include <re/re_assertion.h>
    12 #include <re/re_grapheme_boundary.hpp>
    1312#include <re/re_analysis.h>
    1413#include <re/re_memoizer.hpp>
     
    3029}
    3130
    32 Name * generateGraphemeClusterBoundaryRule() { // Builds and returns a Name "gcb" whose definition combines the boundary rules assembled below.
    33     // 3.1.1 Grapheme Cluster Boundary Rules (presumably the section numbering of Unicode UAX #29 -- confirm)
    34     #define Behind(x) makeLookBehindAssertion(x)
    35     #define Ahead(x) makeLookAheadAssertion(x)
    36 
    37     RE * GCB_Control = makeName("gcb", "cn", Name::Type::UnicodeProperty); // NOTE(review): GCB_Control is never referenced again in this function -- confirm whether it was meant to be part of GCB_Control_CR_LF.
    38     RE * GCB_CR = makeName("gcb", "cr", Name::Type::UnicodeProperty);
    39     RE * GCB_LF = makeName("gcb", "lf", Name::Type::UnicodeProperty);
    40     RE * GCB_Control_CR_LF = makeAlt({GCB_CR, GCB_LF}); // Despite the name, this alternation contains only CR and LF.
    41 
    42     // Break at the start and end of text.
    43     RE * GCB_1 = makeStart();
    44     RE * GCB_2 = makeEnd();
    45     // Do not break between a CR and LF.
    46     RE * GCB_3 = makeSeq({Behind(GCB_CR), Ahead(GCB_LF)});
    47     // Otherwise, break before and after controls.
    48     RE * GCB_4 = Behind(GCB_Control_CR_LF);
    49     RE * GCB_5 = Ahead(GCB_Control_CR_LF);
    50     RE * GCB_1_5 = makeAlt({GCB_1, GCB_2, makeDiff(makeAlt({GCB_4, GCB_5}), GCB_3)}); // Rules 1-5 combined: start, end, or (rule 4 or rule 5) with the rule-3 CR/LF pair subtracted.
    51 
    52     RE * GCB_L = makeName("gcb", "l", Name::Type::UnicodeProperty);
    53     RE * GCB_V = makeName("gcb", "v", Name::Type::UnicodeProperty);
    54     RE * GCB_LV = makeName("gcb", "lv", Name::Type::UnicodeProperty);
    55     RE * GCB_LVT = makeName("gcb", "lvt", Name::Type::UnicodeProperty);
    56     RE * GCB_T = makeName("gcb", "t", Name::Type::UnicodeProperty);
    57     RE * GCB_RI = makeName("gcb", "ri", Name::Type::UnicodeProperty);
    58     // Do not break Hangul syllable sequences.
    59     RE * GCB_6 = makeSeq({Behind(GCB_L), Ahead(makeAlt({GCB_L, GCB_V, GCB_LV, GCB_LVT}))});
    60     RE * GCB_7 = makeSeq({Behind(makeAlt({GCB_LV, GCB_V})), Ahead(makeAlt({GCB_V, GCB_T}))});
    61     RE * GCB_8 = makeSeq({Behind(makeAlt({GCB_LVT, GCB_T})), Ahead(GCB_T)});
    62     // Do not break between regional indicator symbols.
    63     RE * GCB_8a = makeSeq({Behind(GCB_RI), Ahead(GCB_RI)});
    64     // Do not break before extending characters.
    65     RE * GCB_9 = Ahead(makeName("gcb", "ex", Name::Type::UnicodeProperty));
    66     // Do not break before SpacingMarks, or after Prepend characters.
    67     RE * GCB_9a = Ahead(makeName("gcb", "sm", Name::Type::UnicodeProperty));
    68     RE * GCB_9b = Behind(makeName("gcb", "pp", Name::Type::UnicodeProperty));
    69     RE * GCB_6_9b = makeAlt({GCB_6, GCB_7, GCB_8, GCB_8a, GCB_9, GCB_9a, GCB_9b}); // All "do not break" rules (6 through 9b) as one alternation.
    70     // Otherwise, break everywhere.
    71     RE * GCB_10 = makeSeq({Behind(makeAny()), Ahead(makeAny())});
    72 
    73     Name * gcb = makeName("gcb", Name::Type::UnicodeProperty);
    74     gcb->setDefinition(makeAlt({GCB_1_5, makeDiff(GCB_10, GCB_6_9b)})); // Final rule: rules 1-5, or rule 10 minus the "do not break" cases of rules 6-9b.
    75     return gcb;
    76 } // NOTE(review): the Behind/Ahead macros defined above are not #undef'd within the visible lines.
    77 
    78 Name * graphemeClusterRule = nullptr;
     31Name * ZeroWidth = nullptr;
    7932
    8033RE * resolve(RE * re) {
     
    8538            if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
    8639                name->setDefinition(resolve(name->getDefinition()));
    87             } else if (LLVM_LIKELY(name->getType() == Name::Type::UnicodeProperty)) {
     40            } else if (LLVM_LIKELY(name->getType() == Name::Type::UnicodeProperty || name->getType() == Name::Type::ZeroWidth)) {
    8841                if (UCD::resolvePropertyDefinition(name)) {
    89                     resolve(name->getDefinition());
     42                    if (name->getType() == Name::Type::ZeroWidth) {
     43                        ZeroWidth = name;
     44                    }
     45                    resolve(name->getDefinition());
    9046                } else {
    9147                    #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     
    160116            return resolve(makeName("intersect", intersectCC(lh, rh)));
    161117        }
    162     } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) {
    163         if (LLVM_LIKELY(gb->getBoundaryRule() == nullptr)) {
    164             switch (gb->getType()) {
    165                 case GraphemeBoundary::Type::ClusterBoundary:
    166                     if (graphemeClusterRule == nullptr) {
    167                         graphemeClusterRule = cast<Name>(resolve(generateGraphemeClusterBoundaryRule()));
    168                     }
    169                     gb->setBoundaryRule(graphemeClusterRule);
    170                     break;
    171                 default:
    172                     throw std::runtime_error("Only grapheme cluster boundary rules are supported in icGrep 1.0");
    173             }
    174         }
    175         if (gb->getExpression()) {
    176             resolve(gb->getExpression());
    177         }
    178118    }
    179119    return re;
     
    211151        gather(cast<Intersect>(re)->getLH());
    212152        gather(cast<Intersect>(re)->getRH());
    213     } else if (isa<GraphemeBoundary>(re)) {
    214         if (cast<GraphemeBoundary>(re)->getExpression()) {
    215             gather(cast<GraphemeBoundary>(re)->getExpression());
    216         }
    217         gather(cast<GraphemeBoundary>(re)->getBoundaryRule());
    218     }
     153    }
    219154}
    220155   
    221 UCD::UCDCompiler::NameMap resolveNames(RE * re, Name * &Rule) {
     156UCD::UCDCompiler::NameMap resolveNames(RE * re, Name * &zerowidth) {
    222157
    223     graphemeClusterRule = nullptr;
     158    ZeroWidth = nullptr;
    224159    re = resolve(re);
    225160    gather(re);
    226     Rule = graphemeClusterRule;
     161    zerowidth = ZeroWidth;
    227162   
    228163    return nameMap;
Note: See TracChangeset for help on using the changeset viewer.