Ignore:
Timestamp:
Jul 16, 2016, 12:42:28 PM (3 years ago)
Author:
xuedongx
Message:

delete GCB as a separate type.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5037 r5091  
    99#include <re/re_name.h>
    1010#include <re/re_diff.h>
     11#include <re/re_any.h>
     12#include <re/re_start.h>
     13#include <re/re_end.h>
     14#include <re/re_cc.h>
     15#include <re/re_seq.h>
     16#include <re/re_rep.h>
     17#include <re/re_intersect.h>
     18#include <re/re_assertion.h>
    1119#include "UCD/PropertyAliases.h"
    1220#include "UCD/PropertyObjects.h"
     
    2533
    2634namespace UCD {
     35
     36void generateGraphemeClusterBoundaryRule(Name * const &property) {
     37    // 3.1.1 Grapheme Cluster Boundary Rules
     38#define Behind(x) makeLookBehindAssertion(x)
     39#define Ahead(x) makeLookAheadAssertion(x)
     40
     41    RE * GCB_Control = makeName("gcb", "cn", Name::Type::UnicodeProperty);
     42    RE * GCB_CR = makeName("gcb", "cr", Name::Type::UnicodeProperty);
     43    RE * GCB_LF = makeName("gcb", "lf", Name::Type::UnicodeProperty);
     44    RE * GCB_Control_CR_LF = makeAlt({GCB_CR, GCB_LF});
     45
     46    // Break at the start and end of text.
     47    RE * GCB_1 = makeStart();
     48    RE * GCB_2 = makeEnd();
     49    // Do not break between a CR and LF.
     50    RE * GCB_3 = makeSeq({Behind(GCB_CR), Ahead(GCB_LF)});
     51    // Otherwise, break before and after controls.
     52    RE * GCB_4 = Behind(GCB_Control_CR_LF);
     53    RE * GCB_5 = Ahead(GCB_Control_CR_LF);
     54    RE * GCB_1_5 = makeAlt({GCB_1, GCB_2, makeDiff(makeAlt({GCB_4, GCB_5}), GCB_3)});
     55
     56    RE * GCB_L = makeName("gcb", "l", Name::Type::UnicodeProperty);
     57    RE * GCB_V = makeName("gcb", "v", Name::Type::UnicodeProperty);
     58    RE * GCB_LV = makeName("gcb", "lv", Name::Type::UnicodeProperty);
     59    RE * GCB_LVT = makeName("gcb", "lvt", Name::Type::UnicodeProperty);
     60    RE * GCB_T = makeName("gcb", "t", Name::Type::UnicodeProperty);
     61    RE * GCB_RI = makeName("gcb", "ri", Name::Type::UnicodeProperty);
     62    // Do not break Hangul syllable sequences.
     63    RE * GCB_6 = makeSeq({Behind(GCB_L), Ahead(makeAlt({GCB_L, GCB_V, GCB_LV, GCB_LVT}))});
     64    RE * GCB_7 = makeSeq({Behind(makeAlt({GCB_LV, GCB_V})), Ahead(makeAlt({GCB_V, GCB_T}))});
     65    RE * GCB_8 = makeSeq({Behind(makeAlt({GCB_LVT, GCB_T})), Ahead(GCB_T)});
     66    // Do not break between regional indicator symbols.
     67    RE * GCB_8a = makeSeq({Behind(GCB_RI), Ahead(GCB_RI)});
     68    // Do not break before extending characters.
     69    RE * GCB_9 = Ahead(makeName("gcb", "ex", Name::Type::UnicodeProperty));
     70    // Do not break before SpacingMarks, or after Prepend characters.
     71    RE * GCB_9a = Ahead(makeName("gcb", "sm", Name::Type::UnicodeProperty));
     72    RE * GCB_9b = Behind(makeName("gcb", "pp", Name::Type::UnicodeProperty));
     73    RE * GCB_6_9b = makeAlt({GCB_6, GCB_7, GCB_8, GCB_8a, GCB_9, GCB_9a, GCB_9b});
     74    // Otherwise, break everywhere.
     75    RE * GCB_10 = makeSeq({Behind(makeAny()), Ahead(makeAny())});
     76
     77    //Name * gcb = makeName("gcb", Name::Type::UnicodeProperty);
     78    property->setDefinition(makeAlt({GCB_1_5, makeDiff(GCB_10, GCB_6_9b)}));
     79}
    2780
    2881bool resolvePropertyDefinition(Name * const property) {
     
    91144            Name * join = makeName("joincontrol", Name::Type::UnicodeProperty);
    92145            property->setDefinition(makeAlt({alnum, mark, conn, join}));
     146            return true;
     147        } else if (value == "GCB" || value == "NonGCB"){
     148            generateGraphemeClusterBoundaryRule(property);
    93149            return true;
    94150        }
Note: See TracChangeset for help on using the changeset viewer.