Ignore:
Timestamp:
Jun 21, 2015, 4:38:51 PM (4 years ago)
Author:
nmedfort
Message:

Multiplexing bug fix and some CC changes.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4611 r4612  
    3131    name << std::hex;
    3232    if ((type == ByteClass) && (mSparseCharSet.back().hi_codepoint >= 0x80)) {
    33       name << "BC_";
     33      name << "BC";
    3434    }
    3535    else {
    36         name << "CC_";
     36        name << "CC";
    3737    }
    3838    char separator = '_';
     
    4545            name << i.lo_codepoint << '_' << i.hi_codepoint;
    4646        }
     47        separator = ',';
    4748    }
    4849    return name.str();
    4950}
    5051
    51 CodePointType CC::max_codepoint() {
    52     return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
    53 }
    54 
    55 void CC::insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     52void CC::insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
    5653    CharSetItem item(lo_codepoint, hi_codepoint);
    5754    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
     
    7572}
    7673
    77 void CC::remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint) {
     74void CC::remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
    7875    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    7976        CharSetItem & range = *i;
     
    133130        }
    134131        isect->insert_range(std::max(ra.lo_codepoint, rb.lo_codepoint), std::min(ra.hi_codepoint, rb.hi_codepoint));
    135         if (ra.hi_codepoint < rb.hi_codepoint) ++ai; 
     132        if (ra.hi_codepoint < rb.hi_codepoint) ++ai;
    136133        else ++bi;
    137134    }
     
    141138CC * caseInsensitize(const CC * cc) {
    142139    CC * cci = makeCC();
    143     for (auto i = cc->cbegin(); i != cc->cend(); i++) {
    144         caseInsensitiveInsertRange(cci, i->lo_codepoint, i->hi_codepoint);
     140    for (const CharSetItem & i : *cc) {
     141        caseInsensitiveInsertRange(cci, i.lo_codepoint, i.hi_codepoint);
    145142    }
    146143    return cci;
     
    153150 * @param hi
    154151 ** ------------------------------------------------------------------------------------------------------------- */
    155 CC * rangeIntersect(const CC * cc, const CodePointType lo, const CodePointType hi) {
     152CC * rangeIntersect(const CC * cc, const codepoint_t lo, const codepoint_t hi) {
    156153    assert ("cc cannot be null" && cc);
    157154    CC * intersect = makeCC();
     
    170167 * @param hi
    171168 ** ------------------------------------------------------------------------------------------------------------- */
    172 CC * rangeGaps(const CC * cc, const CodePointType lo, const CodePointType hi) {
     169CC * rangeGaps(const CC * cc, const codepoint_t lo, const codepoint_t hi) {
    173170    assert ("cc cannot be null" && cc);
    174171    CC * gaps = makeCC();
    175     CodePointType cp = lo;
     172    codepoint_t cp = lo;
    176173    if (cp < hi) {
    177174        auto i = cc->cbegin(), end = cc->cend();
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4611 r4612  
    1616namespace re {
    1717
    18 typedef int CodePointType;
    19 
    20 struct CharSetItem{   
    21     CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
    22     CharSetItem(const CodePointType lo, const CodePointType hi) : lo_codepoint(lo), hi_codepoint(hi) {}
    23     CodePointType lo_codepoint;
    24     CodePointType hi_codepoint;
     18typedef unsigned codepoint_t;
     19
     20struct CharSetItem {
     21    constexpr CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
     22    constexpr CharSetItem(const codepoint_t lo, const codepoint_t hi) : lo_codepoint(lo), hi_codepoint(hi) {}
     23    constexpr codepoint_t operator [](const unsigned i) const {
     24        return (i == 0) ? lo_codepoint : (i == 1) ? hi_codepoint : throw std::runtime_error("CharSetItem[] can only accept 0 or 1.");
     25    }
     26    codepoint_t lo_codepoint;
     27    codepoint_t hi_codepoint;
    2528};
    2629
     
    4649    typedef CharSetVector::const_reference          const_reference;
    4750
    48     static const CodePointType UNICODE_MAX = 0x10FFFF;
     51    static const codepoint_t UNICODE_MAX = 0x10FFFF;
    4952
    5053    std::string canonicalName(const CC_type type) const;
    5154
    52     CodePointType max_codepoint();
    53 
    54     void insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    55 
    56     void remove_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
    57 
    58     inline void insert(const CodePointType codepoint) {
     55    CharSetItem & operator [](unsigned i) {
     56        return mSparseCharSet[i];
     57    }
     58
     59    const CharSetItem & operator [](unsigned i) const {
     60        return mSparseCharSet[i];
     61    }
     62
     63    inline codepoint_t min_codepoint() const {
     64        return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.front().lo_codepoint;
     65    }
     66
     67    inline codepoint_t max_codepoint() const {
     68        return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
     69    }
     70
     71    void insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     72
     73    void remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     74
     75    inline void insert(const codepoint_t codepoint) {
    5976        insert_range(codepoint, codepoint);
    6077    }
    6178
    62     inline void remove(const CodePointType codepoint) {
     79    inline void remove(const codepoint_t codepoint) {
    6380        remove_range(codepoint, codepoint);
    6481    }
     
    116133protected:
    117134    friend CC * makeCC();
    118     friend CC * makeCC(const CodePointType codepoint);
    119     friend CC * makeCC(const CodePointType lo, const CodePointType hi);
     135    friend CC * makeCC(const codepoint_t codepoint);
     136    friend CC * makeCC(const codepoint_t lo, const codepoint_t hi);
    120137    friend CC * makeCC(const CC * cc1, const CC * cc2);
    121138    friend CC * subtractCC(const CC * cc1, const CC * cc2);
     
    126143    }
    127144    CC(const CC & cc);
    128     inline CC(const CodePointType codepoint)
     145    inline CC(const codepoint_t codepoint)
    129146    : RE(ClassTypeId::CC)
    130147    , mSparseCharSet(mCharSetAllocator) {
    131148        insert(codepoint);
    132149    }
    133     inline CC(const CodePointType lo_codepoint, const CodePointType hi_codepoint)
     150    inline CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint)
    134151    : RE(ClassTypeId::CC)
    135152    , mSparseCharSet(mCharSetAllocator) {
     
    157174    return cc.cend();
    158175}
     176
     177
    159178
    160179/**
     
    170189}
    171190
    172 inline CC * makeCC(const CodePointType codepoint) {
     191inline CC * makeCC(const codepoint_t codepoint) {
    173192    return new CC(codepoint);
    174193}
    175194
    176 inline CC * makeCC(const CodePointType lo, const CodePointType hi) {
     195inline CC * makeCC(const codepoint_t lo, const codepoint_t hi) {
    177196    return new CC(lo, hi);
    178197}
     
    188207CC * caseInsensitize(const CC * cc);
    189208
    190 CC * rangeIntersect(const CC * cc, const CodePointType lo, const CodePointType hi);
    191 
    192 CC * rangeGaps(const CC * cc, const CodePointType lo, const CodePointType hi);
     209CC * rangeIntersect(const CC * cc, const codepoint_t lo, const codepoint_t hi);
     210
     211CC * rangeGaps(const CC * cc, const codepoint_t lo, const codepoint_t hi);
    193212
    194213CC * outerRanges(const CC * cc);
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4429 r4612  
    2020enum CharsetOperatorKind
    2121        {intersectOp, setDiffOp, ampChar, hyphenChar, rangeHyphen, posixPropertyOpener, setOpener, setCloser, backSlash, emptyOperator};
    22 
    23 typedef unsigned codepoint_t;
    2422
    2523enum ModeFlagType
Note: See TracChangeset for help on using the changeset viewer.