Changeset 4812


Ignore:
Timestamp:
Oct 1, 2015, 2:54:17 PM (2 years ago)
Author:
nmedfort
Message:

Embedded UnicodeSet into CC objects (will currently cause memory leak)

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r4667 r4812  
    1010#include <sstream>
    1111#include <algorithm>
     12#include <assert.h>
     13#include <llvm/Support/Casting.h>
     14
     15using namespace llvm;
    1216
    1317namespace UCD {
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4808 r4812  
    1919    assert (!suffix->empty());
    2020    mSuffixVar = mCharacterClassCompiler.compileCC(suffix, entry);
    21     generateRange(ifRanges, 0, CC::UNICODE_MAX, entry);
     21    generateRange(ifRanges, 0, UNICODE_MAX, entry);
    2222}
    2323
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r4631 r4812  
    2525#include <include/simd-lib/builtins.hpp>
    2626
    27 using namespace re;
    28 
    2927namespace UCD {
    3028
     
    3634const size_t QUAD_BITS = (8 * sizeof(bitquad_t));
    3735const size_t MOD_QUAD_BIT_MASK = QUAD_BITS - 1;
    38 const size_t UNICODE_QUAD_COUNT = (CC::UNICODE_MAX + 1) / QUAD_BITS;
     36const size_t UNICODE_QUAD_COUNT = (UNICODE_MAX + 1) / QUAD_BITS;
    3937const bitquad_t FULL_QUAD_MASK = -1;
    4038
    4139inline run_type_t typeOf(const run_t & run) {
    42     return std::get<0>(run);
     40    return run.first;
    4341}
    4442
    4543inline UnicodeSet::length_t lengthOf(const run_t & run) {
    46     return std::get<1>(run);
     44    return run.second;
    4745}
    4846
     
    7977}
    8078
     79#ifndef NDEBUG
    8180/** ------------------------------------------------------------------------------------------------------------- *
    8281 * @brief runLengthSumsUpToUnicodeQuadCount
     
    9089    }
    9190    return sum == UNICODE_QUAD_COUNT;
     91}
     92#endif
     93
     94/** ------------------------------------------------------------------------------------------------------------- *
     95 * @brief empty
     96 ** ------------------------------------------------------------------------------------------------------------- */
     97bool UnicodeSet::empty() const {
     98    return (mRuns.size() == 1) && typeOf(mRuns.front()) == Empty;
     99}
     100
     101/** ------------------------------------------------------------------------------------------------------------- *
     102 * @brief size
     103 ** ------------------------------------------------------------------------------------------------------------- */
     104UnicodeSet::size_type UnicodeSet::size() const {
     105    return std::distance(begin(), end());
     106}
     107
     108/** ------------------------------------------------------------------------------------------------------------- *
     109 * @brief front
     110 ** ------------------------------------------------------------------------------------------------------------- */
     111UnicodeSet::interval_t UnicodeSet::front() const {
     112    return *begin();
     113}
     114
     115/** ------------------------------------------------------------------------------------------------------------- *
     116 * @brief back
     117 ** ------------------------------------------------------------------------------------------------------------- */
     118UnicodeSet::interval_t UnicodeSet::back() const {
     119    auto back = begin();
     120    for (auto i = back; i != end(); back = i++);
     121    return *back;
    92122}
    93123
     
    322352}
    323353
     354///** ------------------------------------------------------------------------------------------------------------- *
     355// * @brief insert_range
     356// ** ------------------------------------------------------------------------------------------------------------- */
     357//void UnicodeSet::insert_range(const codepoint_t lo, const codepoint_t hi)  {
     358
     359//    if (LLVM_UNLIKELY(lo > hi)) {
     360//        throw std::runtime_error('[' + std::to_string(lo) + ',' + std::to_string(hi) + "] is an illegal codepoint range!");
     361//    } else if (LLVM_UNLIKELY(hi >= 0x110000)) {
     362//        throw std::runtime_error(std::to_string(hi) + " exceeds maximum code point.");
     363//    }
     364
     365//    auto r = mRuns.begin();
     366//    auto q = mQuads.begin();
     367//    unsigned offset = 0;
     368
     369//    auto lo_quad_no = lo / QUAD_BITS;
     370//    auto lo_offset = lo & MOD_QUAD_BIT_MASK;
     371
     372//    auto hi_quad_no = hi / QUAD_BITS;
     373//    auto hi_offset = hi & MOD_QUAD_BIT_MASK;
     374
     375//    // Scan up to the lo codepoint
     376//    for (;;) {
     377//        assert (r != mRuns.end());
     378//        const auto l = lengthOf(*r);
     379//        if ((offset + l) > lo_quad_no) {
     380//            break;
     381//        }
     382//        if (typeOf(*r) == Mixed) {
     383//            q += lengthOf(*r);
     384//        }
     385//        offset += l;
     386//        ++r;
     387//    }
     388
     389//    // Test whether the range is already 'full' and skip ahead to the first empty or mixed quad.
     390//    // If the entire [lo,hi] range is already covered by a Full run, abort.
     391//    while (typeOf(*r) == Full) {
     392//        const auto l = lengthOf(*r);
     393//        lo_quad_no += l;
     394//        offset = lo_quad_no;
     395//        lo_offset = 0;
     396//        if (lo_quad_no > hi_quad_no) {
     397//            return;
     398//        }
     399//        ++r;
     400//    }
     401
     402//    // Otherwise, some portion of this range has to be inserted into the current sparse set.
     403//    // Begin by inserting the initial (potentially) partial lo quad.
     404//    const bitquad_t lo_quad = (FULL_QUAD_MASK << lo_offset);
     405//    const bitquad_t hi_quad = (FULL_QUAD_MASK >> (QUAD_BITS - 1 - hi_offset));
     406//    bitquad_t quad = (lo_quad_no == hi_quad_no) ? (lo_quad & hi_quad) : lo_quad;
     407//    run_type_t newType = (quad == FULL_QUAD_MASK) ? Full : ((quad == 0) ? Empty : Mixed);
     408//    run_type_t runType = typeOf(*r);
     409//    // If the original run is Mixed, we may be able to simply update the quad accordingly.
     410//    if (runType == Mixed) {
     411//        q += (lo_quad_no - offset);
     412//        quad |= *q;
     413//        if (LLVM_LIKELY(quad != FULL_QUAD_MASK)) {
     414//            *q = quad;
     415//            if (lo_quad_no == hi_quad_no) {
     416//                return;
     417//            }
     418//        } else { // we filled a Mixed quad
     419//            mQuads.erase(q);
     420//        }
     421//        newType = Full;
     422//    }
     423//    auto length = lengthOf(*r);
     424//    auto splitAt = length - (lo_quad_no - offset) - 1;
     425//    if (splitAt) {
     426//        // reduce the original run length
     427//        lengthOf(*r) = splitAt;
     428//        // and add in a new quad
     429//        r = mRuns.emplace(r, newType, 1);
     430//    } else { // we're inserting this quad at the beginning of the run
     431//        typeOf(*r) = newType;
     432//        lengthOf(*r) = 1;
     433//    }
     434//    if (newType == Mixed) {
     435//        q = mQuads.emplace(q, quad);
     436//    }
     437//    length -= splitAt + 1;
     438//    auto remaining = (hi_quad_no - lo_quad_no);
     439//    // We're inserting a Full run so if the original run type was Full and exceeds the
     440//    // length of what we're inserting, we can abort without considering the hi_quad
     441//    if (runType == Full && length > remaining) {
     442//        return;
     443//    }
     444//    if (remaining) {
     445//        r = mRuns.emplace(r, Full, remaining);
     446
     447
     448
     449//    }
     450
     451//}
     452
     453
     454
    324455/** ------------------------------------------------------------------------------------------------------------- *
    325456 * @brief contains
     
    356487bool UnicodeSet::intersects(const codepoint_t lo, const codepoint_t hi) const {
    357488    for (auto range : *this) {
    358         if (hi_codepoint(range) < lo) {
     489        if (range.second < lo) {
    359490            continue;
    360491        }
    361         if (lo_codepoint(range) > hi) {
     492        if (range.first > hi) {
    362493            break;
    363494        }
     
    394525void UnicodeSet::iterator::advance(const unsigned n) {
    395526
    396     assert (n == 1);   
     527    assert (n == 1);
    397528
    398529    if (LLVM_UNLIKELY(mMinCodePoint >= 0x110000)) {
     
    414545                break;
    415546            }
    416         }
    417         else { // if (typeOf(t) == Mixed)
     547        } else { // if (typeOf(t) == Mixed)
    418548            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    419549                const bitquad_t m = (*mQuadIterator) & (FULL_QUAD_MASK << mQuadOffset);
     
    461591                break;
    462592            }
    463         }
    464         else { // if (typeOf(t) == Mixed)
     593        } else { // if (typeOf(t) == Mixed)
    465594            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    466595                const bitquad_t m = ((~(*mQuadIterator)) & FULL_QUAD_MASK) & (FULL_QUAD_MASK << mQuadOffset);
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r4631 r4812  
    33#include <stdint.h>
    44#include <vector>
    5 #include <re/re_cc.h>
    65#include <boost/iterator/iterator_facade.hpp>
    76
     
    4645    using quad_iterator_return_t = std::pair<run_t, bitquad_t>;
    4746
    48     using codepoint_t = re::codepoint_t;
    49     using interval_t = re::interval_t;
     47    using codepoint_t = unsigned;
     48    using interval_t = std::pair<codepoint_t, codepoint_t>;
     49
    5050    using RunVector = std::vector<run_t>;
    5151    using QuadVector = std::vector<bitquad_t>;
     52
     53    using size_type = RunVector::size_type;
    5254
    5355    class iterator : public boost::iterator_facade<iterator, interval_t, boost::forward_traversal_tag, interval_t> {
     
    112114
    113115        inline run_type_t type() const {
    114             return std::get<0>(*mRunIterator);
     116            return mRunIterator->first;
    115117        }
    116118
    117119        inline length_t length() const {
    118             return std::get<1>(*mRunIterator) - mOffset;
     120            return mRunIterator->second - mOffset;
    119121        }
    120122
     
    145147    bool intersects(const codepoint_t lo, const codepoint_t hi) const;
    146148
     149    inline void insert(const codepoint_t cp) {
     150        *this = std::move(*this + UnicodeSet(cp));
     151    }
     152
     153    inline void insert_range(const codepoint_t lo, const codepoint_t hi) {
     154        *this = std::move(*this + UnicodeSet(lo, hi));
     155    }
     156
     157    bool empty() const;
     158
     159    size_type size() const;
     160
     161    interval_t front() const;
     162
     163    interval_t back() const;
     164
    147165    void dump(llvm::raw_ostream & out) const;
    148166
     
    152170    UnicodeSet operator-(const UnicodeSet & other) const;
    153171    UnicodeSet operator^(const UnicodeSet & other) const;
     172
    154173    inline UnicodeSet & operator=(const UnicodeSet & other) = default;
    155174    inline UnicodeSet & operator=(UnicodeSet && other) = default;
     
    172191};
    173192
     193enum : UnicodeSet::codepoint_t { UNICODE_MAX = 0x10FFFF };
     194
    174195inline void UnicodeSet::swap(UnicodeSet & other) {
    175196    mRuns.swap(other.mRuns); mQuads.swap(other.mQuads);
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4797 r4812  
    102102        }
    103103        if (combine) {
    104             auto i = cc->cbegin();
    105             for (auto j = i; ++j != cc->cend(); i = j) {
     104            auto i = cc->begin(), e = cc->end();
     105            for (auto j = i; ++j != e; i = j) {
    106106                if ((lo_codepoint(i) + 2) != lo_codepoint(j)) {
    107107                    combine  = false;
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4797 r4812  
    1111
    1212namespace re {
    13 CC::IntervalAllocator CC::mCharSetAllocator;
    1413
    1514CC::CC(const CC * cc1, const CC * cc2)
    1615: RE(ClassTypeId::CC)
    17 , mSparseCharSet(cc1->cbegin(), cc1->cend(), mCharSetAllocator) {
    18     for (const interval_t & i : cc2->mSparseCharSet) {
    19         insert_range(lo_codepoint(i), hi_codepoint(i));
    20     }
     16, mSparseCharSet(std::move(cc1->mSparseCharSet + cc2->mSparseCharSet)) {
     17
    2118}
    2219
    2320CC::CC(const CC & cc)
    2421: RE(ClassTypeId::CC)
    25 , mSparseCharSet(cc.cbegin(), cc.cend(), mCharSetAllocator) {
     22, mSparseCharSet(cc.mSparseCharSet) {
    2623
    2724}
     
    4946}
    5047
    51 void CC::insert_range(const codepoint_t lo, const codepoint_t hi) {
    52     for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    53         if (hi < lo_codepoint(i) - 1) {
    54             mSparseCharSet.emplace(i, lo, hi);
    55             return;
    56         } else if (lo > hi_codepoint(i) + 1) {
    57             ++i;
    58         } else {
    59             // ranges overlap; expand the range to include the overlapp
    60             lo_codepoint(i) = std::min(lo_codepoint(i), lo);
    61             hi_codepoint(i) = std::max(hi_codepoint(i), hi);
    62             // Test whether the new hi code point of this range touches the subsequent
    63             // interval. If so extend it over that one and remove it from the list.
    64             for (auto j = i + 1; j != mSparseCharSet.end(); ) {
    65                 if (LLVM_LIKELY(hi_codepoint(i) + 1 < lo_codepoint(j))) {
    66                     break;
    67                 }
    68                 hi_codepoint(i) = std::max(hi_codepoint(i), hi_codepoint(j));
    69                 j = mSparseCharSet.erase(j);
    70             }
    71             return;
    72         }
    73     }
    74     mSparseCharSet.emplace_back(lo, hi);
    75 }
    76 
    77 void CC::remove_range(const codepoint_t lo, const codepoint_t hi) {
    78     for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    79         if (lo > hi_codepoint(i) + 1) {
    80             ++i;
    81         }
    82         else if (hi < lo_codepoint(i) - 1) {
    83             break;
    84         }
    85         else if (lo <= lo_codepoint(i) && hi >= hi_codepoint(i)) {
    86             i = mSparseCharSet.erase(i);
    87         }
    88         else if (lo <= lo_codepoint(i)) {
    89             lo_codepoint(i) = hi + 1;
    90             break;
    91         }
    92         else if (hi >= hi_codepoint(i)) {
    93             hi_codepoint(i) = lo - 1;
    94             ++i;
    95         }
    96         else {         
    97             mSparseCharSet.emplace(++i, hi + 1, hi_codepoint(i));
    98             hi_codepoint(i) = lo - 1;
    99             break;
    100         }
    101     }
    102 }
    103 
    10448CC * subtractCC(const CC * a, const CC * b) {
    105     CC * diff = makeCC();
    106     auto i = a->cbegin();
    107     const auto i_end = a->cend();
    108     auto j = b->cbegin();
    109     const auto j_end = b->cend();
    110     while (i != i_end && j != j_end) {
    111         if (hi_codepoint(j) < lo_codepoint(i)) {
    112             ++j;
    113         }
    114         else { // test whether the intervals overlap
    115             if (lo_codepoint(i) < lo_codepoint(j)) {
    116                 diff->insert_range(lo_codepoint(i), std::min(lo_codepoint(j) - 1, hi_codepoint(i)));
    117             }
    118             if (hi_codepoint(i) > hi_codepoint(j)) {
    119                 diff->insert_range(std::max(hi_codepoint(j) + 1, lo_codepoint(i)), hi_codepoint(i));
    120             }
    121             ++i;
    122         }
    123     }
    124     for (; i != i_end; ++i) {
    125         diff->insert_range(lo_codepoint(i), hi_codepoint(i));
    126     }
    127     return diff;
     49    return makeCC(a->mSparseCharSet - b->mSparseCharSet);
    12850}
    12951   
    13052CC * intersectCC(const CC * a, const CC * b) {
    131     CC * isect = makeCC();
    132     auto ai = a->cbegin();
    133     const auto ai_end = a->cend();
    134     auto bi = b->cbegin();
    135     const auto bi_end = b->cend();
    136     while (ai != ai_end && bi != bi_end) {
    137         if (hi_codepoint(ai) < lo_codepoint(bi)) {
    138             ++ai;
    139         }
    140         else if (hi_codepoint(bi) < lo_codepoint(ai)) {
    141             ++bi;
    142         }
    143         else {
    144             isect->insert_range(std::max(lo_codepoint(ai), lo_codepoint(bi)), std::min(hi_codepoint(ai), hi_codepoint(bi)));
    145             if (hi_codepoint(ai) < hi_codepoint(bi)) {
    146                 ++ai;
    147             }
    148             else {
    149                 ++bi;
    150             }
    151         }
    152     }
    153     return isect;
     53    return makeCC(a->mSparseCharSet & b->mSparseCharSet);
    15454}
    15555   
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4621 r4812  
    1212#include <string>
    1313#include <vector>
     14#include <UCD/unicode_set.h>
    1415#include <slab_allocator.h>
    1516
    1617namespace re {
    1718
    18 using codepoint_t = unsigned;
    19 using interval_t = std::pair<codepoint_t, codepoint_t>;
     19using codepoint_t = UCD::UnicodeSet::codepoint_t;
     20using interval_t = UCD::UnicodeSet::interval_t;
    2021
    2122enum CC_type {UnicodeClass, ByteClass};
     
    3132    }
    3233
    33     using IntervalAllocator = SlabAllocator<interval_t>;
    34     using IntervalVector = std::vector<interval_t, IntervalAllocator>;
    35 
    36     using iterator = IntervalVector::iterator;
    37     using const_iterator = IntervalVector::const_iterator;
    38     using size_type = IntervalVector::size_type;
    39     using reference = IntervalVector::reference;
    40     using const_reference = IntervalVector::const_reference;
    41 
    42     static const codepoint_t UNICODE_MAX = 0x10FFFF;
     34    using iterator = UCD::UnicodeSet::iterator;
     35    using size_type = UCD::UnicodeSet::size_type;
    4336
    4437    std::string canonicalName(const CC_type type) const;
    4538
    46     interval_t & operator [](unsigned i) {
    47         return mSparseCharSet[i];
    48     }
    49 
    50     const interval_t & operator [](unsigned i) const {
    51         return mSparseCharSet[i];
    52     }
    53 
    5439    inline codepoint_t min_codepoint() const {
    55         return empty() ? 0 : std::get<0>(front());
     40        return mSparseCharSet.front().first;
    5641    }
    5742
    5843    inline codepoint_t max_codepoint() const {
    59         return empty() ? 0 : std::get<1>(back());
     44        return mSparseCharSet.back().second;
    6045    }
    6146
    62     void insert_range(const codepoint_t lo, const codepoint_t hi);
    63 
    64     void remove_range(const codepoint_t lo, const codepoint_t hi);
     47    void insert_range(const codepoint_t lo, const codepoint_t hi) {
     48        mSparseCharSet.insert_range(lo, hi);
     49    }
    6550
    6651    inline void insert(const codepoint_t codepoint) {
    67         insert_range(codepoint, codepoint);
     52        mSparseCharSet.insert(codepoint);
    6853    }
    6954
    70     inline void remove(const codepoint_t codepoint) {
    71         remove_range(codepoint, codepoint);
    72     }
    73 
    74     inline iterator begin() {
     55    inline iterator begin() const {
    7556        return mSparseCharSet.begin();
    7657    }
    7758
    78     inline iterator end() {
     59    inline iterator end() const {
    7960        return mSparseCharSet.end();
    8061    }
    8162
    82     inline reference front() {
     63    inline interval_t front() const {
    8364        return mSparseCharSet.front();
    8465    }
    8566
    86     inline reference back() {
    87         return mSparseCharSet.back();
    88     }
    89 
    90     inline const_iterator begin() const {
    91         return mSparseCharSet.cbegin();
    92     }
    93 
    94     inline const_iterator end() const {
    95         return mSparseCharSet.cend();
    96     }
    97 
    98     inline const_iterator cbegin() const {
    99         return mSparseCharSet.cbegin();
    100     }
    101 
    102     inline const_iterator cend() const {
    103         return mSparseCharSet.cend();
    104     }
    105 
    106     inline const_reference front() const {
    107         return mSparseCharSet.front();
    108     }
    109 
    110     inline const_reference back() const {
     67    inline interval_t back() const {
    11168        return mSparseCharSet.back();
    11269    }
     
    12986    friend CC * makeCC(const std::initializer_list<interval_t> list);
    13087    friend CC * makeCC(const std::vector<interval_t> & list);
    131     friend CC * subtractCC(const CC * cc1, const CC * cc2);
     88    friend CC * makeCC(UCD::UnicodeSet && set);
     89    friend CC * subtractCC(const CC * a, const CC * b);
     90    friend CC * intersectCC(const CC * a, const CC * b);
     91    friend CC * caseInsensitize(const CC * a, const CC * b);
    13292
    13393    inline CC()
    13494    : RE(ClassTypeId::CC)
    135     , mSparseCharSet(mCharSetAllocator) {
     95    , mSparseCharSet() {
    13696
    13797    }
     
    13999    inline CC(const codepoint_t codepoint)
    140100    : RE(ClassTypeId::CC)
    141     , mSparseCharSet(mCharSetAllocator) {
    142         insert(codepoint);
     101    , mSparseCharSet(codepoint) {
     102
    143103    }
    144104    inline CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint)
    145105    : RE(ClassTypeId::CC)
    146     , mSparseCharSet(mCharSetAllocator) {
    147         insert_range(lo_codepoint, hi_codepoint);
     106    , mSparseCharSet(lo_codepoint, hi_codepoint) {
     107
    148108    }
    149109    CC(const CC * cc1, const CC * cc2);
     110
     111    inline CC(UCD::UnicodeSet && set)
     112    : RE(ClassTypeId::CC)
     113    , mSparseCharSet(std::move(set)) {
     114
     115    }
    150116
    151117    template <typename itr>
    152118    CC * initialize(itr begin, itr end);
    153119private:   
    154     IntervalVector mSparseCharSet;
    155     static IntervalAllocator mCharSetAllocator;
     120    UCD::UnicodeSet mSparseCharSet;
    156121};
    157122
    158 inline static CC::iterator begin(CC & cc) {
     123inline static CC::iterator begin(const CC & cc) {
    159124    return cc.begin();
    160125}
    161126
    162 inline static CC::iterator end(CC & cc) {
     127inline static CC::iterator end(const CC & cc) {
    163128    return cc.end();
    164129}
    165130
    166 inline static CC::const_iterator begin(const CC & cc) {
    167     return cc.cbegin();
    168 }
    169 
    170 inline static CC::const_iterator end(const CC & cc) {
    171     return cc.cend();
    172 }
    173 
    174 inline codepoint_t & lo_codepoint(interval_t & i) {
    175     return std::get<0>(i);
    176 }
    177131inline codepoint_t lo_codepoint(const interval_t & i) {
    178132    return std::get<0>(i);
    179133}
    180 inline codepoint_t & lo_codepoint(const CC::iterator i) {
    181     return lo_codepoint(*i);
    182 }
    183 inline codepoint_t lo_codepoint(const CC::const_iterator i) {
     134inline codepoint_t lo_codepoint(const CC::iterator i) {
    184135    return lo_codepoint(*i);
    185136}
    186137
    187 inline codepoint_t & hi_codepoint(interval_t & i) {
    188     return std::get<1>(i);
    189 }
    190138inline codepoint_t hi_codepoint(const interval_t & i) {
    191139    return std::get<1>(i);
    192140}
    193 inline codepoint_t & hi_codepoint(const CC::iterator i) {
    194     return hi_codepoint(*i);
    195 }
    196 inline codepoint_t hi_codepoint(const CC::const_iterator i) {
     141inline codepoint_t hi_codepoint(const CC::iterator i) {
    197142    return hi_codepoint(*i);
    198143}
     
    200145template<typename itr>
    201146CC * CC::initialize(itr begin, itr end) {
    202     mSparseCharSet.resize(std::distance(begin, end));
    203147    for (auto i = begin; i != end; ++i) {
    204         assert (i == begin || lo_codepoint(*i) > max_codepoint());
    205         mSparseCharSet[std::distance(begin, i)] = *i;
     148        mSparseCharSet.insert_range(i->first, i->second);
    206149    }
    207150    return this;
    208151}
    209 
    210152
    211153/**
     
    241183}
    242184
     185inline CC * makeCC(UCD::UnicodeSet && set) {
     186    return makeCC(std::move(set));
     187}
     188
    243189CC * subtractCC(const CC * a, const CC * b);
    244190   
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4809 r4812  
    475475    // It is an error if a 4-byte sequence is used to encode a codepoint
    476476    // above the Unicode maximum.
    477     if (cp > CC::UNICODE_MAX) {
     477    if (cp > UCD::UNICODE_MAX) {
    478478        throw InvalidUTF8Encoding();
    479479    }
     
    929929    }
    930930    if (count < mindigits) throw ParseFailure("Octal sequence has too few digits");
    931     if (value > CC::UNICODE_MAX) throw ParseFailure("Octal value too large");
     931    if (value > UCD::UNICODE_MAX) throw ParseFailure("Octal value too large");
    932932    return value;
    933933}
     
    948948    }
    949949    if (count < mindigits) throw ParseFailure("Hexadecimal sequence has too few digits");
    950     if (value > CC::UNICODE_MAX) throw ParseFailure("Hexadecimal value too large");
     950    if (value > UCD::UNICODE_MAX) throw ParseFailure("Hexadecimal value too large");
    951951    return value;
    952952}
     
    966966    if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
    967967        caseInsensitiveInsert(cc, cp);
    968     }
    969     else cc->insert(cp);
     968    } else {
     969        cc->insert(cp);
     970    }
    970971}
    971972
     
    973974    if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
    974975        caseInsensitiveInsertRange(cc, lo, hi);
    975     }
    976     else cc->insert_range(lo, hi);
     976    } else {
     977        cc->insert_range(lo, hi);
     978    }
    977979}
    978980
Note: See TracChangeset for help on using the changeset viewer.