Ignore:
Timestamp:
Oct 1, 2015, 2:54:17 PM (4 years ago)
Author:
nmedfort
Message:

Embedded UnicodeSet into CC objects (will currently cause a memory leak)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4621 r4812  
    1212#include <string>
    1313#include <vector>
     14#include <UCD/unicode_set.h>
    1415#include <slab_allocator.h>
    1516
    1617namespace re {
    1718
    18 using codepoint_t = unsigned;
    19 using interval_t = std::pair<codepoint_t, codepoint_t>;
     19using codepoint_t = UCD::UnicodeSet::codepoint_t;
     20using interval_t = UCD::UnicodeSet::interval_t;
    2021
    2122enum CC_type {UnicodeClass, ByteClass};
     
    3132    }
    3233
    33     using IntervalAllocator = SlabAllocator<interval_t>;
    34     using IntervalVector = std::vector<interval_t, IntervalAllocator>;
    35 
    36     using iterator = IntervalVector::iterator;
    37     using const_iterator = IntervalVector::const_iterator;
    38     using size_type = IntervalVector::size_type;
    39     using reference = IntervalVector::reference;
    40     using const_reference = IntervalVector::const_reference;
    41 
    42     static const codepoint_t UNICODE_MAX = 0x10FFFF;
     34    using iterator = UCD::UnicodeSet::iterator;
     35    using size_type = UCD::UnicodeSet::size_type;
    4336
    4437    std::string canonicalName(const CC_type type) const;
    4538
    46     interval_t & operator [](unsigned i) {
    47         return mSparseCharSet[i];
    48     }
    49 
    50     const interval_t & operator [](unsigned i) const {
    51         return mSparseCharSet[i];
    52     }
    53 
    5439    inline codepoint_t min_codepoint() const {
    55         return empty() ? 0 : std::get<0>(front());
     40        return mSparseCharSet.front().first;
    5641    }
    5742
    5843    inline codepoint_t max_codepoint() const {
    59         return empty() ? 0 : std::get<1>(back());
     44        return mSparseCharSet.back().second;
    6045    }
    6146
    62     void insert_range(const codepoint_t lo, const codepoint_t hi);
    63 
    64     void remove_range(const codepoint_t lo, const codepoint_t hi);
     47    void insert_range(const codepoint_t lo, const codepoint_t hi) {
     48        mSparseCharSet.insert_range(lo, hi);
     49    }
    6550
    6651    inline void insert(const codepoint_t codepoint) {
    67         insert_range(codepoint, codepoint);
     52        mSparseCharSet.insert(codepoint);
    6853    }
    6954
    70     inline void remove(const codepoint_t codepoint) {
    71         remove_range(codepoint, codepoint);
    72     }
    73 
    74     inline iterator begin() {
     55    inline iterator begin() const {
    7556        return mSparseCharSet.begin();
    7657    }
    7758
    78     inline iterator end() {
     59    inline iterator end() const {
    7960        return mSparseCharSet.end();
    8061    }
    8162
    82     inline reference front() {
     63    inline interval_t front() const {
    8364        return mSparseCharSet.front();
    8465    }
    8566
    86     inline reference back() {
    87         return mSparseCharSet.back();
    88     }
    89 
    90     inline const_iterator begin() const {
    91         return mSparseCharSet.cbegin();
    92     }
    93 
    94     inline const_iterator end() const {
    95         return mSparseCharSet.cend();
    96     }
    97 
    98     inline const_iterator cbegin() const {
    99         return mSparseCharSet.cbegin();
    100     }
    101 
    102     inline const_iterator cend() const {
    103         return mSparseCharSet.cend();
    104     }
    105 
    106     inline const_reference front() const {
    107         return mSparseCharSet.front();
    108     }
    109 
    110     inline const_reference back() const {
     67    inline interval_t back() const {
    11168        return mSparseCharSet.back();
    11269    }
     
    12986    friend CC * makeCC(const std::initializer_list<interval_t> list);
    13087    friend CC * makeCC(const std::vector<interval_t> & list);
    131     friend CC * subtractCC(const CC * cc1, const CC * cc2);
     88    friend CC * makeCC(UCD::UnicodeSet && set);
     89    friend CC * subtractCC(const CC * a, const CC * b);
     90    friend CC * intersectCC(const CC * a, const CC * b);
     91    friend CC * caseInsensitize(const CC * a, const CC * b);
    13292
    13393    inline CC()
    13494    : RE(ClassTypeId::CC)
    135     , mSparseCharSet(mCharSetAllocator) {
     95    , mSparseCharSet() {
    13696
    13797    }
     
    13999    inline CC(const codepoint_t codepoint)
    140100    : RE(ClassTypeId::CC)
    141     , mSparseCharSet(mCharSetAllocator) {
    142         insert(codepoint);
     101    , mSparseCharSet(codepoint) {
     102
    143103    }
    144104    inline CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint)
    145105    : RE(ClassTypeId::CC)
    146     , mSparseCharSet(mCharSetAllocator) {
    147         insert_range(lo_codepoint, hi_codepoint);
     106    , mSparseCharSet(lo_codepoint, hi_codepoint) {
     107
    148108    }
    149109    CC(const CC * cc1, const CC * cc2);
     110
     111    inline CC(UCD::UnicodeSet && set)
     112    : RE(ClassTypeId::CC)
     113    , mSparseCharSet(std::move(set)) {
     114
     115    }
    150116
    151117    template <typename itr>
    152118    CC * initialize(itr begin, itr end);
    153119private:   
    154     IntervalVector mSparseCharSet;
    155     static IntervalAllocator mCharSetAllocator;
     120    UCD::UnicodeSet mSparseCharSet;
    156121};
    157122
    158 inline static CC::iterator begin(CC & cc) {
     123inline static CC::iterator begin(const CC & cc) {
    159124    return cc.begin();
    160125}
    161126
    162 inline static CC::iterator end(CC & cc) {
     127inline static CC::iterator end(const CC & cc) {
    163128    return cc.end();
    164129}
    165130
    166 inline static CC::const_iterator begin(const CC & cc) {
    167     return cc.cbegin();
    168 }
    169 
    170 inline static CC::const_iterator end(const CC & cc) {
    171     return cc.cend();
    172 }
    173 
    174 inline codepoint_t & lo_codepoint(interval_t & i) {
    175     return std::get<0>(i);
    176 }
    177131inline codepoint_t lo_codepoint(const interval_t & i) {
    178132    return std::get<0>(i);
    179133}
    180 inline codepoint_t & lo_codepoint(const CC::iterator i) {
    181     return lo_codepoint(*i);
    182 }
    183 inline codepoint_t lo_codepoint(const CC::const_iterator i) {
     134inline codepoint_t lo_codepoint(const CC::iterator i) {
    184135    return lo_codepoint(*i);
    185136}
    186137
    187 inline codepoint_t & hi_codepoint(interval_t & i) {
    188     return std::get<1>(i);
    189 }
    190138inline codepoint_t hi_codepoint(const interval_t & i) {
    191139    return std::get<1>(i);
    192140}
    193 inline codepoint_t & hi_codepoint(const CC::iterator i) {
    194     return hi_codepoint(*i);
    195 }
    196 inline codepoint_t hi_codepoint(const CC::const_iterator i) {
     141inline codepoint_t hi_codepoint(const CC::iterator i) {
    197142    return hi_codepoint(*i);
    198143}
     
    200145template<typename itr>
    201146CC * CC::initialize(itr begin, itr end) {
    202     mSparseCharSet.resize(std::distance(begin, end));
    203147    for (auto i = begin; i != end; ++i) {
    204         assert (i == begin || lo_codepoint(*i) > max_codepoint());
    205         mSparseCharSet[std::distance(begin, i)] = *i;
     148        mSparseCharSet.insert_range(i->first, i->second);
    206149    }
    207150    return this;
    208151}
    209 
    210152
    211153/**
     
    241183}
    242184
     185inline CC * makeCC(UCD::UnicodeSet && set) {
     186    return makeCC(std::move(set));
     187}
     188
    243189CC * subtractCC(const CC * a, const CC * b);
    244190   
Note: See TracChangeset for help on using the changeset viewer.