Ignore:
Timestamp:
Nov 28, 2017, 1:45:19 AM (23 months ago)
Author:
nmedfort
Message:

Bug fix for segment pipeline parallel mode + memory management improvements.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r5742 r5748  
    11#ifndef UNICODE_SET_H
    22#define UNICODE_SET_H
     3
     4#include "UCD_Config.h"
    35#include <stdint.h>
    46#include <vector>
     
    3032//
    3133
    32 namespace llvm {
    33 class raw_ostream;
    34 }
     34namespace llvm { class raw_ostream; }
     35namespace re { class RE; }
    3536
    3637namespace UCD {
    3738
    38 typedef unsigned codepoint_t;
    39 enum : codepoint_t { UNICODE_MAX = 0x10FFFF };
    40 
    4139enum run_type_t : uint16_t {Empty, Mixed, Full};
    4240
    4341class UnicodeSet {
     42    friend class re::RE;
     43    template<typename RunVector, typename QuadVector> friend void assign(UnicodeSet *, const RunVector &, const QuadVector &) noexcept;
    4444public:
    4545
    4646    using bitquad_t = uint32_t;
    4747    using length_t = uint16_t;
     48    using size_type = size_t;
     49
    4850    using run_t = std::pair<run_type_t, length_t>;
    4951    using quad_iterator_return_t = std::pair<run_t, bitquad_t>;
    50 
    51     using interval_t = std::pair<codepoint_t, codepoint_t>;
    52 
    53     using RunVector = std::vector<run_t, ProxyAllocator<run_t>>;
    54     using QuadVector = std::vector<bitquad_t, ProxyAllocator<bitquad_t>>;
    55     using RunIterator = RunVector::const_iterator;
    56     using QuadIterator = QuadVector::const_iterator;
    57 
    58     using size_type = RunVector::size_type;
    5952
    6053    class iterator : public boost::iterator_facade<iterator, interval_t, boost::forward_traversal_tag, interval_t> {
     
    6356    protected:
    6457
    65         iterator(const RunVector::const_iterator runIterator, const QuadVector::const_iterator quadIterator, const codepoint_t baseCodePoint)
     58        iterator(const run_t * const runIterator, const bitquad_t * const quadIterator, const codepoint_t baseCodePoint)
    6659        : mRunIterator(runIterator), mQuadIterator(quadIterator)
    6760        , mMixedRunIndex(0), mQuadOffset(0), mBaseCodePoint(baseCodePoint), mMinCodePoint(baseCodePoint), mMaxCodePoint(baseCodePoint) {
     
    8376        }
    8477    private:
    85         RunIterator         mRunIterator;
    86         QuadIterator        mQuadIterator;
     78        const run_t *       mRunIterator;
     79        const bitquad_t *   mQuadIterator;
    8780        unsigned            mMixedRunIndex;
    8881        bitquad_t           mQuadOffset;
     
    9487    inline iterator begin() const {
    9588        // note: preincrement forces the iterator to advance onto and capture the first interval.
    96         return ++iterator(mRuns.cbegin(), mQuads.cbegin(), 0);
     89        return ++iterator(mRuns, mQuads, 0);
    9790    }
    9891
    9992    inline iterator end() const {
    100         return iterator(mRuns.cend(), mQuads.cend(), UNICODE_MAX+1);
    101     }
    102 
    103     bool empty() const; // The set has no members
    104    
    105     bool full() const;  // The set has the full set of possible Unicode codepoints.
     93        return iterator(mRuns, mQuads, UNICODE_MAX+1);
     94    }
     95
     96    bool empty() const { // The set has no members
     97        return (mRunLength == 1) && mRuns->first == Empty;
     98    }
     99
     100    bool full() const {  // The set has the full set of possible Unicode codepoints.
     101        return (mRunLength == 1) && mRuns->first == Full;
     102    }
    106103   
    107104    codepoint_t at(const size_type k) const; // return the k-th codepoint (or throw an error if it doesn't exist)
    108105
    109     bool contains(const codepoint_t codepoint) const;
    110 
    111     bool intersects(const codepoint_t lo, const codepoint_t hi) const;
     106    bool contains(const codepoint_t codepoint) const noexcept;
     107
     108    bool intersects(const codepoint_t lo, const codepoint_t hi) const noexcept;
    112109   
    113     bool intersects(const UnicodeSet & other) const;
    114 
    115     bool subset(const UnicodeSet & other) const;
     110    bool intersects(const UnicodeSet & other) const noexcept;
     111
     112    bool subset(const UnicodeSet & other) const noexcept;
    116113   
    117114    void insert(const codepoint_t cp);
    118115
     116    void insert(const UnicodeSet & other) noexcept;
     117
     118    void invert() noexcept;
     119
    119120    void insert_range(const codepoint_t lo, const codepoint_t hi);
    120121
    121     size_type size() const; // number of intervals in this set
    122 
    123     size_type count() const; // number of codepoints in this set
    124 
    125     interval_t front() const;
    126 
    127     interval_t back() const;
    128 
    129     void print(llvm::raw_ostream & out) const;
    130 
    131     void dump(llvm::raw_ostream & out) const;
    132 
    133     UnicodeSet operator~() const;
    134     UnicodeSet operator&(const UnicodeSet & other) const;
    135     UnicodeSet operator+(const UnicodeSet & other) const;
    136     UnicodeSet operator-(const UnicodeSet & other) const;
    137     UnicodeSet operator^(const UnicodeSet & other) const;
    138 
    139     inline UnicodeSet & operator=(const UnicodeSet & other) = default;
    140     inline UnicodeSet & operator=(UnicodeSet && other) = default;
    141     bool operator==(const UnicodeSet & other) const;
    142     bool operator<(const UnicodeSet & other) const;
    143 
    144     UnicodeSet(run_type_t emptyOrFull = Empty, ProxyAllocator<> allocator = GlobalAllocator);
    145     UnicodeSet(const codepoint_t codepoint, ProxyAllocator<> allocator = GlobalAllocator);
    146     UnicodeSet(const codepoint_t lo, const codepoint_t hi, ProxyAllocator<> allocator = GlobalAllocator);
    147     UnicodeSet(const UnicodeSet & other, ProxyAllocator<> allocator = GlobalAllocator);
    148     UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q, ProxyAllocator<> allocator = GlobalAllocator);
    149     UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, ProxyAllocator<> allocator = GlobalAllocator);
    150     UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, ProxyAllocator<> allocator = GlobalAllocator);
    151    
    152     inline void swap(UnicodeSet & other);
    153     inline void swap(UnicodeSet && other);
     122    size_type size() const noexcept; // number of intervals in this set
     123
     124    size_type count() const noexcept; // number of codepoints in this set
     125
     126    interval_t front() const noexcept;
     127
     128    interval_t back() const noexcept;
     129
     130    void print(llvm::raw_ostream & out) const noexcept;
     131
     132    void dump(llvm::raw_ostream & out) const noexcept;
     133
     134    UnicodeSet operator~() const noexcept;
     135    UnicodeSet operator&(const UnicodeSet & other) const noexcept;
     136    UnicodeSet operator+(const UnicodeSet & other) const noexcept;
     137    UnicodeSet operator-(const UnicodeSet & other) const noexcept;
     138    UnicodeSet operator^(const UnicodeSet & other) const noexcept;
     139
     140    UnicodeSet & operator=(const UnicodeSet & other) noexcept;
     141    UnicodeSet & operator=(const UnicodeSet && other) noexcept;
     142    bool operator==(const UnicodeSet & other) const noexcept;
     143    bool operator<(const UnicodeSet & other) const noexcept;
     144
     145    UnicodeSet() noexcept;
     146    UnicodeSet(const codepoint_t codepoint) noexcept;
     147    UnicodeSet(const codepoint_t lo, const codepoint_t hi) noexcept;
     148    UnicodeSet(const UnicodeSet & other) noexcept;
     149    UnicodeSet(const UnicodeSet && other) noexcept;
     150
     151    UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end) noexcept;
     152    UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end) noexcept;
     153    UnicodeSet(run_t * const runs, const uint32_t runLength, const uint32_t runCapacity, bitquad_t * const quads, const uint32_t quadLength, const uint32_t quadCapacity) noexcept;
     154
     155    UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q) noexcept;
    154156
    155157    inline static void Reset() {
     
    159161protected:
    160162
    161     UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q, ProxyAllocator<> allocator = GlobalAllocator);
    162    
    163163    class quad_iterator : public boost::iterator_facade<quad_iterator, quad_iterator_return_t, boost::random_access_traversal_tag, quad_iterator_return_t> {
    164164        friend class UnicodeSet;
    165165        friend class boost::iterator_core_access;
    166166    public:
    167         explicit quad_iterator(RunIterator runIterator, RunIterator runEnd, QuadIterator quadIterator, QuadIterator quadEnd, const run_type_t type, const length_t remaining)
     167        explicit quad_iterator(const run_t * const runIterator, const run_t * const runEnd, const bitquad_t * const quadIterator, const bitquad_t * const quadEnd, const run_type_t type, const length_t remaining)
    168168        : mRunIterator(runIterator)
    169169        , mRunEnd(runEnd)
     
    209209
    210210    private:
    211         RunIterator         mRunIterator;   
    212         const RunIterator   mRunEnd;
    213         QuadIterator        mQuadIterator;       
     211        const run_t *           mRunIterator;
     212        const run_t * const     mRunEnd;
     213        const bitquad_t *       mQuadIterator;
    214214        #ifndef NDEBUG
    215         const QuadIterator mQuadEnd;
     215        const bitquad_t * const mQuadEnd;
    216216        #endif
    217         run_type_t          mType;
    218         length_t            mRemaining;
     217        run_type_t              mType;
     218        length_t                mRemaining;
    219219    };
    220220
    221221    inline quad_iterator quad_begin() const {
    222         assert (mRuns.cbegin() != mRuns.cend());
    223         return quad_iterator(mRuns.cbegin(), mRuns.cend(), mQuads.cbegin(), mQuads.cend(), mRuns.cbegin()->first, mRuns.cbegin()->second);
     222        return quad_iterator(mRuns, mRuns + mRunLength, mQuads, mQuads + mQuadLength, std::get<0>(*mRuns), std::get<1>(*mRuns));
    224223    }
    225224
    226225    inline quad_iterator quad_end() const {
    227         return quad_iterator(mRuns.cend(), mRuns.cend(), mQuads.cend(), mQuads.cend(), Empty, 0);
     226        return quad_iterator(mRuns + mRunLength, mRuns + mRunLength, mQuads + mQuadLength, mQuads + mQuadLength, Empty, 0);
    228227    }
    229228
    230229private:
    231230
    232     RunVector               mRuns;
    233     QuadVector              mQuads;
     231    run_t *                 mRuns;
     232    bitquad_t *             mQuads;
     233
     234    uint32_t                mRunLength;
     235    uint32_t                mQuadLength;
     236
     237    uint32_t                mRunCapacity;
     238    uint32_t                mQuadCapacity;
     239
    234240    static SlabAllocator<>  GlobalAllocator;
    235241};
    236242
    237 
    238 inline void UnicodeSet::swap(UnicodeSet & other) {
    239     mRuns.swap(other.mRuns); mQuads.swap(other.mQuads);
    240243}
    241244
    242 inline void UnicodeSet::swap(UnicodeSet && other) {
    243     mRuns.swap(other.mRuns); mQuads.swap(other.mQuads);
    244 }
    245 
    246 }
    247 
    248245#endif
    249246
Note: See TracChangeset for help on using the changeset viewer.