Ignore:
Timestamp:
Oct 1, 2015, 2:54:17 PM (4 years ago)
Author:
nmedfort
Message:

Embedded UnicodeSet into CC objects (will currently cause a memory leak)

Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r4667 r4812  
    1010#include <sstream>
    1111#include <algorithm>
     12#include <assert.h>
     13#include <llvm/Support/Casting.h>
     14
     15using namespace llvm;
    1216
    1317namespace UCD {
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4808 r4812  
    1919    assert (!suffix->empty());
    2020    mSuffixVar = mCharacterClassCompiler.compileCC(suffix, entry);
    21     generateRange(ifRanges, 0, CC::UNICODE_MAX, entry);
     21    generateRange(ifRanges, 0, UNICODE_MAX, entry);
    2222}
    2323
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r4631 r4812  
    2525#include <include/simd-lib/builtins.hpp>
    2626
    27 using namespace re;
    28 
    2927namespace UCD {
    3028
     
    3634const size_t QUAD_BITS = (8 * sizeof(bitquad_t));
    3735const size_t MOD_QUAD_BIT_MASK = QUAD_BITS - 1;
    38 const size_t UNICODE_QUAD_COUNT = (CC::UNICODE_MAX + 1) / QUAD_BITS;
     36const size_t UNICODE_QUAD_COUNT = (UNICODE_MAX + 1) / QUAD_BITS;
    3937const bitquad_t FULL_QUAD_MASK = -1;
    4038
    4139inline run_type_t typeOf(const run_t & run) {
    42     return std::get<0>(run);
     40    return run.first;
    4341}
    4442
    4543inline UnicodeSet::length_t lengthOf(const run_t & run) {
    46     return std::get<1>(run);
     44    return run.second;
    4745}
    4846
     
    7977}
    8078
     79#ifndef NDEBUG
    8180/** ------------------------------------------------------------------------------------------------------------- *
    8281 * @brief runLengthSumsUpToUnicodeQuadCount
     
    9089    }
    9190    return sum == UNICODE_QUAD_COUNT;
     91}
     92#endif
     93
     94/** ------------------------------------------------------------------------------------------------------------- *
     95 * @brief empty
     96 ** ------------------------------------------------------------------------------------------------------------- */
     97bool UnicodeSet::empty() const {
     98    return (mRuns.size() == 1) && typeOf(mRuns.front()) == Empty;
     99}
     100
     101/** ------------------------------------------------------------------------------------------------------------- *
     102 * @brief size
     103 ** ------------------------------------------------------------------------------------------------------------- */
     104UnicodeSet::size_type UnicodeSet::size() const {
     105    return std::distance(begin(), end());
     106}
     107
     108/** ------------------------------------------------------------------------------------------------------------- *
     109 * @brief front
     110 ** ------------------------------------------------------------------------------------------------------------- */
     111UnicodeSet::interval_t UnicodeSet::front() const {
     112    return *begin();
     113}
     114
     115/** ------------------------------------------------------------------------------------------------------------- *
     116 * @brief back
     117 ** ------------------------------------------------------------------------------------------------------------- */
     118UnicodeSet::interval_t UnicodeSet::back() const {
     119    auto back = begin();
     120    for (auto i = back; i != end(); back = i++);
     121    return *back;
    92122}
    93123
     
    322352}
    323353
     354///** ------------------------------------------------------------------------------------------------------------- *
     355// * @brief insert_range
     356// ** ------------------------------------------------------------------------------------------------------------- */
     357//void UnicodeSet::insert_range(const codepoint_t lo, const codepoint_t hi)  {
     358
     359//    if (LLVM_UNLIKELY(lo > hi)) {
     360//        throw std::runtime_error('[' + std::to_string(lo) + ',' + std::to_string(hi) + "] is an illegal codepoint range!");
     361//    } else if (LLVM_UNLIKELY(hi >= 0x110000)) {
     362//        throw std::runtime_error(std::to_string(hi) + " exceeds maximum code point.");
     363//    }
     364
     365//    auto r = mRuns.begin();
     366//    auto q = mQuads.begin();
     367//    unsigned offset = 0;
     368
     369//    auto lo_quad_no = lo / QUAD_BITS;
     370//    auto lo_offset = lo & MOD_QUAD_BIT_MASK;
     371
     372//    auto hi_quad_no = hi / QUAD_BITS;
     373//    auto hi_offset = hi & MOD_QUAD_BIT_MASK;
     374
     375//    // Scan up to the lo codepoint
     376//    for (;;) {
     377//        assert (r != mRuns.end());
     378//        const auto l = lengthOf(*r);
     379//        if ((offset + l) > lo_quad_no) {
     380//            break;
     381//        }
     382//        if (typeOf(*r) == Mixed) {
     383//            q += lengthOf(*r);
     384//        }
     385//        offset += l;
     386//        ++r;
     387//    }
     388
     389//    // Test whether the range is already 'full' and skip ahead to the first empty or mixed quad.
     390//    // If the entire [lo,hi] range is already covered by a Full run, abort.
     391//    while (typeOf(*r) == Full) {
     392//        const auto l = lengthOf(*r);
     393//        lo_quad_no += l;
     394//        offset = lo_quad_no;
     395//        lo_offset = 0;
     396//        if (lo_quad_no > hi_quad_no) {
     397//            return;
     398//        }
     399//        ++r;
     400//    }
     401
     402//    // Otherwise, some portion of this range has to be inserted into the current sparse set.
     403//    // Begin by inserting the initial (potentially) partial lo quad.
     404//    const bitquad_t lo_quad = (FULL_QUAD_MASK << lo_offset);
     405//    const bitquad_t hi_quad = (FULL_QUAD_MASK >> (QUAD_BITS - 1 - hi_offset));
     406//    bitquad_t quad = (lo_quad_no == hi_quad_no) ? (lo_quad & hi_quad) : lo_quad;
     407//    run_type_t newType = (quad == FULL_QUAD_MASK) ? Full : ((quad == 0) ? Empty : Mixed);
     408//    run_type_t runType = typeOf(*r);
     409//    // If the original run is Mixed, we may be able to simply update the quad accordingly.
     410//    if (runType == Mixed) {
     411//        q += (lo_quad_no - offset);
     412//        quad |= *q;
     413//        if (LLVM_LIKELY(quad != FULL_QUAD_MASK)) {
     414//            *q = quad;
     415//            if (lo_quad_no == hi_quad_no) {
     416//                return;
     417//            }
     418//        } else { // we filled a Mixed quad
     419//            mQuads.erase(q);
     420//        }
     421//        newType = Full;
     422//    }
     423//    auto length = lengthOf(*r);
     424//    auto splitAt = length - (lo_quad_no - offset) - 1;
     425//    if (splitAt) {
     426//        // reduce the original run length
     427//        lengthOf(*r) = splitAt;
     428//        // and add in a new quad
     429//        r = mRuns.emplace(r, newType, 1);
     430//    } else { // we're inserting this quad at the beginning of the run
     431//        typeOf(*r) = newType;
     432//        lengthOf(*r) = 1;
     433//    }
     434//    if (newType == Mixed) {
     435//        q = mQuads.emplace(q, quad);
     436//    }
     437//    length -= splitAt + 1;
     438//    auto remaining = (hi_quad_no - lo_quad_no);
     439//    // We're inserting a Full run so if the original run type was Full and exceeds the
     440//    // length of what we're inserting, we can abort without considering the hi_quad
     441//    if (runType == Full && length > remaining) {
     442//        return;
     443//    }
     444//    if (remaining) {
     445//        r = mRuns.emplace(r, Full, remaining);
     446
     447
     448
     449//    }
     450
     451//}
     452
     453
     454
    324455/** ------------------------------------------------------------------------------------------------------------- *
    325456 * @brief contains
     
    356487bool UnicodeSet::intersects(const codepoint_t lo, const codepoint_t hi) const {
    357488    for (auto range : *this) {
    358         if (hi_codepoint(range) < lo) {
     489        if (range.second < lo) {
    359490            continue;
    360491        }
    361         if (lo_codepoint(range) > hi) {
     492        if (range.first > hi) {
    362493            break;
    363494        }
     
    394525void UnicodeSet::iterator::advance(const unsigned n) {
    395526
    396     assert (n == 1);   
     527    assert (n == 1);
    397528
    398529    if (LLVM_UNLIKELY(mMinCodePoint >= 0x110000)) {
     
    414545                break;
    415546            }
    416         }
    417         else { // if (typeOf(t) == Mixed)
     547        } else { // if (typeOf(t) == Mixed)
    418548            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    419549                const bitquad_t m = (*mQuadIterator) & (FULL_QUAD_MASK << mQuadOffset);
     
    461591                break;
    462592            }
    463         }
    464         else { // if (typeOf(t) == Mixed)
     593        } else { // if (typeOf(t) == Mixed)
    465594            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    466595                const bitquad_t m = ((~(*mQuadIterator)) & FULL_QUAD_MASK) & (FULL_QUAD_MASK << mQuadOffset);
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r4631 r4812  
    33#include <stdint.h>
    44#include <vector>
    5 #include <re/re_cc.h>
    65#include <boost/iterator/iterator_facade.hpp>
    76
     
    4645    using quad_iterator_return_t = std::pair<run_t, bitquad_t>;
    4746
    48     using codepoint_t = re::codepoint_t;
    49     using interval_t = re::interval_t;
     47    using codepoint_t = unsigned;
     48    using interval_t = std::pair<codepoint_t, codepoint_t>;
     49
    5050    using RunVector = std::vector<run_t>;
    5151    using QuadVector = std::vector<bitquad_t>;
     52
     53    using size_type = RunVector::size_type;
    5254
    5355    class iterator : public boost::iterator_facade<iterator, interval_t, boost::forward_traversal_tag, interval_t> {
     
    112114
    113115        inline run_type_t type() const {
    114             return std::get<0>(*mRunIterator);
     116            return mRunIterator->first;
    115117        }
    116118
    117119        inline length_t length() const {
    118             return std::get<1>(*mRunIterator) - mOffset;
     120            return mRunIterator->second - mOffset;
    119121        }
    120122
     
    145147    bool intersects(const codepoint_t lo, const codepoint_t hi) const;
    146148
     149    inline void insert(const codepoint_t cp) {
     150        *this = std::move(*this + UnicodeSet(cp));
     151    }
     152
     153    inline void insert_range(const codepoint_t lo, const codepoint_t hi) {
     154        *this = std::move(*this + UnicodeSet(lo, hi));
     155    }
     156
     157    bool empty() const;
     158
     159    size_type size() const;
     160
     161    interval_t front() const;
     162
     163    interval_t back() const;
     164
    147165    void dump(llvm::raw_ostream & out) const;
    148166
     
    152170    UnicodeSet operator-(const UnicodeSet & other) const;
    153171    UnicodeSet operator^(const UnicodeSet & other) const;
     172
    154173    inline UnicodeSet & operator=(const UnicodeSet & other) = default;
    155174    inline UnicodeSet & operator=(UnicodeSet && other) = default;
     
    172191};
    173192
     193enum : UnicodeSet::codepoint_t { UNICODE_MAX = 0x10FFFF };
     194
    174195inline void UnicodeSet::swap(UnicodeSet & other) {
    175196    mRuns.swap(other.mRuns); mQuads.swap(other.mQuads);
Note: See TracChangeset for help on using the changeset viewer.