Ignore:
Timestamp:
Nov 22, 2017, 3:32:58 PM (22 months ago)
Author:
nmedfort
Message:

Improvements to memory usage of CCs

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r5740 r5742  
    4444
    4545
    46 SlabAllocator<> UnicodeSet::mAllocator;
     46SlabAllocator<> UnicodeSet::GlobalAllocator;
    4747
    4848const uint64_t QUAD_BITS = (8 * sizeof(bitquad_t));
     
    101101    unsigned mixedQuads = 0;
    102102    for (auto run : runs) {
     103        const auto type = typeOf(run);
     104        if (LLVM_UNLIKELY(type != Empty && type != Mixed && type != Full)) {
     105            throw std::runtime_error("illegal run type " + std::to_string(type) + " found");
     106        }
    103107        const auto l = lengthOf(run);
    104         if (l == 0) {
    105             throw std::runtime_error("Zero-length quad found!");
    106         }
    107         if (typeOf(run) == Mixed) {
     108        if (LLVM_UNLIKELY(l == 0)) {
     109            throw std::runtime_error("zero-length quad found");
     110        }
     111        if (type == Mixed) {
    108112            mixedQuads += l;
    109113        }
    110114        sum += l;
    111115    }
    112     if (sum != UNICODE_QUAD_COUNT) {
    113         throw std::runtime_error("Invalid quad count: found " + std::to_string(sum) + " but expected " + std::to_string(UNICODE_QUAD_COUNT));
    114     }
    115     if (mixedQuads != quads.size()) {
    116         throw std::runtime_error("Invalid mixed quad count: found " + std::to_string(quads.size()) + " but expected " + std::to_string(mixedQuads));
    117     }
    118     for (auto quad : quads) {
    119         if (quad == 0) {
     116    if (LLVM_UNLIKELY(sum != UNICODE_QUAD_COUNT)) {
     117        throw std::runtime_error("found " + std::to_string(sum) + " quads but expected " + std::to_string(UNICODE_QUAD_COUNT));
     118    }
     119    if (LLVM_UNLIKELY(mixedQuads != quads.size())) {
     120        throw std::runtime_error("found " + std::to_string(quads.size()) + " mixed quad but expected " + std::to_string(mixedQuads));
     121    }
     122    for (const auto quad : quads) {
     123        if (LLVM_UNLIKELY(quad == 0)) {
    120124            throw std::runtime_error("Empty quad found in Mixed quad array!");
    121         } else if (quad == FULL_QUAD_MASK) {
     125        } else if (LLVM_UNLIKELY(quad == FULL_QUAD_MASK)) {
    122126            throw std::runtime_error("Full quad found in Mixed quad array!");
    123127        }
     
    286290        *qi++ = ~quad;
    287291    }
    288     return UnicodeSet(std::move(runs), std::move(quads));
     292    return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator());
    289293}
    290294
     
    293297 ** ------------------------------------------------------------------------------------------------------------- */
    294298UnicodeSet UnicodeSet::operator&(const UnicodeSet & other) const {
     299
     300    std::vector<run_t> runs;
     301    std::vector<bitquad_t> quads;   
     302   
     303    auto i1 = quad_begin(), i2 = other.quad_begin();
     304   
     305    for (;;) {
     306        assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0)));
     307        const auto n = std::min(i1.length(), i2.length());
     308        if (LLVM_UNLIKELY(n == 0)) {
     309            break;
     310        }
     311        if ((i1.type() == Full) && (i2.type() == Full)) {
     312            append_run(Full, n, runs);
     313            i1 += n;
     314            i2 += n;
     315        } else if ((i1.type() == Empty) || (i2.type() == Empty)) {
     316            append_run(Empty, n, runs);
     317            i1 += n;
     318            i2 += n;
     319        } else if (i1.type() == Full) {
     320            for (unsigned i = 0; i != n; ++i, ++i2) {
     321                append_quad(i2.quad(), quads, runs);
     322            }
     323            i1 += n;
     324        } else if (i2.type() == Full) {
     325            for (unsigned i = 0; i != n; ++i, ++i1) {
     326                append_quad(i1.quad(), quads, runs);
     327            }
     328            i2 += n;
     329        } else { // both Mixed
     330            assert (i1.type() == Mixed && i2.type() == Mixed);
     331            for (unsigned i = 0; i != n; ++i, ++i1, ++i2) {
     332                append_quad(i1.quad() & i2.quad(), quads, runs);
     333            }
     334        }
     335    }
     336    assert (i1 == quad_end() && i2 == other.quad_end());
     337    return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator());
     338}
     339
     340/** ------------------------------------------------------------------------------------------------------------- *
     341 * @brief union
     342 ** ------------------------------------------------------------------------------------------------------------- */
     343UnicodeSet UnicodeSet::operator+(const UnicodeSet & other) const {
    295344    std::vector<run_t> runs;
    296345    std::vector<bitquad_t> quads;
    297     const auto e1 = quad_end();
    298     const auto e2 = other.quad_end();
    299346    auto i1 = quad_begin(), i2 = other.quad_begin();
    300347    for (;;) {
     
    304351            break;
    305352        }
    306         if ((i1.type() == Full) && (i2.type() == Full)) {
     353        if ((i1.type() == Empty) && (i2.type() == Empty)) {
     354            append_run(Empty, n, runs);
     355            i1 += n;
     356            i2 += n;
     357        } else if ((i1.type() == Full) || (i2.type() == Full)) {
    307358            append_run(Full, n, runs);
    308359            i1 += n;
    309360            i2 += n;
    310         } else if ((i1.type() == Empty) || (i2.type() == Empty)) {
    311             append_run(Empty, n, runs);
    312             i1 += n;
    313             i2 += n;
    314         } else if (i1.type() == Full) {
     361        } else if (i1.type() == Empty) {
     362            assert (i2.type() == Mixed);
    315363            for (unsigned i = 0; i != n; ++i, ++i2) {
    316364                append_quad(i2.quad(), quads, runs);
    317365            }
    318366            i1 += n;
    319         } else if (i2.type() == Full) {
     367        } else if (i2.type() == Empty) {
     368            assert (i1.type() == Mixed);
    320369            for (unsigned i = 0; i != n; ++i, ++i1) {
    321370                append_quad(i1.quad(), quads, runs);
     
    323372            i2 += n;
    324373        } else { // both Mixed
    325             assert (i1.type() == Mixed && i2.type() == Mixed);
    326             for (unsigned i = 0; i != n; ++i, ++i1, ++i2) {
    327                 append_quad(i1.quad() & i2.quad(), quads, runs);
    328             }
    329         }
    330     }
    331     assert (i1 == e1 && i2 == e2);
    332     return UnicodeSet(std::move(runs), std::move(quads));
    333 }
    334 
    335 /** ------------------------------------------------------------------------------------------------------------- *
    336  * @brief union
    337  ** ------------------------------------------------------------------------------------------------------------- */
    338 UnicodeSet UnicodeSet::operator+(const UnicodeSet & other) const {
     374            assert (i1.type() == Mixed && i2.type() == Mixed);
     375            for (unsigned i = 0; i < n; ++i, ++i1, ++i2) {
     376                append_quad(i1.quad() | i2.quad(), quads, runs);
     377            }
     378        }
     379    }
     380    assert (i1 == quad_end() && i2 == other.quad_end());
     381    return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator());
     382}
     383
     384/** ------------------------------------------------------------------------------------------------------------- *
     385 * @brief difference
     386 ** ------------------------------------------------------------------------------------------------------------- */
     387UnicodeSet UnicodeSet::operator-(const UnicodeSet & other) const {
    339388    std::vector<run_t> runs;
    340389    std::vector<bitquad_t> quads;
    341     const auto e1 = quad_end();
    342     const auto e2 = other.quad_end();
    343390    auto i1 = quad_begin(), i2 = other.quad_begin();
    344391    for (;;) {
    345392        assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0)));
    346393        const auto n = std::min(i1.length(), i2.length());
    347         if (LLVM_UNLIKELY(n == 0)) {
    348             break;
    349         }
    350         if ((i1.type() == Empty) && (i2.type() == Empty)) {
    351             append_run(Empty, n, runs);
    352             i1 += n;
    353             i2 += n;
    354         } else if ((i1.type() == Full) || (i2.type() == Full)) {
    355             append_run(Full, n, runs);
    356             i1 += n;
    357             i2 += n;
    358         } else if (i1.type() == Empty) {
    359             assert (i2.type() == Mixed);
    360             for (unsigned i = 0; i != n; ++i, ++i2) {
    361                 append_quad(i2.quad(), quads, runs);
    362             }
    363             i1 += n;
    364         } else if (i2.type() == Empty) {
    365             assert (i1.type() == Mixed);
    366             for (unsigned i = 0; i != n; ++i, ++i1) {
    367                 append_quad(i1.quad(), quads, runs);
    368             }
    369             i2 += n;
    370         } else { // both Mixed
    371             assert (i1.type() == Mixed && i2.type() == Mixed);
    372             for (unsigned i = 0; i < n; ++i, ++i1, ++i2) {
    373                 append_quad(i1.quad() | i2.quad(), quads, runs);
    374             }
    375         }
    376     }
    377    
    378     assert (i1 == e1 && i2 == e2);
    379     return UnicodeSet(std::move(runs), std::move(quads));
    380 }
    381 
    382 /** ------------------------------------------------------------------------------------------------------------- *
    383  * @brief difference
    384  ** ------------------------------------------------------------------------------------------------------------- */
    385 UnicodeSet UnicodeSet::operator-(const UnicodeSet & other) const {
    386     std::vector<run_t> runs;
    387     std::vector<bitquad_t> quads;
    388     const auto e1 = quad_end();
    389     const auto e2 = other.quad_end();
    390     auto i1 = quad_begin(), i2 = other.quad_begin();
    391     for (;;) {
    392         assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0)));
    393         const auto n = std::min(i1.length(), i2.length());
    394         assert (n != 0 || (i1 == e1 && i2 == e2));
    395394        if (LLVM_UNLIKELY(n == 0)) {
    396395            break;
     
    416415            i2 += n;
    417416        } else { // both Mixed
    418             assert (i1.type() == Mixed && i2.type() == Mixed);
     417            assert (i1.type() == Mixed && i2.type() == Mixed);
    419418            for (unsigned i = 0; i != n; ++i, ++i1, ++i2) {               
    420419                append_quad(i1.quad() &~ i2.quad(), quads, runs);
     
    422421        }
    423422    }
    424     assert (i1 == e1 && i2 == e2);
    425     return UnicodeSet(std::move(runs), std::move(quads));
     423    assert (i1 == quad_end() && i2 == other.quad_end());
     424    return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator());
    426425}
    427426
     
    432431    std::vector<run_t> runs;
    433432    std::vector<bitquad_t> quads;
    434     const auto e1 = quad_end();
    435     const auto e2 = other.quad_end();
    436433    auto i1 = quad_begin(), i2 = other.quad_begin();
    437434    for (;;) {
     
    466463            i2 += n;
    467464        } else { // both Mixed
    468             assert (i1.type() == Mixed && i2.type() == Mixed);
     465            assert (i1.type() == Mixed && i2.type() == Mixed);
    469466            for (unsigned i = 0; i != n; ++i, ++i1, ++i2) {
    470467                append_quad(i1.quad() ^ i2.quad(), quads, runs);
     
    472469        }
    473470    }
    474     assert (i1 == e1 && i2 == e2);
    475     return UnicodeSet(std::move(runs), std::move(quads));
     471    assert (i1 == quad_end() && i2 == other.quad_end());
     472    return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator());
    476473}
    477474
     
    739736    return false;
    740737}
    741    
    742    
     738       
    743739/** ------------------------------------------------------------------------------------------------------------- *
    744740 * @brief intersects
     
    751747        auto n = std::min(i1.length(), i2.length());
    752748        if (LLVM_UNLIKELY(n == 0)) {
     749            assert (i1 == quad_end() && i2 == other.quad_end());
    753750            return false;
    754751        }
     
    759756            return true;
    760757        } else { //both Mixed
     758            assert (i1.type() == Mixed && i2.type() == Mixed);
    761759            for (; n; --n, ++i1, ++i2) {
    762760                if ((i1.quad() & i2.quad()) != 0) return true;
     
    766764}
    767765
     766/** ------------------------------------------------------------------------------------------------------------- *
     767 * @brief isSubsetOf
     768 * @param other
     769 *
     770 * Return true if this UnicodeSet is a subset of other
     771 ** ------------------------------------------------------------------------------------------------------------- */
     772bool UnicodeSet::subset(const UnicodeSet & other) const {
     773    for (auto i1 = quad_begin(), i2 = other.quad_begin();; ) {
     774        auto n = std::min(i1.length(), i2.length());
     775        if (LLVM_UNLIKELY(n == 0)) {
     776            assert (i1 == quad_end() && i2 == other.quad_end());
     777            return true;
     778        }
     779        if (i1.type() == Empty || i2.type() == Full) {
     780            i1 += n;
     781            i2 += n;
     782        } else if (i1.type() == Full || i2.type() == Empty) {
     783            return false;
     784        } else { //both Mixed
     785            assert (i1.type() == Mixed && i2.type() == Mixed);
     786            for (; n; --n, ++i1, ++i2) {
     787                if (i1.quad() &~ i2.quad()) return false;
     788            }
     789        }
     790    }
     791}
    768792
    769793/** ------------------------------------------------------------------------------------------------------------- *
     
    771795 ** ------------------------------------------------------------------------------------------------------------- */
    772796void UnicodeSet::quad_iterator::advance(unsigned n) {
    773     while (n > 0) {
    774         assert (mRemaining > 0);
     797    assert (mRemaining > 0);
     798    while (n > 0) {   
    775799        if (mRemaining > n) {
    776800            if (mType == Mixed) {
     
    785809            mQuadIterator += mRemaining;
    786810        }
    787         n -= mRemaining;       
     811        n -= mRemaining;
    788812        ++mRunIterator;
    789813        if (LLVM_UNLIKELY(mRunIterator == mRunEnd)) {
     
    905929 * @brief Empty/Full Set Constructor
    906930 ** ------------------------------------------------------------------------------------------------------------- */
    907 UnicodeSet::UnicodeSet(run_type_t emptyOrFull)
    908 : mRuns(mAllocator)
    909 , mQuads(mAllocator)
    910 {
     931UnicodeSet::UnicodeSet(run_type_t emptyOrFull, ProxyAllocator<> allocator)
     932: mRuns(allocator)
     933, mQuads(allocator) {
    911934    assert((emptyOrFull == Empty) || (emptyOrFull == Full));
    912935    append_run(emptyOrFull, UNICODE_QUAD_COUNT, mRuns);
     
    917940 * @brief Singleton Set Constructor
    918941 ** ------------------------------------------------------------------------------------------------------------- */
    919 UnicodeSet::UnicodeSet(const codepoint_t codepoint)
    920 : mRuns(mAllocator)
    921 , mQuads(mAllocator)
    922 {
     942UnicodeSet::UnicodeSet(const codepoint_t codepoint, ProxyAllocator<> allocator)
     943: mRuns(allocator)
     944, mQuads(allocator) {
    923945    const codepoint_t quad_no = codepoint / QUAD_BITS;
    924946    append_run(Empty, quad_no, mRuns);
     
    931953 * @brief Range Set Constructor
    932954 ** ------------------------------------------------------------------------------------------------------------- */
    933 UnicodeSet::UnicodeSet(const codepoint_t lo, const codepoint_t hi)
    934 : mRuns(mAllocator)
    935 , mQuads(mAllocator)
     955UnicodeSet::UnicodeSet(const codepoint_t lo, const codepoint_t hi, ProxyAllocator<> allocator)
     956: mRuns(allocator)
     957, mQuads(allocator)
    936958{
    937959    const codepoint_t lo_index = lo / QUAD_BITS;
     
    959981template <typename itr>
    960982void convertIntervalRangesToSparseSet(const itr begin, const itr end, UnicodeSet::RunVector & mRuns, UnicodeSet::QuadVector & mQuads) {
    961     assert (std::is_sorted(begin, end, [](const interval_t l, const interval_t r) {
    962         assert (l.first <= l.second);
    963         assert (l.second <= UNICODE_MAX);
    964         assert (r.first <= r.second);
    965         assert (r.second <= UNICODE_MAX);
     983
     984    std::vector<run_t> runs;
     985    std::vector<bitquad_t> quads;
     986
     987    assert ("interval list must be totally ordered" && std::is_sorted(begin, end, [](const interval_t l, const interval_t r) {
     988        if (l.first > l.second) return false;
     989        if (l.second > UNICODE_MAX) return false;
     990        if (r.first > r.second) return false;
     991        if (r.second > UNICODE_MAX) return false;
    966992        return l.second < r.first;
    967993    }));
    968 
    969     std::vector<run_t> runs;
    970     std::vector<bitquad_t> quads;
    971 
     994   
    972995    codepoint_t prior_index = 0;
    973996    bitquad_t mask = 0;
     
    10041027        append_run(Empty, UNICODE_QUAD_COUNT - prior_index, runs);
    10051028    }   
    1006     assert (verify(runs, quads));
    10071029    mRuns.assign(runs.begin(), runs.end());
    10081030    mQuads.assign(quads.begin(), quads.end());
     
    10121034 * @brief Interval Range Constructor
    10131035 ** ------------------------------------------------------------------------------------------------------------- */
    1014 UnicodeSet::UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end)
    1015 : mRuns(0, {Empty, 0}, mAllocator)
    1016 , mQuads(0, 0, mAllocator) {
     1036UnicodeSet::UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, ProxyAllocator<> allocator)
     1037: mRuns(allocator)
     1038, mQuads(allocator) {
    10171039    convertIntervalRangesToSparseSet(begin, end, mRuns, mQuads);
     1040    assert (verify(mRuns, mQuads));
    10181041}
    10191042
     
    10211044 * @brief Interval Range Constructor
    10221045 ** ------------------------------------------------------------------------------------------------------------- */
    1023 UnicodeSet::UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end)
    1024 : mRuns(0, {Empty, 0}, mAllocator)
    1025 , mQuads(0, 0, mAllocator) {
     1046UnicodeSet::UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, ProxyAllocator<> allocator)
     1047: mRuns(allocator)
     1048, mQuads(allocator) {
    10261049    convertIntervalRangesToSparseSet(begin, end, mRuns, mQuads);
     1050    assert (verify(mRuns, mQuads));
    10271051}
    10281052
     
    10301054 * @brief Copy Constructor
    10311055 ** ------------------------------------------------------------------------------------------------------------- */
    1032 UnicodeSet::UnicodeSet(const UnicodeSet & other)
    1033 : mRuns(other.mRuns, mAllocator)
    1034 , mQuads(other.mQuads, mAllocator) {
     1056UnicodeSet::UnicodeSet(const UnicodeSet & other, ProxyAllocator<> allocator)
     1057: mRuns(other.mRuns, allocator)
     1058, mQuads(other.mQuads, allocator) {
    10351059    assert (verify(mRuns, mQuads));
    10361060}
     
    10391063 * @brief Initializer Constructor
    10401064 ** ------------------------------------------------------------------------------------------------------------- */
    1041 UnicodeSet::UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q)
    1042 : mRuns(r.begin(), r.end(), mAllocator)
    1043 , mQuads(q.begin(), q.end(), mAllocator) {
     1065UnicodeSet::UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q, ProxyAllocator<> allocator)
     1066: mRuns(r.begin(), r.end(), allocator)
     1067, mQuads(q.begin(), q.end(), allocator) {
    10441068    assert (verify(mRuns, mQuads));
    10451069}
     
    10481072 * @brief Internal Vector Constructor
    10491073 ** ------------------------------------------------------------------------------------------------------------- */
    1050 inline UnicodeSet::UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q)
    1051 : mRuns(r.begin(), r.end(), mAllocator)
    1052 , mQuads(q.begin(), q.end(), mAllocator) {
     1074inline UnicodeSet::UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q, ProxyAllocator<> allocator)
     1075: mRuns(r.begin(), r.end(), allocator)
     1076, mQuads(q.begin(), q.end(), allocator) {
    10531077    assert (verify(mRuns, mQuads));
    10541078}
Note: See TracChangeset for help on using the changeset viewer.