Changeset 5727 for icGREP


Ignore:
Timestamp:
Nov 7, 2017, 6:12:56 PM (18 months ago)
Author:
cameron
Message:

Small fixes, constructing/testing full UnicodeSets.

Location:
icGREP/icgrep-devel/icgrep
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r5706 r5727  
    3232//
    3333// Select the correct built-in scan function, dependent on whatever
    34 // bitquad_t resolves to, when scan_forwrad_zeroes<bitquad_t> is called.
     34// bitquad_t resolves to, when scan_forward_zeroes<bitquad_t> is called.
    3535template <typename T> int scan_forward_zeroes(T x);
    3636template <> inline int scan_forward_zeroes<unsigned int>(unsigned int x){return __builtin_ctz(x);}
     
    126126bool UnicodeSet::empty() const {
    127127    return (mRuns.size() == 1) && typeOf(mRuns.front()) == Empty;
     128}
     129
     130/** ------------------------------------------------------------------------------------------------------------- *
     131 * @brief full
     132 ** ------------------------------------------------------------------------------------------------------------- */
     133bool UnicodeSet::full() const {
     134    return (mRuns.size() == 1) && typeOf(mRuns.front()) == Full;
    128135}
    129136
     
    424431void UnicodeSet::insert(const codepoint_t cp) {
    425432
    426     if (LLVM_UNLIKELY(cp >= UNICODE_MAX)) {
     433    if (LLVM_UNLIKELY(cp > UNICODE_MAX)) {
    427434        throw std::runtime_error(std::to_string(cp) + " exceeds maximum code point.");
    428435    }
     
    696703    assert (n == 1);
    697704
    698     if (LLVM_UNLIKELY(mMinCodePoint >= 0x110000)) {
     705    if (LLVM_UNLIKELY(mMinCodePoint > UNICODE_MAX)) {
    699706        throw std::runtime_error("UnicodeSet iterator exceeded maximum code point.");
    700707    }
     
    702709    bool found = false;
    703710    // Find the start of our interval
    704     while ( mBaseCodePoint < 0x110000 ) {
     711    while ( mBaseCodePoint <= UNICODE_MAX ) {
    705712        // Find the first non-empty block
    706713        if (typeOf(*mRunIterator) != Mixed) {           
     
    738745
    739746    if (!found) {
    740         assert (mBaseCodePoint == 0x110000);
    741         mMinCodePoint = 0x110000;
     747        assert (mBaseCodePoint == (UNICODE_MAX+1));
     748        mMinCodePoint = (UNICODE_MAX+1);
    742749        return;
    743750    }
     
    747754    found = false;
    748755    // Find the end of our interval
    749     while ( mBaseCodePoint < 0x110000 ) {
     756    while ( mBaseCodePoint <= UNICODE_MAX ) {
    750757
    751758        // Find the first non-Full block
     
    783790        }
    784791    }
    785     // if the very last block is a mixed block and we go past it, the last code point of the range is 0x10FFFF
     792    // if the very last block is a mixed block and we go past it, the last code point of the range is UNICODE_MAX
    786793    if (!found) {
    787         assert (mBaseCodePoint == 0x110000);
    788         mMaxCodePoint = 0x10FFFF;
     794        assert (mBaseCodePoint == (UNICODE_MAX+1));
     795        mMaxCodePoint = UNICODE_MAX;
    789796    }
    790797
    791798    assert (mMinCodePoint <= mMaxCodePoint);
    792799}
    793 
    794 /** ------------------------------------------------------------------------------------------------------------- *
    795  * @brief Empty Set Constructor
    796  ** ------------------------------------------------------------------------------------------------------------- */
    797 UnicodeSet::UnicodeSet()
     800   
     801/** ------------------------------------------------------------------------------------------------------------- *
     802 * @brief Empty/Full Set Constructor
     803 ** ------------------------------------------------------------------------------------------------------------- */
     804UnicodeSet::UnicodeSet(run_type_t emptyOrFull)
    798805: mRuns(mAllocator)
    799806, mQuads(mAllocator)
    800807{
    801     append_run(Empty, UNICODE_QUAD_COUNT, mRuns);
     808    assert((emptyOrFull == Empty) || (emptyOrFull == Full));
     809    append_run(emptyOrFull, UNICODE_QUAD_COUNT, mRuns);
    802810    assert (verify(mRuns, mQuads));
    803811}
    804 
     812           
    805813/** ------------------------------------------------------------------------------------------------------------- *
    806814 * @brief Singleton Set Constructor
     
    850858    assert (std::is_sorted(begin, end, [](const interval_t l, const interval_t r) {
    851859        assert (l.first <= l.second);
    852         assert (l.second < UNICODE_MAX);
     860        assert (l.second <= UNICODE_MAX);
    853861        assert (r.first <= r.second);
    854         assert (r.second < UNICODE_MAX);
     862        assert (r.second <= UNICODE_MAX);
    855863        return l.second < r.first;
    856864    }));
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r5632 r5727  
    9898
    9999    inline iterator end() const {
    100         return iterator(mRuns.cend(), mQuads.cend(), 0x110000);
    101     }
    102 
    103     bool empty() const;
    104 
     100        return iterator(mRuns.cend(), mQuads.cend(), UNICODE_MAX+1);
     101    }
     102
     103    bool empty() const; // The set has no members
     104   
     105    bool full() const;  // The set has the full set of possible Unicode codepoints.
     106   
    105107    bool contains(const codepoint_t codepoint) const;
    106108
     
    134136    bool operator<(const UnicodeSet & other) const;
    135137
    136     UnicodeSet();
     138    UnicodeSet(run_type_t emptyOrFull = Empty);
    137139    UnicodeSet(const codepoint_t codepoint);
    138140    UnicodeSet(const codepoint_t lo, const codepoint_t hi);
     
    141143    UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end);
    142144    UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end);
    143 
     145   
    144146    inline void swap(UnicodeSet & other);
    145147    inline void swap(UnicodeSet && other);
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp

    r5630 r5727  
    1616// The breakpoints may be determined by iterating through the interval
    1717// representation of each CC.   For each interval (lo, hi), lo and hi+1
    18 // are breakpoints.
     18// are breakpoints. 
    1919//
    2020// For each breakpoint, a bitset is computed identifying the source CCs whose
     
    7979        // Start a new range.
    8080        range_lo = bkpt_entry.first;
     81        if (range_lo > UCD::UNICODE_MAX) continue;
    8182        current_set ^= bkpt_entry.second;
    8283        auto idx_iter = CC_set_to_exclusive_set_map.find(current_set);
  • icGREP/icgrep-devel/icgrep/re/re_analysis.cpp

    r5723 r5727  
    8989        return subtractCC(matchableCodepoints(diff->getLH()), matchableCodepoints(diff->getRH()));
    9090    } else if (const Intersect * e = dyn_cast<Intersect>(re)) {
    91         return intersectCC(matchableCodepoints(diff->getLH()), matchableCodepoints(diff->getRH()));
     91        return intersectCC(matchableCodepoints(e->getLH()), matchableCodepoints(e->getRH()));
    9292    } else if (isa<Any>(re)) {
    9393        return makeCC(0, 0x10FFFF);
Note: See TracChangeset for help on using the changeset viewer.