Ignore:
Timestamp:
Jul 1, 2015, 3:30:08 PM (4 years ago)
Author:
nmedfort
Message:

Temporary check-in.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r4626 r4627  
    2424#include <llvm/Support/Format.h>
    2525#include <include/simd-lib/builtins.hpp>
    26 #include <iostream>
    2726
    2827using namespace re;
     
    385384    assert (n == 1);   
    386385
    387     if (LLVM_UNLIKELY(mBaseCodePoint >= CC::UNICODE_MAX)) {
    388         mRunIterator = mRunEnd;
    389         mQuadIterator = mQuadEnd;
    390         mMixedRunIndex = 0;
    391         mQuadOffset = 0;
    392         return;
    393     }
    394 
     386    if (LLVM_UNLIKELY(mMinCodePoint >= 0x110000)) {
     387        throw std::runtime_error("UnicodeSet iterator exceeded maximum code point.");
     388    }
     389
     390    bool found = false;
    395391    // Find the start of our interval
    396     for ( ; mBaseCodePoint < CC::UNICODE_MAX; ++mRunIterator) {
     392    while ( mBaseCodePoint < 0x110000 ) {
    397393        // Find the first non-empty block
    398         if (typeOf(*mRunIterator) != Mixed) {
    399             mBaseCodePoint += lengthOf(*mRunIterator) * QUAD_BITS;
    400             mQuadOffset = 0;
    401             mMixedRunIndex = 0;
     394        if (typeOf(*mRunIterator) != Mixed) {           
    402395            // If we found a full run, this must be the start of our interval.
    403             // Otherwise it must be empty.
    404             if (typeOf(*mRunIterator) == Full) {
    405                 mMinCodePoint = mBaseCodePoint;
     396            const auto baseCodePoint = mBaseCodePoint;
     397            const auto type = typeOf(*mRunIterator);
     398            mBaseCodePoint += lengthOf(*mRunIterator++) * QUAD_BITS;
     399            if (type == Full) {
     400                mMinCodePoint = baseCodePoint;
     401                found = true;
    406402                break;
    407403            }
    408404        }
    409405        else { // if (typeOf(t) == Mixed)
    410             bool found = false;
    411406            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    412407                const bitquad_t m = (*mQuadIterator) & (FULL_QUAD_MASK << mQuadOffset);
     
    420415                }
    421416                mBaseCodePoint += QUAD_BITS;
     417                ++mQuadIterator;
    422418                ++mMixedRunIndex;
    423                 ++mQuadIterator;
    424419                mQuadOffset = 0;
    425420            }
    426             // If we found nothing in the quad, restart the loop.
    427             if (found) {
    428                 break;
    429             }
    430         }
    431     }
    432 
    433     // Find the end of our interval
    434     for ( ; mBaseCodePoint < CC::UNICODE_MAX; ++mRunIterator) {
    435         // If this run is Empty, the max code point is the last computed base code point - 1.
    436         if (typeOf(*mRunIterator) == Empty) {
    437             mMaxCodePoint = mBaseCodePoint - 1;
    438             break;
    439         }
    440         // If this run is Full, increment the base code point; we need to check whether
    441         // the next run is Empty or Mixed to know if we've found the max code point of
    442         // the current interval.
    443         else if (typeOf(*mRunIterator) == Full) {
    444             mBaseCodePoint += lengthOf(*mRunIterator) * QUAD_BITS;
     421            if (found) break;
     422            ++mRunIterator;
    445423            mQuadOffset = 0;
    446424            mMixedRunIndex = 0;
    447             continue;
     425        }
     426    }
     427
     428    if (!found) {
     429        assert (mBaseCodePoint == 0x110000);
     430        mMinCodePoint = 0x110000;
     431        return;
     432    }
     433
     434    // at this stage, the max code point is the previous max code point (initially 0)
     435    assert (mMaxCodePoint <= mMinCodePoint);
     436    found = false;
     437    // Find the end of our interval
     438    while ( mBaseCodePoint < 0x110000 ) {
     439
     440        // Find the first non-Full block
     441        if (typeOf(*mRunIterator) != Mixed) {
     442            // If this run is Empty, the max code point is the last computed base code point - 1.
     443            const auto baseCodePoint = mBaseCodePoint;
     444            const auto type = typeOf(*mRunIterator);
     445            mBaseCodePoint += lengthOf(*mRunIterator++) * QUAD_BITS;
     446            if (type == Empty) {
     447                mMaxCodePoint = baseCodePoint - 1;
     448                found = true;
     449                break;
     450            }
    448451        }
    449452        else { // if (typeOf(t) == Mixed)
    450             bool found = false;
    451453            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    452                 const bitquad_t m = (~(*mQuadIterator)) & (FULL_QUAD_MASK << mQuadOffset);
     454                const bitquad_t m = ((~(*mQuadIterator)) & FULL_QUAD_MASK) & (FULL_QUAD_MASK << mQuadOffset);
     455
    453456                // If we found a marker in m, it marks the end of our current interval.
    454457                // Find it and break out of the loop.
     
    460463                }
    461464                mBaseCodePoint += QUAD_BITS;
     465                ++mQuadIterator;
    462466                ++mMixedRunIndex;
    463                 ++mQuadIterator;
    464467                mQuadOffset = 0;
    465468            }
    466             // If we found nothing in the quad, restart the loop.
    467             if (found) {
    468                 break;
    469             }
    470         }
    471     }
    472 
     469            if (found) break;
     470            ++mRunIterator;
     471            mQuadOffset = 0;
     472            mMixedRunIndex = 0;
     473        }
     474    }
     475    // if the very last block is a mixed block and we go past it, the last code point of the range is 0x10FFFF
     476    if (!found) {
     477        assert (mBaseCodePoint == 0x110000);
     478        mMaxCodePoint = 0x10FFFF;
     479    }
     480
     481    assert (mMinCodePoint <= mMaxCodePoint);
    473482}
    474483
Note: See TracChangeset for help on using the changeset viewer.