Changeset 4627


Ignore:
Timestamp:
Jul 1, 2015, 3:30:08 PM (4 years ago)
Author:
nmedfort
Message:

Temporary check-in.

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r4626 r4627  
    2020    }
    2121    return s.str();
     22}
     23
     24int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
     25    throw std::runtime_error("Property " + value_spec + " unsupported.");
    2226}
    2327
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r4626 r4627  
    2828        }
    2929                PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    30        
     30        virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    3131                property_t the_property;
    3232                ClassTypeId the_kind;
     
    6262                                         const std::vector<UnicodeSet> sets) :
    6363                PropertyObject(p, ClassTypeId::EnumeratedProperty), property_value_enum_names(enum_names), property_value_full_names(names), property_value_aliases(aliases), aliases_initialized(false), property_value_sets(sets) {}
    64         int GetPropertyValueEnumCode(const std::string & value_spec);
     64        virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    6565        UnicodeSet GetCodepointSet(const std::string & value_spec);
    6666        UnicodeSet GetCodepointSet(const int property_enum_val) const;
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4626 r4627  
    431431        {0x10000, 0x10FFFF}};
    432432
     433//    llvm::raw_os_ostream out(std::cerr);
     434
     435//    for (auto range : set) {
     436//        out << range.first << ',' << range.second << "\n";
     437//    }
     438
     439//    out.flush();
     440
    433441    return generateWithIfHierarchy(defaultIfHierachy, set, entry);
    434442}
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r4626 r4627  
    2424#include <llvm/Support/Format.h>
    2525#include <include/simd-lib/builtins.hpp>
    26 #include <iostream>
    2726
    2827using namespace re;
     
    385384    assert (n == 1);   
    386385
    387     if (LLVM_UNLIKELY(mBaseCodePoint >= CC::UNICODE_MAX)) {
    388         mRunIterator = mRunEnd;
    389         mQuadIterator = mQuadEnd;
    390         mMixedRunIndex = 0;
    391         mQuadOffset = 0;
    392         return;
    393     }
    394 
     386    if (LLVM_UNLIKELY(mMinCodePoint >= 0x110000)) {
     387        throw std::runtime_error("UnicodeSet iterator exceeded maximum code point.");
     388    }
     389
     390    bool found = false;
    395391    // Find the start of our interval
    396     for ( ; mBaseCodePoint < CC::UNICODE_MAX; ++mRunIterator) {
     392    while ( mBaseCodePoint < 0x110000 ) {
    397393        // Find the first non-empty block
    398         if (typeOf(*mRunIterator) != Mixed) {
    399             mBaseCodePoint += lengthOf(*mRunIterator) * QUAD_BITS;
    400             mQuadOffset = 0;
    401             mMixedRunIndex = 0;
     394        if (typeOf(*mRunIterator) != Mixed) {           
    402395            // If we found a full run, this must be the start of our interval.
    403             // Otherwise it must be empty.
    404             if (typeOf(*mRunIterator) == Full) {
    405                 mMinCodePoint = mBaseCodePoint;
     396            const auto baseCodePoint = mBaseCodePoint;
     397            const auto type = typeOf(*mRunIterator);
     398            mBaseCodePoint += lengthOf(*mRunIterator++) * QUAD_BITS;
     399            if (type == Full) {
     400                mMinCodePoint = baseCodePoint;
     401                found = true;
    406402                break;
    407403            }
    408404        }
    409405        else { // if (typeOf(t) == Mixed)
    410             bool found = false;
    411406            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    412407                const bitquad_t m = (*mQuadIterator) & (FULL_QUAD_MASK << mQuadOffset);
     
    420415                }
    421416                mBaseCodePoint += QUAD_BITS;
     417                ++mQuadIterator;
    422418                ++mMixedRunIndex;
    423                 ++mQuadIterator;
    424419                mQuadOffset = 0;
    425420            }
    426             // If we found nothing in the quad, restart the loop.
    427             if (found) {
    428                 break;
    429             }
    430         }
    431     }
    432 
    433     // Find the end of our interval
    434     for ( ; mBaseCodePoint < CC::UNICODE_MAX; ++mRunIterator) {
    435         // If this run is Empty, the max code point is the last computed base code point - 1.
    436         if (typeOf(*mRunIterator) == Empty) {
    437             mMaxCodePoint = mBaseCodePoint - 1;
    438             break;
    439         }
    440         // If this run is Full, increment the base code point; we need to check whether
    441         // the next run is Empty or Mixed to know if we've found the max code point of
    442         // the current interval.
    443         else if (typeOf(*mRunIterator) == Full) {
    444             mBaseCodePoint += lengthOf(*mRunIterator) * QUAD_BITS;
     421            if (found) break;
     422            ++mRunIterator;
    445423            mQuadOffset = 0;
    446424            mMixedRunIndex = 0;
    447             continue;
     425        }
     426    }
     427
     428    if (!found) {
     429        assert (mBaseCodePoint == 0x110000);
     430        mMinCodePoint = 0x110000;
     431        return;
     432    }
     433
     434    // at this stage, the max code point is the previous max code point (initially 0)
     435    assert (mMaxCodePoint <= mMinCodePoint);
     436    found = false;
     437    // Find the end of our interval
     438    while ( mBaseCodePoint < 0x110000 ) {
     439
     440        // Find the first non-Full block
     441        if (typeOf(*mRunIterator) != Mixed) {
     442            // If this run is Empty, the max code point is the last computed base code point - 1.
     443            const auto baseCodePoint = mBaseCodePoint;
     444            const auto type = typeOf(*mRunIterator);
     445            mBaseCodePoint += lengthOf(*mRunIterator++) * QUAD_BITS;
     446            if (type == Empty) {
     447                mMaxCodePoint = baseCodePoint - 1;
     448                found = true;
     449                break;
     450            }
    448451        }
    449452        else { // if (typeOf(t) == Mixed)
    450             bool found = false;
    451453            while (mMixedRunIndex != lengthOf(*mRunIterator)) {
    452                 const bitquad_t m = (~(*mQuadIterator)) & (FULL_QUAD_MASK << mQuadOffset);
     454                const bitquad_t m = ((~(*mQuadIterator)) & FULL_QUAD_MASK) & (FULL_QUAD_MASK << mQuadOffset);
     455
    453456                // If we found a marker in m, it marks the end of our current interval.
    454457                // Find it and break out of the loop.
     
    460463                }
    461464                mBaseCodePoint += QUAD_BITS;
     465                ++mQuadIterator;
    462466                ++mMixedRunIndex;
    463                 ++mQuadIterator;
    464467                mQuadOffset = 0;
    465468            }
    466             // If we found nothing in the quad, restart the loop.
    467             if (found) {
    468                 break;
    469             }
    470         }
    471     }
    472 
     469            if (found) break;
     470            ++mRunIterator;
     471            mQuadOffset = 0;
     472            mMixedRunIndex = 0;
     473        }
     474    }
     475    // if the very last block is a mixed block and we go past it, the last code point of the range is 0x10FFFF
     476    if (!found) {
     477        assert (mBaseCodePoint == 0x110000);
     478        mMaxCodePoint = 0x10FFFF;
     479    }
     480
     481    assert (mMinCodePoint <= mMaxCodePoint);
    473482}
    474483
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r4626 r4627  
    5555        friend class boost::iterator_core_access;
    5656    protected:
    57         iterator(RunVector::const_iterator runIterator, QuadVector::const_iterator quadIterator,
    58                  RunVector::const_iterator runEnd, QuadVector::const_iterator quadEnd)
     57
     58        iterator(const RunVector::const_iterator runIterator, const QuadVector::const_iterator quadIterator, const codepoint_t baseCodePoint)
    5959        : mRunIterator(runIterator), mQuadIterator(quadIterator)
    60         , mMixedRunIndex(0), mQuadOffset(0), mBaseCodePoint(0), mMinCodePoint(0), mMaxCodePoint(0)
    61         , mRunEnd(runEnd), mQuadEnd(quadEnd)
    62         {
     60        , mMixedRunIndex(0), mQuadOffset(0), mBaseCodePoint(baseCodePoint), mMinCodePoint(baseCodePoint), mMaxCodePoint(baseCodePoint) {
    6361
    6462        }
     
    7573
    7674        inline bool equal(const iterator & other) const {
    77             return (mRunIterator == other.mRunIterator) && (mQuadIterator == other.mQuadIterator) &&
    78                    (mMixedRunIndex == other.mMixedRunIndex) && (mQuadOffset == other.mQuadOffset);
     75            return (mMinCodePoint == other.mMinCodePoint);
    7976        }
    8077    private:
     
    8683        codepoint_t                         mMinCodePoint;
    8784        codepoint_t                         mMaxCodePoint;
    88         const RunVector::const_iterator     mRunEnd;
    89         const QuadVector::const_iterator    mQuadEnd;
    9085    };
    9186
    9287    inline iterator begin() const {
    93         // note: pre-increment is intentional to move the iterator onto the first non-Empty interval.
    94         return ++iterator(mRuns.cbegin(), mQuads.cbegin(), mRuns.cend(), mQuads.cend());
     88        // note: preincrement forces the iterator to advance onto and capture the first interval.
     89        return ++iterator(mRuns.cbegin(), mQuads.cbegin(), 0);
    9590    }
    9691
    9792    inline iterator end() const {
    98         return iterator(mRuns.cend(), mQuads.cend(), mRuns.cend(), mQuads.cend());
     93        return iterator(mRuns.cend(), mQuads.cend(), 0x110000);
    9994    }
    10095
  • icGREP/icgrep-devel/icgrep/resolve_properties.cpp

    r4626 r4627  
    4343}
    4444
     45inline int GetPropertyValueEnumCode(const UCD::property_t type, const std::string & value) {
     46    return property_object_table[type]->GetPropertyValueEnumCode(value);
     47}
     48
    4549void resolveProperties(RE * re) {
    4650    if (Alt * alt = dyn_cast<Alt>(re)) {
     
    8185                if (theprop == gc) {
    8286                    // General Category
    83                     int valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[gc])->GetPropertyValueEnumCode(value);
     87                    int valcode = GetPropertyValueEnumCode(gc, value);
    8488                    if (valcode < 0) {
    8589                        throw UnicodePropertyExpressionError("Erroneous property value for general_category property");
     
    8993                else if (theprop == sc) {
    9094                    // Script property identified
    91                     int valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[sc])->GetPropertyValueEnumCode(value);
     95                    int valcode = GetPropertyValueEnumCode(sc, value);
    9296                    if (valcode < 0) {
    9397                        throw UnicodePropertyExpressionError("Erroneous property value for script property");
     
    97101                else if (theprop == scx) {
    98102                    // Script extension property identified
    99                     int valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[sc])->GetPropertyValueEnumCode(value);
     103                    int valcode = GetPropertyValueEnumCode(sc, value);
    100104                    if (valcode >= 0) {
    101105                        throw UnicodePropertyExpressionError("Erroneous property value for script_extension property");
     
    105109                else if (theprop == blk) {
    106110                    // Block property identified
    107                     int valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[blk])->GetPropertyValueEnumCode(value);
     111                    int valcode = GetPropertyValueEnumCode(blk, value);
    108112                    if (valcode >= 0) {
    109113                         throw UnicodePropertyExpressionError("Erroneous property value for block property");
     
    132136            else {
    133137                // No namespace (property) name.   Try as a general category.
    134                 int valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[gc])->GetPropertyValueEnumCode(value);
     138                int valcode = GetPropertyValueEnumCode(gc, value);
    135139                if (valcode >= 0) {
    136140                    theprop = gc;
     
    138142                    return;
    139143                }
    140                 valcode = dyn_cast<EnumeratedPropertyObject> (property_object_table[sc])->GetPropertyValueEnumCode(value);
     144                valcode = GetPropertyValueEnumCode(sc, value);
    141145                if (valcode >= 0) {
    142146                    theprop = sc;
     
    283287        else {
    284288            // No namespace (property) name.   Try as a general category.
    285             int valcode = cast<EnumeratedPropertyObject>(property_object_table[gc])->GetPropertyValueEnumCode(value);
     289            int valcode = GetPropertyValueEnumCode(gc, value);
    286290            if (valcode >= 0) {
    287291                return cast<EnumeratedPropertyObject>(property_object_table[gc])->GetCodepointSet(valcode);
Note: See TracChangeset for help on using the changeset viewer.