Ignore:
Timestamp:
Oct 10, 2017, 6:55:18 AM (22 months ago)
Author:
cameron
Message:

Completion of regexp support for numeric properties: allow matches to NaN

Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5684 r5685  
    290 290 public:
    291 291
    292     SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps)
    293     : mCodepointTableByLineNum(cps) {}
     292    SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps, UnicodeSet defaultValueSet)
     293    : mCodepointTableByLineNum(cps), mDefaultValueSet(defaultValueSet) {}
    294 294
    295 295     void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     
    297 297 private:
    298 298     const std::vector<UCD::codepoint_t> & mCodepointTableByLineNum;
     299    UnicodeSet mDefaultValueSet;
    299 300     UnicodeSet mAccumSet;
    300 301 };
    301 302 void SetByLineNumberAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    302     assert (line_start <= line_end);
    303     mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
     303    if (lineNum >= mCodepointTableByLineNum.size()) mAccumSet = mAccumSet + mDefaultValueSet;
     304    else mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
    304 305 }
    305 306
     
    328 329 const UnicodeSet NumericPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
    329 330     UnicodeSet matched;
    330     // TODO:  Should we allow matches to NaN???
    331     SetByLineNumberAccumulator accum(mExplicitCps);
     331    SetByLineNumberAccumulator accum(mExplicitCps, mNaNCodepointSet);
    332 332     grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    333 333     return matched + accum.getAccumulatedSet();
     
    367 367         matched = matched + mNullCodepointSet;
    368 368     }
    369     SetByLineNumberAccumulator accum(mExplicitCps);
     369    SetByLineNumberAccumulator accum(mExplicitCps, mNullCodepointSet);
    370 370     grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    371 371     return matched + accum.getAccumulatedSet();
     
    399 399 const UnicodeSet StringOverridePropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
    400 400     UnicodeSet base_set = mBaseObject.GetCodepointSetMatchingPattern(pattern) - mOverriddenSet;
    401     SetByLineNumberAccumulator accum(mExplicitCps);
     401    SetByLineNumberAccumulator accum(mExplicitCps, UnicodeSet());
    402 402     grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    403 403     return base_set + accum.getAccumulatedSet();
  • icGREP/icgrep-devel/icgrep/UCD/UnicodeData.h

    r5670 r5685  
    47294 47294               0xfffeffff}};
    47295 47295
    47296        const unsigned buffer_length = 3924;
     47296       const unsigned buffer_length = 3928;
    47297 47297         const static char __attribute__ ((aligned (32))) string_buffer[4096] = u8R"__(0
    47298 47298 1
     
    48795 48795 0
    48796 48796 9
     48797 NaN
    48797 48798 )__";
    48798 48799
Note: See TracChangeset for help on using the changeset viewer.