Changeset 5685


Ignore:
Timestamp:
Oct 10, 2017, 6:55:18 AM (19 months ago)
Author:
cameron
Message:

Completion of regexp support for numeric properties: allow matches to NaN

Location:
icGREP/icgrep-devel
Files:
2 deleted
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/README-generate-UCD.txt

    r5642 r5685  
     3  3   1. Edit UCD_config.py - set UCD_src_dir, UCD_output_dir
     4  4
     5    - 2. UnicodeNameData
     6    -    python UnicodeNameData.py
     7    -
     8    - 3. python UCD_properties.py
        5 + 2. python3 UCD_properties.py
     9  6
    10    - 4. python casefold.py
    11    -
    12    - 5. python generate_UCD_tests.py
        7 + #  Needs to be updated,
        8 + 3. python3 generate_UCD_tests.py
    13  9       copy output to icgrep-devel/QA/proptest.xml
    14 10
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5673 r5685  
    147 147       for cp in cps:
    148 148           string_buffer += cp_value_map[cp] + "\n"
        149 +     string_buffer += "NaN\n"  # This inserts the standard default value for strings as the last entry
    149 150       buffer_length = len(string_buffer.encode("utf-8"))
    150 151       f.write(s.substitute(prop_enum = property_code,
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5684 r5685  
    290 290   public:
    291 291
    292     -     SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps)
    293     -     : mCodepointTableByLineNum(cps) {}
        292 +     SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps, UnicodeSet defaultValueSet)
        293 +     : mCodepointTableByLineNum(cps), mDefaultValueSet(defaultValueSet) {}
    294 294
    295 295       void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     
    297 297   private:
    298 298       const std::vector<UCD::codepoint_t> & mCodepointTableByLineNum;
        299 +     UnicodeSet mDefaultValueSet;
    299 300       UnicodeSet mAccumSet;
    300 301   };
    301 302   void SetByLineNumberAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    302     -     assert (line_start <= line_end);
    303     -     mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
        303 +     if (lineNum >= mCodepointTableByLineNum.size()) mAccumSet = mAccumSet + mDefaultValueSet;
        304 +     else mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
    304 305   }
    305 306
     
    328 329   const UnicodeSet NumericPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
    329 330       UnicodeSet matched;
    330     -     // TODO:  Should we allow matches to NaN???
    331     -     SetByLineNumberAccumulator accum(mExplicitCps);
        331 +     SetByLineNumberAccumulator accum(mExplicitCps, mNaNCodepointSet);
    332 332       grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    333 333       return matched + accum.getAccumulatedSet();
     
    367 367           matched = matched + mNullCodepointSet;
    368 368       }
    369     -     SetByLineNumberAccumulator accum(mExplicitCps);
        369 +     SetByLineNumberAccumulator accum(mExplicitCps, mNullCodepointSet);
    370 370       grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    371 371       return matched + accum.getAccumulatedSet();
     
    399 399   const UnicodeSet StringOverridePropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
    400 400       UnicodeSet base_set = mBaseObject.GetCodepointSetMatchingPattern(pattern) - mOverriddenSet;
    401     -     SetByLineNumberAccumulator accum(mExplicitCps);
        401 +     SetByLineNumberAccumulator accum(mExplicitCps, UnicodeSet());
    402 402       grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
    403 403       return base_set + accum.getAccumulatedSet();
  • icGREP/icgrep-devel/icgrep/UCD/UnicodeData.h

    r5670 r5685  
    47294 47294                 0xfffeffff}};
    47295 47295
    47296       -        const unsigned buffer_length = 3924;
          47296 +        const unsigned buffer_length = 3928;
    47297 47297           const static char __attribute__ ((aligned (32))) string_buffer[4096] = u8R"__(0
    47298 47298   1
     
    48795 48795   0
    48796 48796   9
          48797 + NaN
    48797 48798   )__";
    48798 48799
Note: See TracChangeset for help on using the changeset viewer.