Ignore:
Timestamp:
Oct 9, 2017, 9:28:24 AM (21 months ago)
Author:
cameron
Message:

Refactoring progress: \N uses name property; delay resolution of recursive property expressions, property object regexp support

Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
2 deleted
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5672 r5679  
    1414#include <llvm/Support/raw_ostream.h>
    1515#include <llvm/Support/ErrorHandling.h>
     16#include <toolchain/grep_pipeline.h>
     17#include <util/aligned_allocator.h>
     18#include <re/re_nullable.h>
    1619using namespace llvm;
    1720
     
    3841}
    3942
     43const UnicodeSet PropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     44    llvm::report_fatal_error("GetCodepointSetMatchingPattern unsupported");
     45}
     46   
    4047const UnicodeSet EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
    4148    const int property_enum_val = GetPropertyValueEnumCode(value_spec);
     
    4451    }
    4552    return GetCodepointSet(property_enum_val);
     53}
     54
     55class PropertyValueAccumulator : public grep::MatchAccumulator {
     56public:
     57   
     58    PropertyValueAccumulator(const char * searchBuffer, std::vector<std::string> & accumulatedPropertyValues)
     59    : mSearchBuffer(searchBuffer), mParsedPropertyValueSet(accumulatedPropertyValues) {}
     60   
     61    void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     62private:
     63    const char * mSearchBuffer;
     64    std::vector<std::string> & mParsedPropertyValueSet;
     65};
     66void PropertyValueAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
     67    assert (line_start <= line_end);
     68    mParsedPropertyValueSet.emplace_back(mSearchBuffer + line_start, mSearchBuffer + line_end);
     69}
     70
     71    const UnicodeSet EnumeratedPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     72   
     73   
     74        AlignedAllocator<char, 32> alloc;
     75        std::vector<std::string> accumulatedValues;
     76       
     77        const std::string & str = GetPropertyValueGrepString();
     78       
     79        const unsigned segmentSize = 8;
     80        const auto n = str.length();
     81        const auto w = 256 * segmentSize;
     82        const auto m = w - (n % w);
     83       
     84        char * aligned = alloc.allocate(n + m, 0);
     85        std::memcpy(aligned, str.data(), n);
     86        std::memset(aligned + n, 0, m);
     87       
     88        PropertyValueAccumulator accum(aligned, accumulatedValues);
     89        grepBuffer(pattern, aligned, n, & accum);
     90        alloc.deallocate(aligned, 0);
     91       
     92        UnicodeSet a;
     93        for (auto v : accumulatedValues) {
     94            int e = GetPropertyValueEnumCode(v);
     95            a = a + GetCodepointSet(e);
     96        }
     97        return a;
    4698}
    4799
     
    167219}
    168220
     221const UnicodeSet BinaryPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     222    llvm::report_fatal_error("Enumerated Property GetCodepointSetMatchingPattern not yet implemented");
     223}
     224   
    169225const std::string & BinaryPropertyObject::GetPropertyValueGrepString() {
    170226    if (mPropertyValueGrepString.empty()) {
     
    196252}
    197253   
     254class SetByLineNumberAccumulator : public grep::MatchAccumulator {
     255public:
     256   
     257    SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps)
     258    : mCodepointTableByLineNum(cps) {}
     259   
     260    void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     261    UnicodeSet getAccumulatedSet() { return mAccumSet; }
     262private:
     263    const std::vector<UCD::codepoint_t> & mCodepointTableByLineNum;
     264    UnicodeSet mAccumSet;
     265};
     266void SetByLineNumberAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
     267    assert (line_start <= line_end);
     268    mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
     269}
     270
     271
    198272const UnicodeSet NumericPropertyObject::GetCodepointSet(const std::string & value_spec) {
    199273    if (value_spec == "NaN") return mNaNCodepointSet;
     
    217291}
    218292
     293const UnicodeSet NumericPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     294    UnicodeSet matched;
     295    llvm::report_fatal_error("NumericPropertyObject NaN matching issue!");
     296    SetByLineNumberAccumulator accum(mExplicitCps);
     297    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     298    return matched + accum.getAccumulatedSet();
     299}
     300
     301
    219302const UnicodeSet StringPropertyObject::GetCodepointSet(const std::string & value_spec) {
    220303    if (value_spec == "") return mNullCodepointSet;
     
    244327}
    245328
     329const UnicodeSet StringPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     330    UnicodeSet matched;
     331    if (re::RE_Nullable::isNullable(pattern)) {
     332        matched = matched + mNullCodepointSet;
     333    }
     334    //llvm::report_fatal_error("StringPropertyObject reflexive set issue!");
     335    SetByLineNumberAccumulator accum(mExplicitCps);
     336    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     337    return matched + accum.getAccumulatedSet();
     338}
     339   
    246340const UnicodeSet StringOverridePropertyObject::GetCodepointSet(const std::string & value_spec) {
    247341    // First step: get the codepoints from the base object and then remove any overridden ones.
     
    264358}
    265359   
     360   
     361const UnicodeSet StringOverridePropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     362    UnicodeSet base_set = mBaseObject.GetCodepointSetMatchingPattern(pattern) - mOverriddenSet;
     363    SetByLineNumberAccumulator accum(mExplicitCps);
     364    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     365    return base_set + accum.getAccumulatedSet();
     366}
     367
     368
    266369const std::string & ObsoletePropertyObject::GetPropertyValueGrepString() {
    267370    llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " is obsolete.");
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r5673 r5679  
    1414#include <vector>
    1515#include <unordered_map>
     16namespace re {class RE;}
    1617
    1718namespace UCD {
     
    4041    }
    4142    PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    42     virtual const UnicodeSet GetCodepointSet(const std::string &);
     43    virtual const UnicodeSet GetCodepointSet(const std::string & prop_value_string);
     44    virtual const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern);
     45
    4346    virtual const std::string & GetPropertyValueGrepString();
    4447    property_t the_property;
     
    6164       
    6265    }
    63     const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     66    const UnicodeSet GetCodepointSet(const std::string & prop_value_string) override;
     67    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    6468    const UnicodeSet & GetCodepointSet(const int property_enum_val);
    6569    const std::string & GetPropertyValueGrepString() override;
     
    99103    const std::string & GetPropertyValueGrepString() override;
    100104    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     105    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    101106    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
    102107    std::vector<UnicodeSet> & GetEnumerationBasisSets();
     
    175180    }
    176181    const UnicodeSet GetCodepointSet(const std::string & numeric_spec) override;
    177    
     182    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
     183
    178184private:
    179185    UnicodeSet mNaNCodepointSet;  // codepoints for which the property value is NaN (not a number).
     
    202208    }
    203209    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     210    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    204211
    205212private:
     
    231238    }
    232239    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
    233    
     240    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
     241
    234242private:
    235243    PropertyObject & mBaseObject;  // the base object that provides default values for this property unless overridden.
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5667 r5679  
    1414#include <re/re_seq.h>
    1515#include <re/re_assertion.h>
     16#include <re/re_parser.h>
    1617#include "UCD/PropertyAliases.h"
    1718#include "UCD/PropertyObjects.h"
     
    141142            }
    142143            auto theprop = propit->second;
    143             return property_object_table[theprop]->GetCodepointSet(value);
     144            if ((value.length() > 0) && (value[0] == '/')) {
     145                // resolve a regular expression
     146                re::RE * propValueRe = RE_Parser::parse(value.substr(1), re::DEFAULT_MODE, re::PCRE);
     147                return property_object_table[theprop]->GetCodepointSetMatchingPattern(propValueRe);
     148            }
     149            else {
     150                return property_object_table[theprop]->GetCodepointSet(value);
     151            }
    144152        }
    145153        else {
Note: See TracChangeset for help on using the changeset viewer.