Changeset 5679 for icGREP


Ignore:
Timestamp:
Oct 9, 2017, 9:28:24 AM (17 months ago)
Author:
cameron
Message:

Refactoring progress: \N uses name property; delay resolution of recursive property expressions, property object regexp support

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
2 deleted
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5673 r5679  
    5959SET(OBJECT_CACHE_SRC toolchain/object_cache.cpp)
    6060
     61SET(GREP_CORE_SRC toolchain/grep_pipeline.cpp kernels/linebreak_kernel.cpp kernels/grep_kernel.cpp kernels/scanmatchgen.cpp )
     62
    6163SET(TOOLCHAIN_SRC toolchain/toolchain.cpp toolchain/pipeline.cpp)
    6264
     
    8789add_library(PabloADT ${PABLO_SRC})
    8890add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    89 add_library(RegExpCompiler re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)
     91add_library(RegExpCompiler re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp ${GREP_CORE_SRC})
    9092add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp)
    91 add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
     93add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    9294
    9395# force the compiler to compile the object cache to ensure that the versioning information is up to date
     
    99101target_link_libraries (PabloADT CodeGen)
    100102target_link_libraries (CCADT PabloADT)
    101 target_link_libraries (UCDlib RegExpADT PabloADT CCADT)
     103target_link_libraries (UCDlib RegExpCompiler RegExpADT PabloADT CCADT)
    102104target_link_libraries (RegExpADT PabloADT CCADT UCDlib)
    103 target_link_libraries (RegExpCompiler RegExpADT)
    104 
    105 add_executable(icgrep icgrep.cpp grep_interface.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
     105target_link_libraries (RegExpCompiler UCDlib RegExpADT)
     106
     107add_executable(icgrep icgrep.cpp grep_interface.cpp grep_engine.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
    106108add_executable(u8u16 u8u16.cpp)
    107109add_executable(base64 base64.cpp kernels/radix64.cpp)
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5672 r5679  
    1414#include <llvm/Support/raw_ostream.h>
    1515#include <llvm/Support/ErrorHandling.h>
     16#include <toolchain/grep_pipeline.h>
     17#include <util/aligned_allocator.h>
     18#include <re/re_nullable.h>
    1619using namespace llvm;
    1720
     
    3841}
    3942
     43const UnicodeSet PropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     44    llvm::report_fatal_error("GetCodepointSetMatchingPattern unsupported");
     45}
     46   
    4047const UnicodeSet EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
    4148    const int property_enum_val = GetPropertyValueEnumCode(value_spec);
     
    4451    }
    4552    return GetCodepointSet(property_enum_val);
     53}
     54
     55class PropertyValueAccumulator : public grep::MatchAccumulator {
     56public:
     57   
     58    PropertyValueAccumulator(const char * searchBuffer, std::vector<std::string> & accumulatedPropertyValues)
     59    : mSearchBuffer(searchBuffer), mParsedPropertyValueSet(accumulatedPropertyValues) {}
     60   
     61    void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     62private:
     63    const char * mSearchBuffer;
     64    std::vector<std::string> & mParsedPropertyValueSet;
     65};
     66void PropertyValueAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
     67    assert (line_start <= line_end);
     68    mParsedPropertyValueSet.emplace_back(mSearchBuffer + line_start, mSearchBuffer + line_end);
     69}
     70
     71    const UnicodeSet EnumeratedPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     72   
     73   
     74        AlignedAllocator<char, 32> alloc;
     75        std::vector<std::string> accumulatedValues;
     76       
     77        const std::string & str = GetPropertyValueGrepString();
     78       
     79        const unsigned segmentSize = 8;
     80        const auto n = str.length();
     81        const auto w = 256 * segmentSize;
     82        const auto m = w - (n % w);
     83       
     84        char * aligned = alloc.allocate(n + m, 0);
     85        std::memcpy(aligned, str.data(), n);
     86        std::memset(aligned + n, 0, m);
     87       
     88        PropertyValueAccumulator accum(aligned, accumulatedValues);
     89        grepBuffer(pattern, aligned, n, & accum);
     90        alloc.deallocate(aligned, 0);
     91       
     92        UnicodeSet a;
     93        for (auto v : accumulatedValues) {
     94            int e = GetPropertyValueEnumCode(v);
     95            a = a + GetCodepointSet(e);
     96        }
     97        return a;
    4698}
    4799
     
    167219}
    168220
     221const UnicodeSet BinaryPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     222    llvm::report_fatal_error("Enumerated Property GetCodepointSetMatchingPattern not yet implemented");
     223}
     224   
    169225const std::string & BinaryPropertyObject::GetPropertyValueGrepString() {
    170226    if (mPropertyValueGrepString.empty()) {
     
    196252}
    197253   
     254class SetByLineNumberAccumulator : public grep::MatchAccumulator {
     255public:
     256   
     257    SetByLineNumberAccumulator(const std::vector<UCD::codepoint_t> & cps)
     258    : mCodepointTableByLineNum(cps) {}
     259   
     260    void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
     261    UnicodeSet getAccumulatedSet() { return mAccumSet; }
     262private:
     263    const std::vector<UCD::codepoint_t> & mCodepointTableByLineNum;
     264    UnicodeSet mAccumSet;
     265};
     266void SetByLineNumberAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
     267    assert (line_start <= line_end);
     268    mAccumSet.insert(mCodepointTableByLineNum[lineNum]);
     269}
     270
     271
    198272const UnicodeSet NumericPropertyObject::GetCodepointSet(const std::string & value_spec) {
    199273    if (value_spec == "NaN") return mNaNCodepointSet;
     
    217291}
    218292
     // Regular-expression value matching for numeric properties.
     //
     // As written this function always terminates the program: the fatal error
     // below is raised unconditionally, before any matching is attempted.
     // NOTE(review): the message suggests that the handling of NaN-valued
     // codepoints (see mNaNCodepointSet) is still unresolved -- confirm before
     // removing the guard.
     293const UnicodeSet NumericPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     294    UnicodeSet matched;
     // llvm::report_fatal_error does not return, so every statement after this
     // call is currently unreachable.
     295    llvm::report_fatal_error("NumericPropertyObject NaN matching issue!");
     // Unreachable sketch of the intended implementation: grep the string buffer
     // and collect the codepoint recorded for each matched line number.
     296    SetByLineNumberAccumulator accum(mExplicitCps);
     297    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     298    return matched + accum.getAccumulatedSet();
     299}
     300
     301
    219302const UnicodeSet StringPropertyObject::GetCodepointSet(const std::string & value_spec) {
    220303    if (value_spec == "") return mNullCodepointSet;
     
    244327}
    245328
     329const UnicodeSet StringPropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     330    UnicodeSet matched;
     331    if (re::RE_Nullable::isNullable(pattern)) {
     332        matched = matched + mNullCodepointSet;
     333    }
     334    //llvm::report_fatal_error("StringPropertyObject reflexive set issue!");
     335    SetByLineNumberAccumulator accum(mExplicitCps);
     336    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     337    return matched + accum.getAccumulatedSet();
     338}
     339   
    246340const UnicodeSet StringOverridePropertyObject::GetCodepointSet(const std::string & value_spec) {
    247341    // First step: get the codepoints from the base object and then remove any overridden ones.
     
    264358}
    265359   
     360   
     361const UnicodeSet StringOverridePropertyObject::GetCodepointSetMatchingPattern(re::RE * pattern) {
     362    UnicodeSet base_set = mBaseObject.GetCodepointSetMatchingPattern(pattern) - mOverriddenSet;
     363    SetByLineNumberAccumulator accum(mExplicitCps);
     364    grepBuffer(pattern, mStringBuffer, mBufSize, & accum);
     365    return base_set + accum.getAccumulatedSet();
     366}
     367
     368
    266369const std::string & ObsoletePropertyObject::GetPropertyValueGrepString() {
    267370    llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " is obsolete.");
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r5673 r5679  
    1414#include <vector>
    1515#include <unordered_map>
     16namespace re {class RE;}
    1617
    1718namespace UCD {
     
    4041    }
    4142    PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    42     virtual const UnicodeSet GetCodepointSet(const std::string &);
     43    virtual const UnicodeSet GetCodepointSet(const std::string & prop_value_string);
     44    virtual const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern);
     45
    4346    virtual const std::string & GetPropertyValueGrepString();
    4447    property_t the_property;
     
    6164       
    6265    }
    63     const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     66    const UnicodeSet GetCodepointSet(const std::string & prop_value_string) override;
     67    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    6468    const UnicodeSet & GetCodepointSet(const int property_enum_val);
    6569    const std::string & GetPropertyValueGrepString() override;
     
    99103    const std::string & GetPropertyValueGrepString() override;
    100104    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     105    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    101106    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
    102107    std::vector<UnicodeSet> & GetEnumerationBasisSets();
     
    175180    }
    176181    const UnicodeSet GetCodepointSet(const std::string & numeric_spec) override;
    177    
     182    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
     183
    178184private:
    179185    UnicodeSet mNaNCodepointSet;  // codepoints for which the property value is NaN (not a number).
     
    202208    }
    203209    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
     210    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
    204211
    205212private:
     
    231238    }
    232239    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
    233    
     240    const UnicodeSet GetCodepointSetMatchingPattern(re::RE * pattern) override;
     241
    234242private:
    235243    PropertyObject & mBaseObject;  // the base object that provides default values for this property unless overridden.
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5667 r5679  
    1414#include <re/re_seq.h>
    1515#include <re/re_assertion.h>
     16#include <re/re_parser.h>
    1617#include "UCD/PropertyAliases.h"
    1718#include "UCD/PropertyObjects.h"
     
    141142            }
    142143            auto theprop = propit->second;
    143             return property_object_table[theprop]->GetCodepointSet(value);
     144            if ((value.length() > 0) && (value[0] == '/')) {
     145                // resolve a regular expression
     146                re::RE * propValueRe = RE_Parser::parse(value.substr(1), re::DEFAULT_MODE, re::PCRE);
     147                return property_object_table[theprop]->GetCodepointSetMatchingPattern(propValueRe);
     148            }
     149            else {
     150                return property_object_table[theprop]->GetCodepointSet(value);
     151            }
    144152        }
    145153        else {
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5678 r5679  
    99#include <llvm/IR/Module.h>
    1010#include <boost/filesystem.hpp>
    11 #include <UCD/UnicodeNameData.h>
    1211#include <UCD/resolve_properties.h>
    1312#include <kernels/charclasses.h>
     
    523522}
    524523
    525    
    526 void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, size_t line_start, size_t line_end) {
    527     reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
    528 }
    529 
    530    
    531 
    532 
    533 void grepBuffer(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength, MatchAccumulator * accum) {
    534     const unsigned segmentSize = 8;
    535 
    536     ParabixDriver pxDriver("codepointEngine");
    537     auto & idb = pxDriver.getBuilder();
    538     Module * M = idb->getModule();
    539    
    540     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
    541     mainFunc->setCallingConv(CallingConv::C);
    542     auto args = mainFunc->arg_begin();
    543     Value * const buffer = &*(args++);
    544     buffer->setName("buffer");
    545     Value * length = &*(args++);
    546     length->setName("length");
    547    
    548     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    549    
    550     StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
    551     kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
    552     sourceK->setInitialArguments({buffer, length});
    553     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    554    
    555     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
    556    
    557     kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
    558     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    559    
    560     kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
    561     StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    562     pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    563    
    564     kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
    565     StreamSetBuffer * RequiredStreams = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize));
    566     pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
    567    
    568     StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    569     kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, pattern));
    570     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
    571    
    572     StreamSetBuffer * MatchedLines = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    573     kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
    574     pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
    575    
    576     kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::CallBack, 8));
    577     scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
    578     pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
    579     pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    580     pxDriver.generatePipelineIR();
    581     pxDriver.deallocateBuffers();
    582     idb->CreateRetVoid();
    583     pxDriver.finalizeObject();
    584    
    585     typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
    586     auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
    587     f(UnicodeDataBuffer, bufferLength);
    588 }
    589 
    590 class CodepointAccumulator : public MatchAccumulator {
    591 public:
    592    
    593     CodepointAccumulator(const char * searchBuffer) : mSearchBuffer(searchBuffer), mParsedCodePointSet(re::makeCC()) {}
    594    
    595     void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
    596     re::CC * getCodePoints() {return mParsedCodePointSet;}
    597 private:
    598     const char * mSearchBuffer;
    599     re::CC * mParsedCodePointSet;
    600 };
    601 
    602 void CodepointAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    603     assert (line_start <= line_end);
    604     re::codepoint_t c = 0;
    605     size_t line_pos = line_start;
    606     while (isxdigit(mSearchBuffer[line_pos])) {
    607         assert (line_pos < line_end);
    608         if (isdigit(mSearchBuffer[line_pos])) {
    609             c = (c << 4) | (mSearchBuffer[line_pos] - '0');
    610         }
    611         else {
    612             c = (c << 4) | (tolower(mSearchBuffer[line_pos]) - 'a' + 10);
    613         }
    614         line_pos++;
    615     }
    616     assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
    617     mParsedCodePointSet->insert(c);
    618 }
    619 re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength) {
    620    
    621     CodepointAccumulator accum(UnicodeDataBuffer);
    622    
    623     grepBuffer(pattern, UnicodeDataBuffer, bufferLength, & accum);
    624     return accum.getCodePoints();
    625 }
    626 
    627 
    628 class PropertyValueAccumulator : public MatchAccumulator {
    629 public:
    630    
    631     PropertyValueAccumulator(const char * searchBuffer, std::vector<std::string> & accumulatedPropertyValues)
    632        : mSearchBuffer(searchBuffer), mParsedPropertyValueSet(accumulatedPropertyValues) {}
    633    
    634     void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
    635 private:
    636     const char * mSearchBuffer;
    637     std::vector<std::string> & mParsedPropertyValueSet;
    638 };
    639 void PropertyValueAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    640     assert (line_start <= line_end);
    641     mParsedPropertyValueSet.emplace_back(mSearchBuffer + line_start, mSearchBuffer + line_end);
    642 }
    643 
    644 
    645 const std::vector<std::string> grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern) {
    646     ParabixDriver pxDriver("propertyValueEngine");
    647     AlignedAllocator<char, 32> alloc;
    648     std::vector<std::string> accumulatedValues;
    649 
    650     const std::string & str = UCD::getPropertyValueGrepString(propertyName);
    651 
    652     auto & idb = pxDriver.getBuilder();
    653 
    654     const unsigned segmentSize = 8;
    655     const auto n = str.length();
    656     const auto w = idb->getBitBlockWidth() * segmentSize;
    657     const auto m = w - (n % w);
    658 
    659     char * aligned = alloc.allocate(n + m, 0);
    660     std::memcpy(aligned, str.data(), n);
    661     std::memset(aligned + n, 0, m);
    662 
    663     PropertyValueAccumulator accum(aligned, accumulatedValues);
    664     grepBuffer(propertyValuePattern, aligned, n, & accum);
    665     alloc.deallocate(aligned, 0);
    666     return accumulatedValues;
    667 }
    668 }
     524}
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5678 r5679  
    2424void *DoGrepThreadFunction(void *args);
    2525   
    26    
    27 class MatchAccumulator {
    28 public:
    29     MatchAccumulator() {};
    30     virtual void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) = 0;
    31 };
    32 
    33 void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, size_t line_start, size_t line_end);
    34    
    35 
    3626class GrepEngine {
    3727public:
     
    6151void PrintResults();
    6252   
    63 //void grepBuffer(re::RE * pattern, const char * buffer, size_t bufferLength, MatchAccumulator * accum);
    64 
    65 re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength);
    66    
    67 const std::vector<std::string> grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern);
    68 
    6953}
    7054
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5673 r5679  
    2222#include <re/re_assertion.h>
    2323#include <re/printer_re.h>
    24 #include <UCD/UnicodeNameData.h>
    2524#include <UCD/resolve_properties.h>
    2625#include <UCD/CaseFolding.h>
     
    676675            }
    677676            ++mCursor;
    678             return parseRegexPropertyValue(canonicalize(start, prop_end), std::string(val_start, current));
     677            //return parseRegexPropertyValue(canonicalize(start, prop_end), std::string(val_start, current));
     678            return createName(canonicalize(start, prop_end), std::string(val_start-1, current));
    679679        }
    680680    }
     
    682682}
    683683
    684 RE * RE_Parser::parseRegexPropertyValue(const std::string & propName, const std::string& regexValue) {
    685     RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    686     const auto matches = grep::grepPropertyValues(propName, propValueRe);
    687     if (matches.empty()) {
    688         ParseFailure("regex " + regexValue + " match no property values");
    689     } else if (matches.size() == 1) {
    690         return createName(propName, matches.front());
    691     } else {
    692         std::vector<re::RE *> alt;
    693         for (auto value : matches) {
    694             alt.push_back(createName(propName, value));
    695         }
    696         return makeAlt(alt.begin(), alt.end());
    697     }
    698 }
    699 
    700684Name * RE_Parser::parseNamePatternExpression(){
    701685
    702     ModeFlagSet outerFlags = fModeFlagSet;
    703     fModeFlagSet = 1;
    704 
    705     bool outerNested = fNested;
    706     fNested = true;
    707 
    708     RE * nameRE = parse_RE();
    709 
    710     // Reset outer parsing state.
    711     fModeFlagSet = outerFlags;
    712     fNested = outerNested;
    713 
    714     // Embed the nameRE in ";.*$nameRE" to skip the codepoint field of Uname.txt
    715     RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});
    716    
    717     CC * codepoints = grep::grepCodepoints(embedded, getUnicodeNameDataPtr(), getUnicodeNameDataSize());
    718    
    719     if (codepoints) {
    720         Name * const result = mMemoizer.memoize(codepoints);
    721         assert (*cast<CC>(result->getDefinition()) == *codepoints);
    722         return result;
    723     }
    724     return nullptr;
     686    const auto start = mCursor.pos();
     687    while (mCursor.more()) {
     688        if (*mCursor == '\\') {
     689            ++mCursor;
     690            if (!mCursor.more()) {
     691                break;
     692            }
     693        }
     694        else if (*mCursor == '}') {
     695            break;
     696        }
     697        ++mCursor;
     698    }
     699    std::string nameRegexp = "/(?i)" + std::string(start, mCursor.pos());
     700    return createName("na", nameRegexp);
    725701}
    726702
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r5646 r5679  
    2121
    2222enum ModeFlagType : unsigned {
    23     NONE = 0,
     23    DEFAULT_MODE = 0,
    2424    CASE_INSENSITIVE_MODE_FLAG = 1,
    2525    MULTILINE_MODE_FLAG = 2,      // not currently implemented
     
    141141
    142142    virtual RE * parsePropertyExpression();
    143     RE * parseRegexPropertyValue(const std::string& propName, const std::string& regexValue);
    144143
    145144    Name * parseNamePatternExpression();
  • icGREP/icgrep-devel/icgrep/re/re_parser_bre.cpp

    r5267 r5679  
    290290            ++mCursor;
    291291        }
    292         if (*mCursor == '=') {
    293             // We have a property-name = value expression
    294             const auto prop_end = mCursor.pos();
    295             mCursor++;
    296             auto val_start = mCursor.pos();
    297             if (*val_start != '\\' || !isCharAhead('/')) {
    298                 // property-value is normal string
    299                 while (mCursor.more()) {
    300                     if (isEscapedCharAhead('}') || *mCursor == ':') {
    301                         break;
    302                     }
    303                     ++mCursor;
    304                 }
    305                 return createName(canonicalize(start, prop_end), canonicalize(val_start, mCursor.pos()));
    306             } else {
    307                 // property-value is another regex
    308                 ++mCursor;
    309                 auto previous = val_start;
    310                 auto current = (++mCursor).pos();
    311                 val_start = current;
    312 
    313                 while (true) {
    314                     if (*current == '/' && *previous == '\\') {
    315                         break;
    316                     }
    317 
    318                     if (!mCursor.more()) {
    319                         ParseFailure("Malformed property expression");
    320                     }
    321 
    322                     previous = current;
    323                     current = (++mCursor).pos();
    324                 }
    325                 ++mCursor;
    326                 return parseRegexPropertyValue(canonicalize(start, prop_end), canonicalize(val_start, previous));
    327             }
    328         }
    329292        return createName(canonicalize(start, mCursor.pos()));
    330293    }
Note: See TracChangeset for help on using the changeset viewer.