Changeset 5241


Ignore:
Timestamp:
Dec 29, 2016, 12:03:21 AM (2 years ago)
Author:
nmedfort
Message:

Potential fix for '\p{script=/.*hir.*/}'

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5240 r5241  
    160160
    161161SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
    162 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer") #
     162SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") #-fsanitize=address -fno-omit-frame-pointer
    163163
    164164add_test(
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5240 r5241  
    3838#include <util/aligned_allocator.h>
    3939
     40using namespace parabix;
     41
    4042static cl::OptionCategory bGrepOutputOptions("Output Options",
    4143                                             "These options control the output.");
     
    5961std::string IRFilename = "icgrep.ll";
    6062std::string PTXFilename = "icgrep.ptx";
     63
     64static re::CC * parsedCodePointSet = nullptr;
     65static std::vector<std::string> parsedPropertyValues;
    6166
    6267void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16) {
     
    117122    }
    118123}
    119 
    120 using namespace parabix;
    121124
    122125Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
     
    423426}
    424427
    425 re::CC *  GrepEngine::grepCodepoints() {
    426 
    427     setParsedCodePointSet();
     428re::CC * GrepEngine::grepCodepoints() {
     429    parsedCodePointSet = re::makeCC();
    428430    char * mFileBuffer = getUnicodeNameDataPtr();
    429431    size_t mFileSize = getUnicodeNameDataSize();
    430 
    431432    mGrepFunction(mFileBuffer, mFileSize, 0);
    432 
    433     return getParsedCodePointSet();
     433    return parsedCodePointSet;
    434434}
    435435
    436436const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
    437437    AlignedAllocator<char, 32> alloc;
    438     setParsedPropertyValues();
     438    parsedPropertyValues.clear();
    439439    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
    440     char * aligned = alloc.allocate(str.length() + 1, 0);
    441     std::memcpy(aligned, str.data(), str.length());
    442     aligned[str.length()] = '\0';
    443     mGrepFunction(aligned, str.length(), 0);
     440    const auto n = str.length();
     441    char * aligned = alloc.allocate(n + 32, 0);
     442    std::memcpy(aligned, str.data(), n);
     443    std::memset(aligned + n, 0, 32);
     444    mGrepFunction(aligned, n, 0);
    444445    alloc.deallocate(aligned, 0);
    445     return getParsedPropertyValues();
     446    return parsedPropertyValues;
    446447}
    447448
     
    546547}
    547548
    548 re::CC * parsedCodePointSet;
    549 
    550549extern "C" {
    551550    void insert_codepoints(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
     
    566565}
    567566
    568 void setParsedCodePointSet(){
    569     parsedCodePointSet = re::makeCC();
    570 }
    571 
    572 re::CC * getParsedCodePointSet(){
    573     return parsedCodePointSet;
    574 }
    575 
    576 
    577 static std::vector<std::string> parsedPropertyValues;
    578 
    579567extern "C" {
    580568    void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    581         auto result = std::string(buffer + line_start, buffer + line_end);
    582         parsedPropertyValues.push_back(result);
    583     }
    584 }
    585 
    586 inline void setParsedPropertyValues() {
    587     parsedPropertyValues.clear();
    588 }
    589 
    590 inline const std::vector<std::string>& getParsedPropertyValues() {
    591     return parsedPropertyValues;
    592 }
    593 
     569//      When the error occurs, this is somehow getting an extra match:
     570//        33: (261,269)
     571//        138: (1235,1253)
     572//        172: (1419,1423)
     573//        278: (1949,2040) *****
     574//        script : .*hir.*        (Alt[Name "hiragana" ,Name "katakanaorhiragana" ,Name "hira" ,Name "hiraganQ0ᅵᅵᅵᅵK" ])
     575        parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
     576    }
     577}
    594578
    595579void icgrep_Linking(Module * m, ExecutionEngine * e) {
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5240 r5241  
    5050void setParsedCodePointSet();
    5151
    52 const std::vector<std::string>& getParsedPropertyValues();
    5352void setParsedPropertyValues();
    5453
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp

    r5234 r5241  
    3636
    3737struct NameResolver {
    38 
    3938    RE * resolve(RE * re) {
    4039        if (Name * name = dyn_cast<Name>(re)) {
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5234 r5241  
    2727#include <string>
    2828#include <algorithm>
     29#include <iostream>
    2930
    3031namespace re {
     
    3334RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags, RE_Syntax syntax) {
    3435    std::unique_ptr<RE_Parser> parser = nullptr;
    35 
    3636    switch (syntax) {
    3737        case RE_Syntax::PCRE:
     
    5252            break;
    5353    }
    54 
    55 
    5654    parser->fModeFlagSet = initialFlags;
    5755    parser->fNested = false;
     
    634632}
    635633
    636 RE * RE_Parser::parseRegexPropertyValue(const std::string& propName, const std::string& regexValue) {
    637     auto regexValueForGrep = "^" + regexValue + "$";
    638     RE* propValueRe = RE_Parser::parse(regexValueForGrep, fModeFlagSet, mReSyntax);
     634RE * RE_Parser::parseRegexPropertyValue(const std::string & propName, const std::string& regexValue) {
     635    RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    639636    GrepEngine engine;
    640637    engine.grepCodeGen("NamePattern", propValueRe, false, false, GrepType::PropertyValue);
    641     auto grepValue = engine.grepPropertyValues(propName);
    642 
    643     auto grepValueSize = grepValue.size();
    644     if (!grepValueSize) {
     638    const auto matches = engine.grepPropertyValues(propName);
     639    if (matches.empty()) {
    645640        ParseFailure("regex " + regexValue + " match no property values");
    646     } else if (grepValueSize == 1) {
    647         // handle right value
    648         return createName(std::string(propName), std::string(grepValue[0]));
     641    } else if (matches.size() == 1) {
     642        return createName(propName, matches.front());
    649643    } else {
    650         std::vector<re::RE*> valueRes;
    651         for (auto iter = grepValue.begin(); iter != grepValue.end(); ++iter) {
    652             valueRes.push_back(createName(std::string(propName), std::string(*iter)));
    653         }
    654 
    655         return makeAlt(valueRes.begin(), valueRes.end());
     644        std::vector<re::RE *> alt;
     645        for (auto value : matches) {
     646            alt.push_back(createName(propName, value));
     647        }
     648        return makeAlt(alt.begin(), alt.end());
    656649    }
    657650}
     
    11131106}
    11141107
    1115 Name * RE_Parser::createName(std::string && value) {
     1108Name * RE_Parser::createName(std::string value) {
    11161109    auto key = std::make_pair("", value);
    11171110    auto f = mNameMap.find(key);
     
    11241117    }
    11251118
    1126 Name * RE_Parser::createName(std::string && prop, std::string && value) {
     1119Name * RE_Parser::createName(std::string prop, std::string value) {
    11271120    auto key = std::make_pair(prop, value);
    11281121    auto f = mNameMap.find(key);
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r5218 r5241  
    164164    Name * makeWordSet();
    165165   
    166     Name * createName(std::string && value);
    167     Name * createName(std::string && prop, std::string && value);
     166    Name * createName(std::string value);
     167    Name * createName(std::string prop, std::string value);
    168168
    169169    virtual bool isUnsupportChartsetOperator(char c);
Note: See TracChangeset for help on using the changeset viewer.