Ignore:
Timestamp:
Nov 7, 2016, 3:54:09 PM (3 years ago)
Author:
xwa163
Message:
  1. Extend the regex syntax: (a) implement RL2.6 of UTS #18 by supporting a regex as a property value, e.g. \p{script=/.*hir.*/}; (b) support property-expression syntax when parsing a boundary, e.g. \b{greek}; (c) support property expressions in a non-capturing group, e.g. (?\p{upper}:\p{greek}\p{script=hira}).
  2. Add related test cases
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5204 r5206  
    2525#include <llvm/IR/TypeBuilder.h>
    2626#include <UCD/UnicodeNameData.h>
     27#include <UCD/resolve_properties.h>
    2728
    2829
     
    198199}
    199200
    200 Function * generateCPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool isNameExpression){
     201Function * generateCPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, GrepType grepType){
    201202    Type * const size_ty = iBuilder->getSizeTy();
    202203    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     
    219220    MatchResults.setStreamSetBuffer(rsltStream, fileSize);
    220221
    221     kernel::ScanMatchKernel scanMatchK(iBuilder, isNameExpression);
     222    kernel::ScanMatchKernel scanMatchK(iBuilder, grepType);
    222223    scanMatchK.generateKernel({&MatchResults}, {});
    223224           
     
    230231}
    231232
    232 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
     233void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType) {
    233234    isUTF_16 = UTF_16;
    234235    int addrSpace = 0;
     
    269270    unsigned encodingBits = UTF_16 ? 16 : 8;
    270271
    271     mIsNameExpression = isNameExpression;
     272    mGrepType = grepType;
    272273
    273274    Type * const int32ty = iBuilder->getInt32Ty();
     
    402403            Value * icgrepInstance = icgrepK.createInstance({});
    403404
    404             kernel::ScanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
     405            kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    405406            scanMatchK.generateKernel({&MatchResults}, {});               
    406407            Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     
    431432   
    432433        Compile2PTX(M, IRFilename, PTXFilename);
    433         mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mIsNameExpression);
     434        mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mGrepType);
    434435        if (CountOnly) return;
    435436    }
     
    472473
    473474    return getParsedCodePointSet();
     475}
     476
     477const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
     478    setParsedPropertyValues();
     479
     480    std::string str = UCD::getPropertyValueGrepString(propertyName);
     481
     482    //use const_cast to workaround const input
     483    mGrepFunction(const_cast<char*>(str.c_str()), str.size(), 0);
     484
     485    return getParsedProeprtyValues();
    474486}
    475487
     
    607619
    608620
     621std::vector<std::string> parsedPropertyValues;
     622
     623extern "C" {
     624    void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
     625        auto result = std::string(buffer + line_start, buffer + line_end);
     626        parsedPropertyValues.push_back(result);
     627    }
     628}
     629
     630inline void setParsedPropertyValues() {
     631    parsedPropertyValues.clear();
     632}
     633
     634inline const std::vector<std::string>& getParsedProeprtyValues() {
     635    return parsedPropertyValues;
     636}
     637
     638
    609639void icgrep_Linking(Module * m, ExecutionEngine * e) {
    610640    Module::FunctionListType & fns = m->getFunctionList();
     
    621651            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
    622652        }
     653        if (fnName == "insert_property_values") {
     654            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_property_values);
     655        }
    623656#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    624657        else {
Note: See TracChangeset for help on using the changeset viewer.