Changeset 5769 for icGREP/icgrep-devel


Ignore:
Timestamp:
Dec 9, 2017, 5:23:39 AM (18 months ago)
Author:
cameron
Message:

Decoupling case-insensitive transform from parser

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5768 r5769  
    2323#include <pablo/pablo_kernel.h>
    2424#include <re/re_cc.h>
     25#include <re/casing.h>
    2526#include <re/re_toolchain.h>
    2627#include <toolchain/toolchain.h>
     
    120121   
    121122    for (unsigned i = 0; i < n; i++) {
     123        REs[i] = resolveCaseInsensitiveMode(REs[i], false);
    122124        REs[i] = re::resolveNames(REs[i]);
    123125        const auto UnicodeSets = re::collectUnicodeSets(REs[i]);
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5768 r5769  
    3737}
    3838
    39 RegularExpressionOptimizer::RegularExpressionOptimizer(re::RE * const re_ast)
    40 : mRE(re_ast)
    41 , mSignature(Printer_RE::PrintRE(mRE)) {
    42 
    43 }
    44 
    4539void RequiredStreams_UTF8::generatePabloMethod() {
    4640   
     
    177171
    178172
     173ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
     174: mRE(re_ast)
     175, mSignature(Printer_RE::PrintRE(mRE)) {
     176   
     177}
     178
    179179ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
    180 : RegularExpressionOptimizer(re)
     180: ICGrepSignature(re)
    181181, PabloKernel(iBuilder,
    182182              "ic" + sha1sum(mSignature),
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5646 r5769  
    1111namespace re { class RE; }
    1212namespace kernel {
    13 
    14 struct RegularExpressionOptimizer {
    15     RegularExpressionOptimizer(re::RE * re_ast);
    16 protected:
    17     re::RE * const  mRE;
    18     std::string     mSignature;
    19 };
    20 
    2113
    2214   
     
    4032
    4133
    42 class ICGrepKernel : public RegularExpressionOptimizer, public pablo::PabloKernel {
     34struct ICGrepSignature {
     35    ICGrepSignature(re::RE * re_ast);
     36protected:
     37    re::RE * const  mRE;
     38    std::string     mSignature;
     39};
     40
     41   
     42class ICGrepKernel : public ICGrepSignature, public pablo::PabloKernel {
    4343public:
    4444    ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast, const unsigned numOfCharacterClasses = 8);
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp

    r5763 r5769  
     1#include <re/re_re.h>
    12#include "re_name_resolve.h"
    23#include <re/re_name.h>
     
    1011#include <re/re_assertion.h>
    1112#include <re/re_analysis.h>
     13#include <re/re_group.h>
    1214#include <re/re_memoizer.hpp>
    13 #include <UCD/ucd_compiler.hpp>
    1415#include <UCD/resolve_properties.h>
    1516#include <boost/container/flat_set.hpp>
     
    9899            ix->setLH(resolve(ix->getLH()));
    99100            ix->setRH(resolve(ix->getRH()));
     101        } else if (Group * g = dyn_cast<Group>(re)) {
     102            g->setRE(resolve(g->getRE()));
    100103        }
    101104        return re;
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5754 r5769  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2017 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1919#include <re/re_seq.h>
    2020#include <re/re_start.h>
     21#include <re/re_range.h>
    2122#include <re/re_diff.h>
    2223#include <re/re_intersect.h>
     24#include <re/re_group.h>
    2325#include <re/re_assertion.h>
    2426#include <re/printer_re.h>
     
    276278                    ++mCursor;
    277279                    group_expr = parse_alt();
     280                    auto changed = fModeFlagSet ^ savedModeFlagSet;
     281                    if ((changed & CASE_INSENSITIVE_MODE_FLAG) != 0) {
     282                        group_expr = makeGroup(Group::Mode::CaseInsensitiveMode, group_expr,
     283                                               (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) == 0 ? Group::Sense::Off : Group::Sense::On);
     284                    }
    278285                    fModeFlagSet = savedModeFlagSet;
    279286                    break;
    280287                } else {  // if *_cursor == ')'
    281288                    ++mCursor;
    282                     return parse_next_item();
     289                    auto changed = fModeFlagSet ^ savedModeFlagSet;
     290                    if ((changed & CASE_INSENSITIVE_MODE_FLAG) != 0) {
     291                        group_expr = parse_seq();
     292                        return makeGroup(Group::Mode::CaseInsensitiveMode, group_expr,
     293                                               (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) == 0 ? Group::Sense::Off : Group::Sense::On);
     294                    }
     295                    else return parse_next_item();
    283296                }
    284297            default:
     
    898911                    if ((*mCursor == 'x') || (*mCursor == 'o') || (*mCursor == '0')) possibleByteCodeEscape = true;
    899912                    insert_range(cc, lastCodepointItem, parse_escaped_codepoint());
     913                    //subexprs.push_back(makeRange(makeCC(lastCodepointItem), makeCC(parse_escaped_codepoint())));
    900914                } else {
    901915                    insert_range(cc, lastCodepointItem, parse_literal_codepoint());
     916                    //subexprs.push_back(makeRange(makeCC(lastCodepointItem), makeCC(parse_literal_codepoint())));
    902917                }
    903918                lastItemKind = RangeItem;
     
    10571072
    10581073Name * RE_Parser::createCC(const codepoint_t cp) {
    1059     CC * cc = nullptr;
    1060     if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
    1061         cc = makeCC();
    1062         caseInsensitiveInsert(cc, cp);
    1063     } else {
    1064         cc = makeCC(cp);
    1065     }
     1074    CC * cc = makeCC(cp);
    10661075    return mMemoizer.memoize(cc);
    10671076}
    10681077
    10691078void RE_Parser::insert(CC * cc, const codepoint_t cp) {
    1070     if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
    1071         caseInsensitiveInsert(cc, cp);
    1072     } else {
    1073         cc->insert(cp);
    1074     }
     1079    cc->insert(cp);
    10751080}
    10761081
    10771082void RE_Parser::insert_range(CC * cc, const codepoint_t lo, const codepoint_t hi) {
    1078     if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
    1079         caseInsensitiveInsertRange(cc, lo, hi);
    1080     } else {
    1081         cc->insert_range(lo, hi);
    1082     }
     1083    cc->insert_range(lo, hi);
    10831084}
    10841085
  • icGREP/icgrep-devel/icgrep/toolchain/grep_pipeline.cpp

    r5768 r5769  
    1515#include <kernels/streamset.h>
    1616#include <kernels/kernel_builder.h>
     17#include <re/casing.h>
    1718#include <re/re_toolchain.h>
    1819#include <toolchain/toolchain.h>
     
    3839    const unsigned segmentSize = 8;
    3940
     41    pattern = resolveCaseInsensitiveMode(pattern, false);
    4042    pattern = regular_expression_passes(pattern);
     43   
    4144   
    4245    ParabixDriver pxDriver("codepointEngine");
Note: See TracChangeset for help on using the changeset viewer.