Changeset 5617


Ignore:
Timestamp:
Aug 23, 2017, 12:31:45 AM (4 weeks ago)
Author:
xuedongx
Message:

New RE compiler pipeline for local languages (inspired by the Glushkov automaton)

Location:
icGREP/icgrep-devel/icgrep/re
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5610 r5617  
    2626#include <re/re_seq.h>              // for Seq
    2727#include <re/re_start.h>
     28#include <re/re_local.h>
    2829#include <re/re_toolchain.h>        // for AlgorithmOptionIsSet, RE_Algorith...
    2930#include "cc/cc_compiler.h"         // for CC_Compiler
     
    7071}
    7172   
     73MarkerType RE_Compiler::compile_local(RE * re, MarkerType marker, PabloBuilder & pb) {
     74    UCD::UnicodeSet* first = RE_Local::first(re);
     75    PabloAST * pablo_first = mCCCompiler.compileCC(makeCC(std::move(*first)));
     76    UCD::UnicodeSet* final = RE_Local::final(re);
     77    PabloAST * pablo_final = mCCCompiler.compileCC(makeCC(std::move(*final)));
     78    std::map<UCD::UnicodeSet*, UCD::UnicodeSet*> follow_map;
     79    RE_Local::follow(re, follow_map);
     80
     81    if (first == nullptr || final == nullptr) {
     82        mLocal = false;
     83        return process(re, marker, pb);
     84    }
     85
     86    PabloAST * pablo_follow = pb.createZeroes();
     87    for (auto i = follow_map.begin(); i != follow_map.end(); i++) {
     88        CC * one = makeCC(std::move(*i->first));
     89        CC * two = makeCC(std::move(*i->second));
     90        PabloAST * pablo_one = pb.createAnd(mCCCompiler.compileCC(one), mAny);
     91        PabloAST * pablo_two = pb.createAnd(mCCCompiler.compileCC(two), mAny);
     92        PabloAST * one1 = pb.createAdvance(pablo_one, 1, "one1");
     93        PabloAST * follow = pb.createAnd(one1, pablo_two);
     94        pablo_follow = pb.createOr(pablo_follow, follow);
     95    }
     96    PabloAST * result = pb.createAnd(pb.createMatchStar(pb.createAdvance(pablo_first, 1), pablo_follow), pb.createAdvance(pablo_final, 1));
     97    return makeMarker(MarkerPosition::FinalPostPositionUnit, result);
     98}
     99
     100   
    72101MarkerType RE_Compiler::process(RE * re, MarkerType marker, PabloBuilder & pb) {
    73     if (isa<Name>(re)) {
    74         return compileName(cast<Name>(re), marker, pb);
    75     } else if (isa<Seq>(re)) {
    76         return compileSeq(cast<Seq>(re), marker, pb);
    77     } else if (isa<Alt>(re)) {
    78         return compileAlt(cast<Alt>(re), marker, pb);
    79     } else if (isa<Rep>(re)) {
    80         return compileRep(cast<Rep>(re), marker, pb);
    81     } else if (isa<Assertion>(re)) {
    82         return compileAssertion(cast<Assertion>(re), marker, pb);
    83     } else if (isa<Any>(re)) {
    84         return compileAny(marker, pb);
    85     } else if (isa<Diff>(re)) {
    86         return compileDiff(cast<Diff>(re), marker, pb);
    87     } else if (isa<Intersect>(re)) {
    88         return compileIntersect(cast<Intersect>(re), marker, pb);
    89     } else if (isa<Start>(re)) {
    90         return compileStart(marker, pb);
    91     } else if (isa<End>(re)) {
    92         return compileEnd(marker, pb);
    93     } else if (isa<CC>(re)) {
    94         // CCs may be passed through the toolchain directly to the compiler.
    95         return compileCC(cast<CC>(re), marker, pb);
    96     }
    97     UnsupportedRE("RE Compiler failed to process " + Printer_RE::PrintRE(re));
     102    if (mLocal) {
     103        if (isa<Name>(re) || isa<Seq>(re) || isa<Alt>(re) || isa<Rep>(re) || isa<CC>(re)) {
     104            return compile_local(re, marker, pb);
     105        } else if (isa<Any>(re)) {
     106            return compileAny(marker, pb);
     107        } else if (isa<Diff>(re)) {
     108            return compileDiff(cast<Diff>(re), marker, pb);
     109        } else if (isa<Intersect>(re)) {
     110            return compileIntersect(cast<Intersect>(re), marker, pb);
     111        }
     112        UnsupportedRE("RE Compiler for local language failed to process " + Printer_RE::PrintRE(re));
     113    } else {
     114        if (isa<Name>(re)) {
     115            return compileName(cast<Name>(re), marker, pb);
     116        } else if (isa<Seq>(re)) {
     117            return compileSeq(cast<Seq>(re), marker, pb);
     118        } else if (isa<Alt>(re)) {
     119            return compileAlt(cast<Alt>(re), marker, pb);
     120        } else if (isa<Rep>(re)) {
     121            return compileRep(cast<Rep>(re), marker, pb);
     122        } else if (isa<Assertion>(re)) {
     123            return compileAssertion(cast<Assertion>(re), marker, pb);
     124        } else if (isa<Any>(re)) {
     125            return compileAny(marker, pb);
     126        } else if (isa<Diff>(re)) {
     127            return compileDiff(cast<Diff>(re), marker, pb);
     128        } else if (isa<Intersect>(re)) {
     129            return compileIntersect(cast<Intersect>(re), marker, pb);
     130        } else if (isa<Start>(re)) {
     131            return compileStart(marker, pb);
     132        } else if (isa<End>(re)) {
     133            return compileEnd(marker, pb);
     134        } else if (isa<CC>(re)) {
     135            // CCs may be passed through the toolchain directly to the compiler.
     136            return compileCC(cast<CC>(re), marker, pb);
     137        }
     138        UnsupportedRE("RE Compiler failed to process " + Printer_RE::PrintRE(re));
     139    }
    98140}
    99141
     
    520562   
    521563
    522 RE_Compiler::RE_Compiler(PabloKernel * kernel, cc::CC_Compiler & ccCompiler)
     564RE_Compiler::RE_Compiler(PabloKernel * kernel, cc::CC_Compiler & ccCompiler, bool local)
    523565: mKernel(kernel)
    524566, mCCCompiler(ccCompiler)
     567, mLocal(local)
    525568, mLineBreak(nullptr)
    526569, mCRLF(nullptr)
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5561 r5617  
    6161public:
    6262
    63     RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler);
     63    RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler, bool local = false);
    6464    void compileUnicodeNames(RE *& re);
    6565    void compile(RE * re);
     
    9090
    9191    MarkerType compile(RE * re, pablo::PabloBuilder & cg);
     92    MarkerType compile_local(RE * re, MarkerType marker, pablo::PabloBuilder & cg);
    9293
    9394    MarkerType process(RE * re, MarkerType marker, pablo::PabloBuilder & pb);
     
    122123    bool                                            mCountOnly;
    123124    cc::CC_Compiler &                               mCCCompiler;
     125    bool                                            mLocal;
    124126    pablo::PabloAST *                               mLineBreak;
    125127    pablo::PabloAST *                               mCRLF;
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5561 r5617  
    1212#include <re/re_star_normal.h>         // for RE_Star_Normal
    1313#include <re/re_simplifier.h>          // for RE_Simplifier
     14#include <re/re_local.h>
    1415#include <re/printer_re.h>
     16#include <re/re_analysis.h>
    1517#include <iostream>
    1618
     
    9395void re2pablo_compiler(PabloKernel * kernel, RE * re_ast) {
    9496    Var * const basis = kernel->getInputStreamVar("basis");
     97    bool local = RE_Local::isLocalLanguage(re_ast) && isTypeForLocal(re_ast);
    9598    cc::CC_Compiler cc_compiler(kernel, basis);
    96     re::RE_Compiler re_compiler(kernel, cc_compiler);
     99    re::RE_Compiler re_compiler(kernel, cc_compiler, local);
    97100    re_compiler.compileUnicodeNames(re_ast);
    98101    re_compiler.compile(re_ast);
Note: See TracChangeset for help on using the changeset viewer.