Ignore:
Timestamp:
Sep 30, 2015, 12:26:23 PM (4 years ago)
Author:
nmedfort
Message:

Progress on multi-target UCD compilation

Location:
icGREP/icgrep-devel/icgrep/re
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4803 r4808  
    2828#include <stdexcept>
    2929#include <iostream>
     30#include <pablo/printer_pablos.h>
    3031
    3132#include "llvm/Support/CommandLine.h"
     
    4546                     cl::desc("set mod64 approximate mode"), cl::cat(fREcompilationOptions));
    4647#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    47 static cl::opt<bool> DisablePregeneratedUnicode("disable-pregenerated-unicode", cl::init(false),
    48                      cl::desc("disable use of pregenerated Unicode character class sets"), cl::cat(fREcompilationOptions));
     48static cl::opt<bool> UsePregeneratedUnicode("use-pregenerated-unicode", cl::init(false),
     49                     cl::desc("use fixed pregenerated Unicode character class sets instead"), cl::cat(fREcompilationOptions));
    4950#endif
    5051using namespace pablo;
     
    7273    if (m.pos == newpos) return m;
    7374    PabloAST * a = m.stream;
    74     if (m.pos == FinalMatchByte) {
     75    if (m.pos == MarkerPosition::FinalMatchByte) {
    7576        // Must advance at least to InitialPostPositionByte
    7677        a = pb.createAdvance(a, 1, "adv");
    7778    }
    7879    // Now at InitialPostPositionByte; is a further advance needed?
    79     if (newpos == FinalPostPositionByte) {
     80    if (newpos == MarkerPosition::FinalPostPositionByte) {
    8081        // Must advance through nonfinal bytes
    8182        a = pb.createScanThru(pb.createAnd(mInitial, a), mNonFinal, "scanToFinal");
     
    174175    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
    175176    mUnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
     177    PabloAST * const lb = UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed;
     178    mFunction.setResult(1, mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF))));
     179}
     180
     181void RE_Compiler::gatherUnicodePropertyNames(RE * re, NameSet & nameSet) {
            // Recursively walks the regular-expression tree rooted at `re` and inserts into
            // `nameSet` every Name node that has no definition and whose type is
            // Name::Type::UnicodeProperty. Nothing is returned; `nameSet` is the only output.
            // Node kinds not listed in the chain below fall through and contribute nothing.
     182    if (Name * name = dyn_cast<Name>(re)) {
                // A Name with a definition is not recorded itself; only its definition is searched.
     183        if (name->getDefinition()) {
     184            gatherUnicodePropertyNames(name->getDefinition(), nameSet);
     185        } else if (name->getType() == Name::Type::UnicodeProperty) {
     186            nameSet.insert(name);
     187        }
     188    } else if (Seq* seq = dyn_cast<Seq>(re)) {
                // NOTE(review): the loop variable `re` shadows the parameter `re` here and in the Alt case.
     189        for (RE * re : *seq) {
     190            gatherUnicodePropertyNames(re, nameSet);
     191        }
     192    } else if (Alt * alt = dyn_cast<Alt>(re)) {
     193        for (RE * re : *alt) {
     194            gatherUnicodePropertyNames(re, nameSet);
     195        }
     196    } else if (Rep * rep = dyn_cast<Rep>(re)) {
     197        gatherUnicodePropertyNames(rep->getRE(), nameSet);
     198    } else if (Assertion * a = dyn_cast<Assertion>(re)) {
     199        gatherUnicodePropertyNames(a->getAsserted(), nameSet);
     200    } else if (Diff * diff = dyn_cast<Diff>(re)) {
                // Binary nodes (Diff, Intersect): search the left operand, then the right.
     201        gatherUnicodePropertyNames(diff->getLH(), nameSet);
     202        gatherUnicodePropertyNames(diff->getRH(), nameSet);
     203    } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
     204        gatherUnicodePropertyNames(ix->getLH(), nameSet);
     205        gatherUnicodePropertyNames(ix->getRH(), nameSet);
     206    }
     207}
     208
     209void RE_Compiler::compileUnicodeNames(RE * re) {
            // Collects the Unicode-property Name nodes found by gatherUnicodePropertyNames(re, ...)
            // and gives each one a compiled stream via Name::setCompiled(). In both paths below the
            // stream is ANDed with the negation of the line-break stream (mUnicodeLineBreak when
            // UNICODE_LINE_BREAK is set, otherwise mLineFeed).
     210    NameSet nameSet;
     211    gatherUnicodePropertyNames(re, nameSet);
     212#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
            // Pregenerated path (only when the UsePregeneratedUnicode option is set): each name
            // becomes a call to the external function named by name->getFunctionName().
     213    if (UsePregeneratedUnicode) {
     214        for (Name * name : nameSet) {
     215            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(name->getFunctionName());
     216            Call * call = mPB.createCall(Prototype::Create(name->getFunctionName(), std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits());
     217            name->setCompiled(mPB.createAnd(call, mPB.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
     218        }
     219    } else {
     220#endif
                // Generated path: resolve every name to a UnicodeSet, then compile all sets in a
                // single generateWithDefaultIfHierarchy() call.
     221        std::vector<UCD::UnicodeSet> sets;
     222        for (Name * name : nameSet) {
                    // NOTE(review): std::move around a call result is redundant if
                    // resolveUnicodeSet returns by value -- confirm its return type.
     223            sets.push_back(std::move(UCD::resolveUnicodeSet(name)));
     224        }
     225        if (sets.size() > 0) {
     226            UCD::UCDCompiler ucdCompiler(mCCCompiler);
                    // NOTE(review): same redundant std::move; if the callee returns by value it
                    // only inhibits copy elision.
     227            std::vector<PabloAST *> classes(std::move(ucdCompiler.generateWithDefaultIfHierarchy(sets, mPB)));
     228            auto value = classes.begin();
                    // Results are paired with names purely by iteration order: this loop walks the
                    // unmodified nameSet in the same order as the loop that filled `sets`.
                    // NOTE(review): presumably `classes` holds one entry per input set, in input
                    // order; nothing here checks classes.size() -- confirm.
     229            for (Name * name : nameSet) {
     230                name->setCompiled(mPB.createAnd(*value++, mPB.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
     231            }
     232        }
     233#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     234    }
     235#endif
    176236}
    177237
     
    181241    PabloAST * v = markerVar(match_result);
    182242    mFunction.setResult(0, mPB.createAssign("matches", mPB.createAnd(mPB.createMatchStar(v, mPB.createNot(lb)), lb)));
    183     mFunction.setResult(1, mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF))));
    184243}
    185244
    186245MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) {
    187     return process(re, makeMarker(FinalPostPositionByte, pb.createOnes()), pb);
     246    return process(re, makeMarker(MarkerPosition::FinalPostPositionByte, pb.createOnes()), pb);
    188247}
    189248
    190249PabloAST * RE_Compiler::nextUnicodePosition(MarkerType m, PabloBuilder & pb) {
    191     if (markerPos(m) == FinalPostPositionByte) {
     250    if (markerPos(m) == MarkerPosition::FinalPostPositionByte) {
    192251        return markerVar(m);
    193252    }
    194     else if (markerPos(m) == InitialPostPositionByte) {
     253    else if (markerPos(m) == MarkerPosition::InitialPostPositionByte) {
    195254        return pb.createScanThru(pb.createAnd(mInitial, markerVar(m)), mNonFinal);
    196255    }
     
    219278        PabloAST * nextPos = nextUnicodePosition(marker, pb);
    220279        PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? pb.createOr(mUnicodeLineBreak, mCRLF) : mLineFeed);
    221         return makeMarker(FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
     280        return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
    222281    }
    223282    else if (Diff * diff = dyn_cast<Diff>(re)) {
     
    228287    }
    229288    else if (isa<Start>(re)) {
    230         MarkerType m = AdvanceMarker(marker, InitialPostPositionByte, pb);
     289        MarkerType m = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    231290        if (UNICODE_LINE_BREAK) {
    232291            PabloAST * line_end = mPB.createOr(mUnicodeLineBreak, mCRLF);
    233292            PabloAST * sol = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
    234             return makeMarker(InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
     293            return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    235294        }
    236295        else {
    237296            PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
    238             return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
     297            return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    239298        }
    240299    }
    241300    else if (isa<End>(re)) {
    242301        if (UNICODE_LINE_BREAK) {
    243             PabloAST * nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionByte, pb));
    244             return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "end"));
    245         }
    246         PabloAST * nextPos = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));  // For LF match
    247         return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
     302            PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb));
     303            return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "end"));
     304        }
     305        PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));  // For LF match
     306        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
    248307    }
    249308    return marker;
     
    252311MarkerType RE_Compiler::process(Name * name, MarkerType marker, PabloBuilder & pb) {
    253312    MarkerType nextPos;
    254     if (markerPos(marker) == FinalPostPositionByte) {
     313    if (markerPos(marker) == MarkerPosition::FinalPostPositionByte) {
    255314        nextPos = marker;
    256315    }
    257316    else if (name->getType() == Name::Type::Byte) {
    258         nextPos = AdvanceMarker(marker, InitialPostPositionByte, pb);
     317        nextPos = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    259318    }
    260319    else {
    261         nextPos = AdvanceMarker(marker, FinalPostPositionByte, pb);
    262     }
    263     return makeMarker(FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name, pb), "m"));
     320        nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     321    }
     322    return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name, pb), "m"));
    264323}
    265324
     
    268327    if (LLVM_LIKELY(var != nullptr)) {
    269328        return var;
    270     }
    271     else if (name->getDefinition() != nullptr) {
     329    } else if (name->getDefinition() != nullptr) {
    272330        MarkerType m = compile(name->getDefinition(), pb);
    273         assert(markerPos(m) == FinalMatchByte);
     331        assert(markerPos(m) == MarkerPosition::FinalMatchByte);
    274332        var = markerVar(m);
    275     }
    276     else if (name->getType() == Name::Type::UnicodeProperty) {
    277         #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    278         if (DisablePregeneratedUnicode) {
    279         #endif
    280             UCD::UCDCompiler ucdCompiler(mCCCompiler);
    281             var = ucdCompiler.generateWithDefaultIfHierarchy(UCD::resolveUnicodeSet(name), pb);
    282         #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    283         } else {
    284             const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(name->getFunctionName());
    285             var = pb.createCall(Prototype::Create(name->getFunctionName(), std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits());
    286         }
    287         #endif
    288     }
    289     else {
     333    } else {
    290334        throw std::runtime_error("Unresolved name " + name->getName());
    291335    }
     
    336380        accum[p] = pb.createOr(accum[p], markerVar(rslt), "alt");
    337381    }
    338     if (isa<Zeroes>(accum[InitialPostPositionByte]) && isa<Zeroes>(accum[FinalPostPositionByte])) {
    339         return makeMarker(FinalMatchByte, accum[FinalMatchByte]);
    340     }
    341     PabloAST * combine = pb.createOr(accum[InitialPostPositionByte], pb.createAdvance(accum[FinalMatchByte], 1), "alt");
     382    if (isa<Zeroes>(accum[MarkerPosition::InitialPostPositionByte]) && isa<Zeroes>(accum[MarkerPosition::FinalPostPositionByte])) {
     383        return makeMarker(MarkerPosition::FinalMatchByte, accum[MarkerPosition::FinalMatchByte]);
     384    }
     385    PabloAST * combine = pb.createOr(accum[InitialPostPositionByte], pb.createAdvance(accum[MarkerPosition::FinalMatchByte], 1), "alt");
    342386    if (isa<Zeroes>(accum[FinalPostPositionByte])) {
    343387        return makeMarker(InitialPostPositionByte, combine);
    344388    }
    345     combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[FinalPostPositionByte], "alt");
    346     return makeMarker(FinalPostPositionByte, combine);
     389    combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[MarkerPosition::FinalPostPositionByte], "alt");
     390    return makeMarker(MarkerPosition::FinalPostPositionByte, combine);
    347391}
    348392
     
    361405    else if (isUnicodeUnitLength(asserted)) {
    362406        MarkerType lookahead = compile(asserted, pb);
    363         assert(markerPos(lookahead) == FinalMatchByte);
     407        assert(markerPos(lookahead) == MarkerPosition::FinalMatchByte);
    364408        PabloAST * la = markerVar(lookahead);
    365409        if (a->getSense() == Assertion::Sense::Negative) {
    366410            la = pb.createNot(la);
    367411        }
    368         MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionByte, pb);
    369         return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
     412        MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     413        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
    370414    }
    371415    else {
     
    462506        PabloAST * cc = markerVar(compile(repeated, pb));
    463507        PabloAST * cc_lb = consecutive1(cc, 1, lb, pb);
    464         PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == FinalMatchByte ? lb : lb-1);
    465         return makeMarker(FinalMatchByte, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     508        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == MarkerPosition::FinalMatchByte ? lb : lb - 1);
     509        return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    466510    }
    467511    // Fall through to general case.
     
    477521        // Create a mask of positions reachable within ub from current marker.
    478522        // Use matchstar, then apply filter.
    479         PabloAST * match = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     523        PabloAST * match = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    480524        PabloAST * upperLimitMask = reachable(match, 1, ub, pb);
    481         PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     525        PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    482526        PabloAST * rep_class_var = markerVar(compile(repeated, pb));
    483         return makeMarker(InitialPostPositionByte, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
     527        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
    484528    }
    485529    // Fall through to general case.
     
    495539MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) {
    496540    // always use PostPosition markers for unbounded repetition.
    497     PabloAST * base = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     541    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    498542   
    499543    if (isByteLength(repeated)  && !DisableMatchStar) {
    500544        PabloAST * cc = markerVar(compile(repeated, pb)); 
    501545        PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, cc) : pb.createMatchStar(base, cc, "unbounded");
    502         return makeMarker(InitialPostPositionByte, mstar);
     546        return makeMarker(MarkerPosition::InitialPostPositionByte, mstar);
    503547    }
    504548    else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) {
    505549        PabloAST * cc = markerVar(compile(repeated, pb));
    506550        PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, pb.createOr(mNonFinal, cc)) : pb.createMatchStar(base, pb.createOr(mNonFinal, cc));
    507         return makeMarker(FinalPostPositionByte, pb.createAnd(mstar, mFinal, "unbounded"));
     551        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, mFinal, "unbounded"));
    508552    }
    509553    else if (mStarDepth > 0){
     
    517561        PabloAST * m1 = pb.createOr(base, starPending);
    518562        PabloAST * m2 = pb.createOr(base, starAccum);
    519         PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(InitialPostPositionByte, m1), pb), InitialPostPositionByte, pb));
     563        PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(MarkerPosition::InitialPostPositionByte, m1), pb), MarkerPosition::InitialPostPositionByte, pb));
    520564        Next * nextPending = pb.createNext(starPending, pb.createAnd(loopComputation, pb.createNot(m2)));
    521565        Next * nextStarAccum = pb.createNext(starAccum, pb.createOr(loopComputation, m2));
     
    525569        mStarDepth--;
    526570       
    527         return makeMarker(InitialPostPositionByte, pb.createAssign("unbounded", pb.createOr(base, nextStarAccum)));
     571        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", pb.createOr(base, nextStarAccum)));
    528572    }   
    529573    else {
     
    536580        mStarDepth++;
    537581
    538         PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(InitialPostPositionByte, whilePending), wb), InitialPostPositionByte, wb));
     582        PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(MarkerPosition::InitialPostPositionByte, whilePending), wb), MarkerPosition::InitialPostPositionByte, wb));
    539583        Next * nextWhilePending = wb.createNext(whilePending, wb.createAnd(loopComputation, wb.createNot(whileAccum)));
    540584        Next * nextWhileAccum = wb.createNext(whileAccum, wb.createOr(loopComputation, whileAccum));
     
    546590        mStarDepth--;
    547591        mLoopVariants.clear();
    548         return makeMarker(InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
     592        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
    549593    }   
    550594} // end of namespace re
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4684 r4808  
    1212#include <cc/cc_compiler.h>
    1313#include <pablo/builder.hpp>
    14 #include <string>
    15 #include <list>
    16 #include <map>
    17 
    18 namespace cc {
    19 class CC_NameMap;
    20 }
     14#ifdef USE_BOOST
     15#include <boost/container/flat_set.hpp>
     16#else
     17#include <unordered_set>
     18#endif
    2119
    2220namespace pablo {
     
    2422}
    2523
     24namespace UCD {
     25class UnicodeSet;
     26}
    2627
    2728/*   Marker streams represent the results of matching steps.
     
    5859    RE_Compiler(pablo::PabloFunction & function, cc::CC_Compiler & ccCompiler);
    5960    void initializeRequiredStreams();
     61    void compileUnicodeNames(RE * re);
    6062    void finalizeMatchResult(MarkerType match_result);
    6163    MarkerType compile(RE * re) {
     
    6466
    6567private:
     68
     69    #ifdef USE_BOOST
     70    using NameSet = boost::container::flat_set<Name *>;
     71    #else
     72    using NameSet = std::unordered_set<Name *>;
     73    #endif
    6674
    6775    MarkerType compile(RE * re, pablo::PabloBuilder & cg);
     
    8795    MarkerType processUnboundedRep(RE * repeated, MarkerType marker, pablo::PabloBuilder & pb);
    8896    MarkerType processBoundedRep(RE * repeated, int ub, MarkerType marker, pablo::PabloBuilder & pb);
     97    static void gatherUnicodePropertyNames(RE * re, NameSet & nameSet);
    8998
    9099private:
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4660 r4808  
    1111}
    1212
     13namespace UCD {
     14    class UnicodeSet;
     15}
    1316
    1417namespace re {
     
    3538
    3639    Type getType() const;
    37     RE *getDefinition() const;
     40    RE * getDefinition() const;
    3841    pablo::PabloAST * getCompiled() const {
    3942        return mCompiled;
Note: See TracChangeset for help on using the changeset viewer.