Changeset 5872 for icGREP


Ignore:
Timestamp:
Feb 17, 2018, 9:40:32 AM (13 months ago)
Author:
cameron
Message:

Decoupling CC compilers from Pablo Kernel

Location:
icGREP/icgrep-devel/icgrep
Files:
17 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5787 r5872  
    3636#define Behind(x) makeLookBehindAssertion(x)
    3737#define Ahead(x) makeLookAheadAssertion(x)
     38   
     39   
     40RE * UnicodeBreakRE() {
     41    return makeAlt({makeCC(0x0A, 0x0C), makeSeq({makeCC(0x0D), makeCC(0x0A)}), makeSeq({makeCC(0x0D), makeNegativeLookAheadAssertion(makeCC(0x0A))})});
     42}
    3843
    3944void generateGraphemeClusterBoundaryRule(Name * const &property) {
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.h

    r5428 r5872  
    1515LLVM_ATTRIBUTE_NORETURN void UnicodePropertyExpressionError(std::string errmsg);
    1616
     17re::RE * UnicodeBreakRE();
    1718void generateGraphemeClusterBoundaryRule(re::Name * const &property);
    1819bool resolvePropertyDefinition(re::Name * const property);
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r5859 r5872  
    2222
    2323namespace cc {
    24     CC_Compiler::CC_Compiler(pablo::PabloKernel * kernel)
    25     : mBuilder(kernel->getEntryScope()) {
    26     }
    27    
    28 
    29 Parabix_CC_Compiler::Parabix_CC_Compiler(pablo::PabloKernel * kernel, std::vector<pablo::PabloAST *> basisBitSet)
    30 : CC_Compiler(kernel)
     24    CC_Compiler::CC_Compiler(pablo::PabloBlock * scope)
     25    : mBuilder(scope) {
     26    }
     27   
     28
     29Parabix_CC_Compiler::Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet)
     30: CC_Compiler(scope)
    3131, mEncodingBits(basisBitSet.size())
    3232, mBasisBit(basisBitSet) {
     
    268268}
    269269   
    270 Direct_CC_Compiler::Direct_CC_Compiler(pablo::PabloKernel * kernel, pablo::PabloAST * codeUnitStream)
    271 : CC_Compiler(kernel)
     270Direct_CC_Compiler::Direct_CC_Compiler(pablo::PabloBlock * scope, pablo::PabloAST * codeUnitStream)
     271: CC_Compiler(scope)
    272272, mCodeUnitStream(codeUnitStream) {
    273273}
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r5854 r5872  
    11/*
    2  *  Copyright (c) 2014 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    3232
    3333protected:
    34     CC_Compiler(pablo::PabloKernel * kernel);
     34    CC_Compiler(pablo::PabloBlock * scope);
    3535    pablo::PabloBuilder             mBuilder;
    3636};
     
    4040public:
    4141   
    42     Parabix_CC_Compiler(pablo::PabloKernel * kernel, std::vector<pablo::PabloAST *> basisBitSet);
     42    Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet);
    4343   
    4444    pablo::PabloAST * compileCC(const re::CC *cc) override;
     
    9393public:
    9494   
    95     Direct_CC_Compiler(pablo::PabloKernel * kernel, pablo::PabloAST * codeUnitStream);
     95    Direct_CC_Compiler(pablo::PabloBlock * scope, pablo::PabloAST * codeUnitStream);
    9696   
    9797    pablo::PabloAST * compileCC(const re::CC *cc) override;
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5856 r5872  
    292292void PreprocessKernel::generatePabloMethod() {
    293293    PabloBuilder pb(getEntryScope());
    294     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
    295     PabloAST * A = ccc.compileCC(re::makeCC(re::makeCC(0x41), re::makeCC(0x61)), pb);
    296     PabloAST * C = ccc.compileCC(re::makeCC(re::makeCC(0x43), re::makeCC(0x63)), pb);
    297     PabloAST * T = ccc.compileCC(re::makeCC(re::makeCC(0x54), re::makeCC(0x74)), pb);
    298     PabloAST * G = ccc.compileCC(re::makeCC(re::makeCC(0x47), re::makeCC(0x67)), pb);
     294    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
     295    PabloAST * A = ccc.compileCC(re::makeCC(re::makeCC(0x41), re::makeCC(0x61)));
     296    PabloAST * C = ccc.compileCC(re::makeCC(re::makeCC(0x43), re::makeCC(0x63)));
     297    PabloAST * T = ccc.compileCC(re::makeCC(re::makeCC(0x54), re::makeCC(0x74)));
     298    PabloAST * G = ccc.compileCC(re::makeCC(re::makeCC(0x47), re::makeCC(0x67)));
    299299    Var * const pat = getOutputStreamVar("pat");
    300300    pb.createAssign(pb.createExtract(pat, 0), A);
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5847 r5872  
    103103void ParabixCharacterClassKernelBuilder::generatePabloMethod() {
    104104    PabloBuilder pb(getEntryScope());
    105     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     105    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    106106    Var * outputVar = getOutputStreamVar("outputStream");
    107107    for (unsigned i = 0; i < mCharClasses.size(); ++i) {
    108         pb.createAssign(pb.createExtract(outputVar, i), ccc.compileCC("cc", mCharClasses[i], pb));
     108        pb.createAssign(pb.createExtract(outputVar, i), ccc.compileCC(mCharClasses[i]));
    109109    }
    110110}
  • icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp

    r5861 r5872  
    8080    std::unique_ptr<CC_Compiler> ccc;
    8181    if (mUseDirectCC) {
    82         ccc = make_unique<cc::Direct_CC_Compiler>(this, pb.createExtract(getInput(0), pb.getInteger(0)));
     82        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
    8383    } else {
    84         ccc = make_unique<cc::Parabix_CC_Compiler>(this, getInputStreamSet("basis"));
     84        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"));
    8585    }
    8686    unsigned n = mCCs.size();
  • icGREP/icgrep-devel/icgrep/kernels/delmask_kernel.cpp

    r5847 r5872  
    2525    //  output: delmask stream + error stream
    2626   
    27     cc::Parabix_CC_Compiler ccc(this, u8_bits);
     27    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
    2828   
    2929    Zeroes * zeroes = main.createZeroes();
     
    3434    Var * error_mask = main.createVar("error_mask", zeroes);
    3535   
    36     PabloAST * ASCII = ccc.compileCC("ASCII", re::makeCC(0x0, 0x7F), main);
    37     auto ascii = main.createScope();
    38     main.createIf(ASCII, ascii);
    39     PabloAST * u8pfx = ccc.compileCC("u8pfx", re::makeCC(0xC0, 0xFF), main);
    40     PabloAST * nonASCII = ccc.compileCC("u8pfx", re::makeCC(0x80, 0xFF), main);
     36    PabloAST * u8pfx = ccc.compileCC(re::makeCC(0xC0, 0xFF));
     37    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
    4138    auto it = main.createScope();
    4239    main.createIf(nonASCII, it);
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5867 r5872  
    4242void RequiredStreams_UTF8::generatePabloMethod() {
    4343    PabloBuilder pb(getEntryScope());
    44     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     44    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    4545   
    4646    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
     
    152152void RequiredStreams_UTF16::generatePabloMethod() {
    153153    PabloBuilder pb(getEntryScope());
    154     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     154    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    155155   
    156156    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
     
    224224void ICGrepKernel::generatePabloMethod() {
    225225    PabloBuilder pb(getEntryScope());
    226     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     226    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    227227    RE_Compiler re_compiler(this, ccc);
    228228    for (auto a : mAlphabets) {
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5861 r5872  
    3838    std::unique_ptr<CC_Compiler> ccc;
    3939    if (mNumOfStreams == 1) {
    40         ccc = make_unique<cc::Direct_CC_Compiler>(this, pb.createExtract(getInput(0), pb.getInteger(0)));
     40        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
    4141    } else {
    42         ccc = make_unique<cc::Parabix_CC_Compiler>(this, getInputStreamSet("basis"));
     42        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"));
    4343    }
    4444    PabloAST * LF = ccc->compileCC("LF", makeByte(0x0A), pb);
     
    5959void LineBreakKernelBuilder::generatePabloMethod() {
    6060    PabloBuilder pb(getEntryScope());
    61     cc::Parabix_CC_Compiler ccc(this, getInputStreamSet("basis"));
     61    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    6262
    6363    Integer * const ZERO = pb.getInteger(0);
  • icGREP/icgrep-devel/icgrep/kernels/u8u32_kernel.cpp

    r5847 r5872  
    2525    //  output: 32 u8-indexed streams, + delmask stream + error stream
    2626   
    27     cc::Parabix_CC_Compiler ccc(this, u8_bits);
     27    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
    2828   
    2929    Zeroes * zeroes = main.createZeroes();
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5867 r5872  
    4444using namespace llvm;
    4545
    46 using FollowMap = std::map<re::CC *, re::CC*>;
    47 
    4846namespace re {
    4947
     
    5149void RE_Compiler::addAlphabet(cc::Alphabet * a, std::vector<pablo::PabloAST *> basis_set) {
    5250    mAlphabets.push_back(a);
    53     mAlphabetCompilers.push_back(make_unique<cc::Parabix_CC_Compiler>(mKernel, basis_set));
     51    mAlphabetCompilers.push_back(make_unique<cc::Parabix_CC_Compiler>(mEntryScope, basis_set));
    5452}
    5553
     
    5755
    5856PabloAST * RE_Compiler::compile(RE * const re, PabloAST * const initialCursors) {
    59     pablo::PabloBuilder mPB(mKernel->getEntryScope());
     57    pablo::PabloBuilder mPB(mEntryScope);
    6058    const auto markers = initialCursors ? compile(re, initialCursors, mPB) : compile(re, mPB);
    6159    return markerVar(AdvanceMarker(markers, FinalPostPositionUnit, mPB));
     
    586584
    587585RE_Compiler::RE_Compiler(PabloKernel * kernel, cc::CC_Compiler & ccCompiler)
    588 : mKernel(kernel)
     586: mEntryScope(kernel->getEntryScope())
    589587, mCCCompiler(ccCompiler)
    590588, mLineBreak(nullptr)
     
    594592, mStarDepth(0)
    595593, mCompiledName(&mBaseMap) {
    596     PabloBuilder mPB(kernel->getEntryScope());
    597     Var * const linebreak = mKernel->getInputStreamVar("linebreak");
     594    PabloBuilder mPB(mEntryScope);
     595    Var * const linebreak = kernel->getInputStreamVar("linebreak");
    598596    mLineBreak = mPB.createExtract(linebreak, 0);
    599     Var * const required = mKernel->getInputStreamVar("required");
     597    Var * const required = kernel->getInputStreamVar("required");
    600598    mNonFinal = mPB.createExtract(required, 0);
    601599    mFinal = mPB.createNot(mNonFinal);
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5867 r5872  
    130130private:
    131131
    132     pablo::PabloKernel * const                      mKernel;
     132    pablo::PabloBlock * const                       mEntryScope;
    133133    std::vector<cc::Alphabet *>                     mAlphabets;
    134134    std::vector<std::unique_ptr<cc::CC_Compiler>>   mAlphabetCompilers;
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp

    r5859 r5872  
    1212#include <re/re_analysis.h>
    1313#include <re/re_group.h>
     14#include <re/re_start.h>
     15#include <re/re_end.h>
     16#include <re/re_any.h>
    1417#include <re/re_memoizer.hpp>
    1518#include <UCD/resolve_properties.h>
     
    115118    }
    116119   
     120   
     121   
     122bool hasAnchor(const RE * re) {
     123    if (const Alt * alt = dyn_cast<Alt>(re)) {
     124        for (const RE * re : *alt) {
     125            if (hasAnchor(re)) {
     126                return true;
     127            }
     128        }
     129        return false;
     130    } else if (const Seq * seq = dyn_cast<Seq>(re)) {
     131        for (const RE * re : *seq) {
     132            if (hasAnchor(re)) {
     133                return true;
     134            }
     135        }
     136        return false;
     137    } else if (const Rep * rep = dyn_cast<Rep>(re)) {
     138        return hasAnchor(rep->getRE());
     139    } else if (isa<Start>(re)) {
     140        return true;
     141    } else if (isa<End>(re)) {
     142        return true;
     143    } else if (const Assertion * a = dyn_cast<Assertion>(re)) {
     144        return hasAnchor(a->getAsserted());
     145    } else if (const Diff * diff = dyn_cast<Diff>(re)) {
     146        return hasAnchor(diff->getLH()) || hasAnchor(diff->getRH());
     147    } else if (const Intersect * e = dyn_cast<Intersect>(re)) {
     148        return hasAnchor(e->getLH()) || hasAnchor(e->getRH());
     149    } else if (isa<Any>(re)) {
     150        return false;
     151    } else if (isa<CC>(re)) {
     152        return false;
     153    } else if (const Group * g = dyn_cast<Group>(re)) {
     154        return hasAnchor(g->getRE());
     155    } else if (const Name * n = dyn_cast<Name>(re)) {
     156        return hasAnchor(n->getDefinition());
     157    }
     158    return false; // otherwise
    117159}
     160
     161RE * resolveAnchors(RE * r, RE * breakRE) {
     162    if (!hasAnchor(r)) return r;
     163    if (const Alt * alt = dyn_cast<Alt>(r)) {
     164        std::vector<RE *> list;
     165        list.reserve(alt->size());
     166        for (RE * item : *alt) {
     167            item = resolveAnchors(item, breakRE);
     168            list.push_back(item);
     169        }
     170        return makeAlt(list.begin(), list.end());
     171    } else if (const Seq * seq = dyn_cast<Seq>(r)) {
     172        std::vector<RE *> list;
     173        list.reserve(seq->size());
     174        for (RE * item : *seq) {
     175            item = resolveAnchors(item, breakRE);
     176            list.push_back(item);
     177        }
     178        return makeSeq(list.begin(), list.end());
     179    } else if (Assertion * a = dyn_cast<Assertion>(r)) {
     180        return makeAssertion(resolveAnchors(a->getAsserted(), breakRE), a->getKind(), a->getSense());
     181    } else if (Rep * rep = dyn_cast<Rep>(r)) {
     182        return makeRep(resolveAnchors(rep->getRE(), breakRE), rep->getLB(), rep->getUB());
     183    } else if (Diff * diff = dyn_cast<Diff>(r)) {
     184        return makeDiff(resolveAnchors(diff->getLH(), breakRE), resolveAnchors(diff->getRH(), breakRE));
     185    } else if (Intersect * e = dyn_cast<Intersect>(r)) {
     186        return makeIntersect(resolveAnchors(e->getLH(), breakRE), resolveAnchors(e->getRH(), breakRE));
     187    } else if (isa<Start>(r)) {
     188        return makeAlt({r, makeLookBehindAssertion(breakRE)});
     189    } else if (isa<End>(r)) {
     190        return makeAlt({r, makeLookAheadAssertion(breakRE)});
     191    }
     192}
     193}
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.h

    r5836 r5872  
    99    RE * resolveUnicodeProperties(RE * re);
    1010    RE * resolveNames(RE * re);
     11    RE * resolveAnchors(RE * r, RE * breakRE);
    1112
    1213}
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5856 r5872  
    8484    Var * error_mask = main.createVar("error_mask", zeroes);
    8585
    86     cc::Parabix_CC_Compiler ccc(this, u8_bits);
     86    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
    8787
    8888    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5861 r5872  
    103103    std::unique_ptr<cc::CC_Compiler> ccc;
    104104    if (CountWords || CountChars) {
    105         ccc = make_unique<cc::Parabix_CC_Compiler>(this, getInputStreamSet("u8bit"));
    106     } else {
    107         ccc = make_unique<cc::Direct_CC_Compiler>(this, pb.createExtract(getInput(0), pb.getInteger(0)));
     105        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("u8bit"));
     106    } else {
     107        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
    108108    }
    109109
Note: See TracChangeset for help on using the changeset viewer.