Changeset 6218 for icGREP


Ignore:
Timestamp:
Dec 6, 2018, 7:26:06 AM (2 months ago)
Author:
cameron
Message:

Grep Kernel Options

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6217 r6218  
    8181namespace grep {
    8282
    83 using Alphabets = ICGrepKernel::Alphabets;
    84 
    8583void GrepCallBackObject::handle_signal(unsigned s) {
    8684    if (static_cast<GrepSignal>(s) == GrepSignal::BinaryFile) {
     
    288286    Component internalComponents = Component::NoComponents;
    289287
     288   
    290289
    291290    if (isSimple) {
     
    295294            setComponent(internalComponents, Component::MoveMatchesToEOL);
    296295        }
     296        std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>();
    297297        const auto isWithinByteTestLimit = byteTestsWithinLimit(mREs[0], ByteCClimit);
    298298        const auto hasTriCC = hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE);
    299         ICGrepKernel::Externals externals;
    300299        if (isWithinByteTestLimit || hasTriCC) {
    301300            if (MultithreadedSimpleRE && hasTriCC) {
     
    307306                    StreamSet * const ccStream = P->CreateStreamSet(1, 1);
    308307                    P->CreateKernelCall<DirectCharacterClassKernelBuilder>(ccNameStr, std::vector<re::CC *>{cc}, ByteStream, ccStream);
    309                     externals.emplace_back(ccNameStr, ccStream);
     308                    options->addExternal(ccNameStr, ccStream);
    310309                }
    311310            }
     
    313312            MatchResultsBufs[0] = MatchResults;
    314313            if (isWithinByteTestLimit) {
    315                 P->CreateKernelCall<ICGrepKernel>(mREs[0], ByteStream, MatchResults, externals);
     314                options->setRE(mREs[0]);
     315                options->setSource(ByteStream);
     316                options->setResults(MatchResults);
     317                P->CreateKernelCall<ICGrepKernel>(std::move(options));
    316318            } else {
    317                 P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, ByteStream, MatchResults, externals);
     319                //P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, ByteStream, MatchResults, externals);
     320                options->setPrefixRE(prefixRE);
     321                options->setRE(suffixRE);
     322                options->setSource(ByteStream);
     323                options->setResults(MatchResults);
     324                P->CreateKernelCall<ICGrepKernel>(std::move(options));
    318325            }
    319326            Kernel * LB_nullK = P->CreateKernelCall<DirectCharacterClassKernelBuilder>( "breakCC", std::vector<re::CC *>{mBreakCC}, ByteStream, LineBreakStream, callbackObject);
     
    329336            P->CreateKernelCall<S2P_PabloKernel>(ByteStream, BasisBits);
    330337        } else {
     338            //P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
    331339            Kernel * s2pK = P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits, cc::BitNumbering::LittleEndian, callbackObject);
    332340            mGrepDriver.LinkFunction(s2pK, "signal_dispatcher", kernel::signal_dispatcher);
     
    366374
    367375        for(unsigned i = 0; i < numOfREs; ++i) {
    368             ICGrepKernel::Externals externals;
     376            std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>();
    369377            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    370                 externals.emplace_back("UTF8_LB", LineBreakStream);
    371                 externals.emplace_back("UTF8_nonfinal", RequiredStreams);
     378                options->addExternal("UTF8_LB", LineBreakStream);
     379                options->addExternal("UTF8_nonfinal", RequiredStreams);
    372380            }
    373381            std::set<re::Name *> UnicodeProperties;
     
    380388                        report_fatal_error(name + " not found");
    381389                    }
    382                     externals.emplace_back(name, f->second);
     390                    options->addExternal(name, f->second);
    383391                }
    384392            }
    385393            if (hasGCB[i]) { assert (GCB_stream);
    386                 externals.emplace_back("\\b{g}", GCB_stream);
     394                options->addExternal("\\b{g}", GCB_stream);
    387395            }
    388396
     
    393401                const auto UnicodeSets = re::collectCCs(mREs[i], cc::Unicode, std::set<re::Name *>{re::makeZeroWidth("\\b{g}")});
    394402                if (UnicodeSets.size() <= 1) {
    395                     P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals);
     403                    options->setRE(mREs[i]);
     404                    options->setSource(BasisBits);
     405                    options->setResults(MatchResults);
    396406                } else {
    397407                    auto mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets);
     
    404414                    // Multiplexing Grep Kernel is not Cachable, since for now it uses the string representation of the RE AST as the cache key,
    405415                    // while it is possible that two multiplexed REs with the same name "mpx_1" have different alphabets
    406 
    407                     Alphabets alphabets;
    408                     alphabets.emplace_back(mpx, CharClasses);
    409                     P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals, alphabets, cc::BitNumbering::LittleEndian, false);
     416                    options->setRE(mREs[i]);
     417                    options->setSource(BasisBits);
     418                    options->setResults(MatchResults);
     419                    options->addAlphabet(mpx, CharClasses);
     420                    P->CreateKernelCall<ICGrepKernel>(std::move(options));
    410421                }
    411422            } else {
    412                 P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals);
     423                options->setRE(mREs[i]);
     424                options->setSource(BasisBits);
     425                options->setResults(MatchResults);
     426                P->CreateKernelCall<ICGrepKernel>(std::move(options));
    413427            }
    414428        }
     
    568582    close(fileDescriptor);
    569583    if (handler.binaryFileSignalled()) {
     584        llvm::errs() << "Binary file " << fileName << "\n";
    570585        return 0;
    571586    }
     
    612627    if (accum.binaryFileSignalled()) {
    613628        accum.mResultStr.clear();
    614         accum.mResultStr << "Binary file " << fileName << " skipped.\n";
     629        if (!mSuppressFileMessages) {
     630            accum.mResultStr << "Binary file " << fileName << " skipped.\n";
     631        }
    615632    }
    616633    if (accum.mLineCount > 0) grepMatchFound = true;
     
    793810    } else {
    794811        StreamSet * MatchResults = E->CreateStreamSet();
    795         E->CreateKernelCall<ICGrepKernel>(matchingRE, BasisBits, MatchResults);
     812        std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>();
     813        options->setRE(matchingRE);
     814        options->setSource(BasisBits);
     815        options->setResults(MatchResults);
     816        E->CreateKernelCall<ICGrepKernel>(std::move(options));
    796817        MatchingRecords = E->CreateStreamSet();
    797818        E->CreateKernelCall<MatchedLinesKernel>(MatchResults, RecordBreakStream, MatchingRecords);
     
    799820    if (!excludeNothing) {
    800821        StreamSet * ExcludedResults = E->CreateStreamSet();
    801         E->CreateKernelCall<ICGrepKernel>(excludedRE, BasisBits, ExcludedResults);
     822        std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>();
     823        options->setRE(excludedRE);
     824        options->setSource(BasisBits);
     825        options->setResults(ExcludedResults);
     826        E->CreateKernelCall<ICGrepKernel>(std::move(options));
    802827        StreamSet * ExcludedRecords = E->CreateStreamSet();
    803828        E->CreateKernelCall<MatchedLinesKernel>(ExcludedResults, RecordBreakStream, ExcludedRecords);
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r6205 r6218  
    55
    66#include "grep_kernel.h"
     7#include <cc/alphabet.h>
    78#include <re/printer_re.h>
    89#include <re/re_cc.h>
     
    214215}
    215216
    216 ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
    217 : mRE(re_ast)
    218 , mSignature(Printer_RE::PrintRE(mRE)) {
    219 
    220 }
    221 // Helper to compute stream set inputs to pass into PabloKernel constructor.
    222 Bindings ICGrepKernel::makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets) {
     217void GrepKernelOptions::setNumbering(cc::BitNumbering numbering) {mBasisSetNumbering = numbering;}
     218void GrepKernelOptions::setIndexingAlphabet(cc::Alphabet * a) {mIndexingAlphabet = a;}
     219void GrepKernelOptions::setRE(RE * e) {mRE = e;}
     220void GrepKernelOptions::setPrefixRE(RE * e) {mPrefixRE = e;}
     221void GrepKernelOptions::setSource(StreamSet * s) {mSource = s;}
     222void GrepKernelOptions::setResults(StreamSet * r) {mResults = r;}
     223void GrepKernelOptions::addExternal(std::string name, StreamSet * strm) {
     224    mExternals.emplace_back(name, strm);
     225}
     226void GrepKernelOptions::addAlphabet(std::shared_ptr<cc::Alphabet> a, StreamSet * basis) {
     227    mAlphabets.emplace_back(a, basis);
     228}
     229
     230Bindings GrepKernelOptions::streamSetInputBindings() {
    223231    Bindings inputs;
    224     inputs.emplace_back("basis", basis);
    225     for (const auto & e : externals) {
     232    inputs.emplace_back("basis", mSource);
     233    for (const auto & e : mExternals) {
    226234        inputs.emplace_back(e.first, e.second);
    227235    }
    228     for (const auto & a : alphabets) {
     236    for (const auto & a : mAlphabets) {
    229237        inputs.emplace_back(a.first->getName() + "_basis", a.second);
    230238    }
     
    232240}
    233241
    234 ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
    235                            RE * const re,
    236                            StreamSet * const BasisBits,
    237                            StreamSet * const matches,
    238                            const Externals externals,
    239                            const Alphabets alphabets,
    240                            const cc::BitNumbering basisSetNumbering,
    241                            const bool cachable)
    242 : ICGrepSignature(re)
    243 , PabloKernel(b, "ic" + getStringHash(mSignature),
    244 // inputs
    245 makeInputBindings(BasisBits, externals, alphabets),
    246 // output
    247 {Binding{"matches", matches, FixedRate(), Add1()}})
    248 , mExternals(std::move(externals))
    249 , mAlphabets(std::move(alphabets))
    250 , mBasisSetNumbering(basisSetNumbering)
    251 , mIsCachable(cachable) {
     242Bindings GrepKernelOptions::streamSetOutputBindings() {
     243    return {Binding{"matches", mResults, FixedRate(), Add1()}};
     244}
     245
     246Bindings GrepKernelOptions::scalarInputBindings() {
     247    return {};
     248}
     249
     250Bindings GrepKernelOptions::scalarOutputBindings() {
     251    return {};
     252}
     253
     254std::string GrepKernelOptions::getSignature() {
     255    if (mSignature == "") {
     256        mSignature = std::to_string(mSource->getNumElements()) + "x" + std::to_string(mSource->getFieldWidth());
     257        mSignature += "/" + mIndexingAlphabet->getName();
     258        for (auto e: mExternals) {
     259            mSignature += "_" + e.first;
     260        }
     261        for (auto a: mAlphabets) {
     262            mSignature += "_" + a.first->getName();
     263        }
     264        if (mPrefixRE) {
     265            mSignature += ":" + Printer_RE::PrintRE(mPrefixRE);
     266        }
     267        mSignature += ":" + Printer_RE::PrintRE(mRE);
     268    }
     269    return mSignature;
     270}
     271
     272ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::unique_ptr<GrepKernelOptions> options)
     273: PabloKernel(b, "ic" + getStringHash(options->getSignature()),
     274    options->streamSetInputBindings(),
     275    options->streamSetOutputBindings(),
     276    options->scalarInputBindings(),
     277              options->scalarOutputBindings()), mOptions(std::move(options)) {
    252278}
    253279
    254280std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    255     return mSignature;
     281    return mOptions->getSignature();
    256282}
    257283
     
    263289        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
    264290    } else {
    265         ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
    266     }
    267     //cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
    268     RE_Compiler re_compiler(getEntryScope(), *ccc.get(), mBasisSetNumbering);
    269     for (const auto & e : mExternals) {
     291        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mOptions->mBasisSetNumbering);
     292    }
     293    //cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mOptions->mBasisSetNumbering);
     294    RE_Compiler re_compiler(getEntryScope(), *ccc.get(), mOptions->mBasisSetNumbering);
     295    for (const auto & e : mOptions->mExternals) {
    270296        re_compiler.addPrecompiled(e.first, pb.createExtract(getInputStreamVar(e.first), pb.getInteger(0)));
    271297    }
    272     for (const auto & a : mAlphabets) {
     298    for (const auto & a : mOptions->mAlphabets) {
    273299        auto & alpha = a.first;
    274300        auto mpx_basis = getInputStreamSet(alpha->getName() + "_basis");
    275301        re_compiler.addAlphabet(alpha, mpx_basis);
    276302    }
    277     PabloAST * const matches = re_compiler.compile(mRE);
    278     Var * const output = getOutputStreamVar("matches");
    279     pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
     303    if (mOptions->mPrefixRE) {
     304        PabloAST * const prefixMatches = re_compiler.compile(mOptions->mPrefixRE);
     305        Var * const final_matches = pb.createVar("final_matches", pb.createZeroes());
     306        PabloBlock * scope1 = getEntryScope()->createScope();
     307        pb.createIf(prefixMatches, scope1);
     308       
     309        PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
     310        PabloAST * nybbles[2];
     311        nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
     312        nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
     313       
     314        PabloAST * bitpairs[4];
     315        for (unsigned i = 0; i < 2; i++) {
     316            bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]);
     317            bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
     318        }
     319       
     320        std::vector<PabloAST *> basis(8);
     321        for (unsigned i = 0; i < 4; i++) {
     322            basis[2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]);
     323            basis[2*i + 1] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
     324        }
     325       
     326        cc::Parabix_CC_Compiler ccc(scope1, basis);
     327        RE_Compiler re_compiler(scope1, ccc);
     328        scope1->createAssign(final_matches, re_compiler.compile(mOptions->mRE, prefixMatches));
     329        Var * const output = getOutputStreamVar("matches");
     330        pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches);
     331    } else {
     332        PabloAST * const matches = re_compiler.compile(mOptions->mRE);
     333        Var * const output = getOutputStreamVar("matches");
     334        pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
     335    }
    280336}
    281337
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r6203 r6218  
    5252
    5353
    54 struct ICGrepSignature {
    55     ICGrepSignature(re::RE * re_ast);
     54class GrepKernelOptions {
     55    friend class ICGrepKernel;
     56public:
     57    using Externals = std::vector<std::pair<std::string, StreamSet *>>;
     58    using Alphabets = std::vector<std::pair<std::shared_ptr<cc::Alphabet>, StreamSet *>>;
     59    GrepKernelOptions() :
     60        mBasisSetNumbering(cc::BitNumbering::LittleEndian),
     61        mIndexingAlphabet(&cc::Byte),
     62        mPrefixRE(nullptr) {}
     63    void setNumbering(cc::BitNumbering numbering);
     64    void setIndexingAlphabet(cc::Alphabet * a);
     65    void setSource(StreamSet * s);
     66    void setResults(StreamSet * r);
     67    void addExternal(std::string name, StreamSet * strm);
     68    void addAlphabet(std::shared_ptr<cc::Alphabet> a, StreamSet * basis);
     69    void setRE(re::RE * re);
     70    void setPrefixRE(re::RE * re);
     71
    5672protected:
    57     re::RE * const  mRE;
     73    Bindings streamSetInputBindings();
     74    Bindings streamSetOutputBindings();
     75    Bindings scalarInputBindings();
     76    Bindings scalarOutputBindings();
     77    std::string getSignature();
     78
     79private:
     80    cc::BitNumbering mBasisSetNumbering;
     81    const cc::Alphabet * mIndexingAlphabet;
     82    StreamSet * mSource;
     83    StreamSet * mResults;
     84    Externals mExternals;
     85    Alphabets mAlphabets;
     86    re::RE * mRE;
     87    re::RE * mPrefixRE;
    5888    std::string     mSignature;
    5989};
    6090
    61    
    62 class ICGrepKernel : public ICGrepSignature, public pablo::PabloKernel {
     91
     92class ICGrepKernel : public pablo::PabloKernel {
    6393public:
    64 
    65     using Externals = std::vector<std::pair<std::string, StreamSet *>>;
    66     using Alphabets = std::vector<std::pair<std::shared_ptr<cc::Alphabet>, StreamSet *>>;
    67 
    6894    ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
    69                  re::RE * const re_ast, StreamSet * const BasisBits, StreamSet * const MatchResults,
    70                  const Externals externals = {}, const Alphabets alphabets = {},
    71                  const cc::BitNumbering basisSetNumbering = cc::BitNumbering::LittleEndian,
    72                  const bool cachable = true);
     95                 std::unique_ptr<GrepKernelOptions> options);
    7396    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) override;
    74     bool isCachable() const override { return mIsCachable; }
     97    bool isCachable() const override { return true; }
    7598    bool hasFamilyName() const override { return true; }
    7699protected:
    77100    void generatePabloMethod() override;
    78 private:
    79     static Bindings makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets);
    80 protected:
    81     const Externals mExternals;
    82     const Alphabets mAlphabets;
    83     const cc::BitNumbering mBasisSetNumbering;
    84     const bool mIsCachable;
     101    std::unique_ptr<GrepKernelOptions> mOptions;
    85102};
    86103   
Note: See TracChangeset for help on using the changeset viewer.