Changeset 6203 for icGREP


Ignore:
Timestamp:
Nov 18, 2018, 8:39:47 PM (3 months ago)
Author:
cameron
Message:

Grep pipeline restructuring step

Location:
icGREP/icgrep-devel/icgrep
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6184 r6203  
    7777
    7878using Alphabets = ICGrepKernel::Alphabets;
    79 
     79   
    8080extern "C" void signal_dispatcher(intptr_t callback_object_addr, unsigned signal) {
    8181    reinterpret_cast<GrepCallBackObject *>(callback_object_addr)->handle_signal(signal);
     
    159159    grepMatchFound(false),
    160160    mGrepRecordBreak(GrepRecordBreakKind::LF),
     161    mRequiredComponents(static_cast<Component>(0)),
    161162    mMoveMatchesToEOL(true),
    162163    mEngineThread(pthread_self()) {}
     
    164165QuietModeEngine::QuietModeEngine(BaseDriver &driver) : GrepEngine(driver) {
    165166    mEngineKind = EngineKind::QuietMode;
    166     mMoveMatchesToEOL = false;
    167167    mMaxCount = 1;
    168168}
     
    172172    mEngineKind = EngineKind::MatchOnly;
    173173    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
    174     mMoveMatchesToEOL = false;
    175174    mMaxCount = 1;
    176175    mShowFileNames = true;
     
    189188
    190189   
     190bool GrepEngine::hasComponent(Component compon_set, Component c) {
     191    return (static_cast<component_t>(compon_set) & static_cast<component_t>(c)) != 0;
     192}
     193
     194void GrepEngine::GrepEngine::setComponent(Component & compon_set, Component c) {
     195    compon_set = static_cast<Component>(static_cast<component_t>(compon_set) | static_cast<component_t>(c));
     196}
     197
    191198void GrepEngine::setRecordBreak(GrepRecordBreakKind b) {
    192199    mGrepRecordBreak = b;
     
    225232        mREs[i] = regular_expression_passes(mREs[i]);
    226233    }
    227     if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
    228 
     234    if ((mEngineKind == EngineKind::EmitMatches) || (mEngineKind == EngineKind::CountOnly)) {
     235        if (!allAnchored || (mGrepRecordBreak == GrepRecordBreakKind::Unicode)) {
     236            setComponent(mRequiredComponents, Component::MoveMatchesToEOL);
     237        }
     238    }
    229239}
    230240
     
    258268        anyGCB |= hasGCB[i];
    259269    }
     270    if (anyGCB) {
     271        setComponent(mRequiredComponents, Component::GraphemeClusterBoundary);
     272    }
    260273
    261274
     
    273286
    274287    bool requiresComplexTest = true;
     288   
     289
    275290
    276291    if (isSimple) {
    277292        const auto isWithinByteTestLimit = byteTestsWithinLimit(mREs[0], ByteCClimit);
    278293        const auto hasTriCC = hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE);
     294        ICGrepKernel::Externals externals;
    279295        if (isWithinByteTestLimit || hasTriCC) {
    280             std::vector<Binding> inputSets;
    281             inputSets.emplace_back("byteData", ByteStream);
    282296            if (MultithreadedSimpleRE && hasTriCC) {
    283297                auto CCs = re::collectCCs(prefixRE, cc::Byte);
    284                 inputSets.reserve(CCs.size());
    285298                for (auto cc : CCs) {
    286299                    auto ccName = makeName(cc);
     
    289302                    StreamSet * const ccStream = P->CreateStreamSet(1, 1);
    290303                    P->CreateKernelCall<DirectCharacterClassKernelBuilder>(ccNameStr, std::vector<re::CC *>{cc}, ByteStream, ccStream);
    291                     inputSets.emplace_back(ccNameStr, ccStream);
     304                    externals.emplace_back(ccNameStr, ccStream);
    292305                }
    293306            }
     
    295308            MatchResultsBufs[0] = MatchResults;
    296309            if (isWithinByteTestLimit) {
    297                 P->CreateKernelCall<ByteGrepKernel>(mREs[0], inputSets, MatchResults);
     310                P->CreateKernelCall<ICGrepKernel>(mREs[0], ByteStream, MatchResults, externals);
    298311            } else {
    299                 P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, inputSets, MatchResults);
     312                P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, ByteStream, MatchResults, externals);
    300313            }
    301314            P->CreateKernelCall<DirectCharacterClassKernelBuilder>( "breakCC", std::vector<re::CC *>{mBreakCC}, ByteStream, LineBreakStream);
     
    340353
    341354        StreamSet * GCB_stream = nullptr;
    342         if (anyGCB) {
     355        if (hasComponent(mRequiredComponents, Component::GraphemeClusterBoundary)) {
    343356            GCB_stream = P->CreateStreamSet();
    344357            P->CreateKernelCall<GraphemeClusterBreakKernel>(BasisBits, RequiredStreams, GCB_stream);
     
    402415        Matches = MergedMatches;
    403416    }
    404     if (mMoveMatchesToEOL) {
     417    if (hasComponent(mRequiredComponents, Component::MoveMatchesToEOL)) {
    405418        StreamSet * const MovedMatches = P->CreateStreamSet();
    406419        P->CreateKernelCall<MatchedLinesKernel>(Matches, LineBreakStream, MovedMatches);
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r6184 r6203  
    1 
     1
    22/*
    33 *  Copyright (c) 2018 International Characters.
     
    8989
    9090protected:
     91    // Functional components that may be required for grep searches,
     92    // depending on search pattern, mode flags, external parameters and
     93    // implementation strategy.
     94    typedef uint32_t component_t;
     95    enum class Component : component_t {
     96        MoveMatchesToEOL = 0x01,
     97        GraphemeClusterBoundary = 0x02
     98    };
     99    bool hasComponent(Component compon_set, Component c);
     100    void setComponent(Component & compon_set, Component c);
     101
    91102    std::pair<kernel::StreamSet *, kernel::StreamSet *> grepPipeline(const std::unique_ptr<kernel::PipelineBuilder> & P,
    92103                                                                     kernel::StreamSet * ByteStream);
     
    126137    re::CC * mBreakCC;
    127138    std::string mFileSuffix;
     139    Component mRequiredComponents;
    128140    bool mMoveMatchesToEOL;
    129141    pthread_t mEngineThread;
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r6198 r6203  
    219219
    220220}
    221 
    222221// Helper to compute stream set inputs to pass into PabloKernel constructor.
    223222Bindings ICGrepKernel::makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets) {
     
    259258void ICGrepKernel::generatePabloMethod() {
    260259    PabloBuilder pb(getEntryScope());
    261     cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
    262     RE_Compiler re_compiler(getEntryScope(), ccc, mBasisSetNumbering);
     260    std::unique_ptr<cc::CC_Compiler> ccc;
     261    bool useDirectCC = getInput(0)->getType()->getArrayNumElements() == 1;
     262    if (useDirectCC) {
     263        ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0)));
     264    } else {
     265        ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
     266    }
     267    //cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
     268    RE_Compiler re_compiler(getEntryScope(), *ccc.get(), mBasisSetNumbering);
    263269    for (const auto & e : mExternals) {
    264270        re_compiler.addPrecompiled(e.first, pb.createExtract(getInputStreamVar(e.first), pb.getInteger(0)));
     
    274280}
    275281
    276 
    277 ByteGrepSignature::ByteGrepSignature(RE * re)
    278 : mRE(re)
    279 , mSignature(Printer_RE::PrintRE(re) ) {
    280 }
    281 
    282 ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<Binding> inputSets, StreamSet * matches)
    283 : ByteGrepSignature(re)
    284 , PabloKernel(b, "byteGrep" + getStringHash(mSignature),
    285 // inputs
    286 std::move(inputSets),
    287 // output
    288 {Binding{"matches", matches, FixedRate(), Add1()}}) {
    289 
    290 }
    291 
    292 std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    293     return mSignature;
    294 }
    295 
    296 void ByteGrepKernel::generatePabloMethod() {
    297     PabloBuilder pb(getEntryScope());
    298     PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
    299     cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
    300     RE_Compiler re_byte_compiler(getEntryScope(), dcc);
    301     const auto numOfInputs = getNumOfInputs();
    302     for (unsigned i = 1; i < numOfInputs; ++i) {
    303         const Binding & input = getInputStreamSetBinding(i);
    304         re_byte_compiler.addPrecompiled(input.getName(), pb.createExtract(getInputStreamVar(input.getName()), pb.getInteger(0)));
    305     }
    306     PabloAST * const matches = re_byte_compiler.compile(mRE);   
    307     Var * const output = getOutputStreamVar("matches");
    308     pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
    309 }
    310 
    311282// Helper to compute stream set inputs to pass into PabloKernel constructor.
    312 inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
    313                                               const std::vector<std::string> & externals) {
    314     std::vector<Binding> streamSetInputs = {
    315         Binding{b->getStreamSetTy(1, 8), "byteData"},
    316     };
    317     for (auto & e : externals) {
    318         streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
    319     }
    320     return streamSetInputs;
    321 }
     283Bindings ByteBitGrepKernel::makeInputBindings(StreamSet * const basis, const Externals & externals) {
     284    Bindings inputs;
     285    inputs.emplace_back("basis", basis);
     286    for (const auto & e : externals) {
     287        inputs.emplace_back(e.first, e.second);
     288    }
     289    return inputs;
     290}
     291
    322292
    323293ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
     
    327297}
    328298
    329 ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, std::vector<Binding> inputSets, StreamSet * matches)
     299ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, StreamSet * const Source, StreamSet * const matches, const Externals externals)
    330300: ByteBitGrepSignature(prefixRE, suffixRE)
    331301, PabloKernel(b, "bBc" + getStringHash(mSignature),
    332302// inputs
    333 std::move(inputSets),
     303makeInputBindings(Source, externals),
    334304// output
    335305{Binding{"matches", matches, FixedRate(), Add1()}}) {
    336 
     306   
    337307}
    338308
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r6184 r6203  
    8484    const bool mIsCachable;
    8585};
    86 
    87 struct ByteGrepSignature {
    88     ByteGrepSignature(re::RE * re);
    89 protected:
    90     re::RE * const  mRE;
    91     std::string     mSignature;
    92 };
    93 
    94 
    95 class ByteGrepKernel : public ByteGrepSignature, public pablo::PabloKernel {
    96 public:
    97     ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re, std::vector<Binding> inputSets, StreamSet * matches);
    98     std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) override;
    99     bool isCachable() const override { return true; }
    100     bool hasFamilyName() const override { return true; }
    101 protected:
    102     void generatePabloMethod() override;
    103 };
    10486   
    10587struct ByteBitGrepSignature {
     
    11395   
    11496class ByteBitGrepKernel : public ByteBitGrepSignature, public pablo::PabloKernel {
     97    using Externals = std::vector<std::pair<std::string, StreamSet *>>;
    11598public:
    116     ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const prefix, re::RE * const suffix, std::vector<Binding> inputSets, StreamSet * matches);
     99    ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const prefix, re::RE * const suffix, StreamSet * const Source, StreamSet * const MatchResults,
     100                      const Externals externals = {});
    117101    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) override;
    118102    bool isCachable() const override { return true; }
    119103    bool hasFamilyName() const override { return true; }
     104private:
     105    static Bindings makeInputBindings(StreamSet * const source, const Externals & externals);
    120106protected:
    121107    void generatePabloMethod() override;
Note: See TracChangeset for help on using the changeset viewer.