Changeset 5252


Ignore:
Timestamp:
Jan 10, 2017, 2:00:04 PM (10 months ago)
Author:
cameron
Message:

Separate doSegment/final segment processing in pipeline loop; check optional NoTerminateAttribute?

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5248 r5252  
    4343set(Boost_USE_STATIC_RUNTIME OFF)
    4444find_package(Boost 1.46 REQUIRED COMPONENTS system filesystem iostreams)
     45message(STATUS "Found Boost_LIBRARY_DIR: ${Boost_LIBRARY_DIR}")
    4546
    4647include_directories("${Boost_INCLUDE_DIRS}")
    4748link_directories(${Boost_LIBRARY_DIR})
    4849
    49 SET(KERNEL_SRC kernels/kernel.cpp kernels/s2p_kernel.cpp kernels/streamset.cpp kernels/interface.cpp kernels/mmap_kernel.cpp)
     50SET(KERNEL_SRC kernels/kernel.cpp kernels/pipeline.cpp kernels/s2p_kernel.cpp kernels/streamset.cpp kernels/interface.cpp kernels/mmap_kernel.cpp)
    5051SET(IDISA_SRC IR_Gen/CBuilder.cpp IR_Gen/types/streamtype.cpp IR_Gen/idisa_builder.cpp IR_Gen/idisa_avx_builder.cpp IR_Gen/idisa_i64_builder.cpp IR_Gen/idisa_sse_builder.cpp IR_Gen/idisa_nvptx_builder.cpp IR_Gen/idisa_target.cpp)
    5152
     
    7576target_link_libraries (RegExpCompiler RegExpADT)
    7677
    77 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp)
    78 add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
    79 add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
    80 add_executable(wc wc.cpp toolchain.cpp kernels/pipeline.cpp)
    81 add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp kernels/pipeline.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
    82 add_executable(array-test array-test.cpp toolchain.cpp kernels/pipeline.cpp)
     78add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp)
     79add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
     80add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
     81add_executable(wc wc.cpp toolchain.cpp)
     82add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
     83add_executable(array-test array-test.cpp toolchain.cpp)
    8384
    8485IF (PRINT_TIMING_INFORMATION)
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5250 r5252  
    2222                             std::vector<Binding> scalar_outputs,
    2323                             std::vector<Binding> internal_scalars)
    24 : KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {
     24: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars),
     25mNoTerminateAttribute(false) {
    2526
    2627}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5251 r5252  
    4444    virtual Value * getProcessedItemCount(Value * self, const std::string & ssName) const override;
    4545    virtual Value * getProducedItemCount(Value * self, const std::string & ssName) const override;
     46   
     47    bool hasNoTerminateAttribute() { return mNoTerminateAttribute;}
     48   
    4649    Value * getTerminationSignal(Value * self) const override;
    4750   
     
    98101    // all scalar fields have been added.   If there are no fields to
    99102    // be added, the default method for preparing kernel state may be used.
     103   
     104    void setNoTerminateAttribute(bool noTerminate = true) {mNoTerminateAttribute = noTerminate;}
    100105   
    101106    virtual void prepareKernel();
     
    173178    std::vector<StreamSetBuffer *>  mStreamSetInputBuffers;
    174179    std::vector<StreamSetBuffer *>  mStreamSetOutputBuffers;
     180    bool                            mNoTerminateAttribute;
    175181
    176182};
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5251 r5252  
    1818
    1919
    20 static void createStreamBufferMap(BufferMap bufferMap, std::vector<KernelBuilder *> kernels) {
     20static void createStreamBufferMap(BufferMap & bufferMap, std::vector<KernelBuilder *> kernels) {
    2121    for (auto k: kernels) {
    2222        auto outputSets = k->getStreamSetOutputBuffers();
     
    3535}
    3636
     37static Value * getSegmentBlocks(BufferMap & bufferMap, KernelBuilder * kernel) {
     38    IDISA::IDISA_Builder * iBuilder = kernel->getBuilder();
     39    std::cerr << "getSegmentBlocks\n";
     40
     41    KernelBuilder * sourceKernel;
     42
     43    unsigned outputIndex;
     44    auto inputs = kernel->getStreamSetInputBuffers();
     45    if (inputs.empty()) return iBuilder->getSize(codegen::SegmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
     46    std::string inputSetName = kernel->getStreamInputs()[0].name;
     47    std::cerr << "inputSetName = " << inputSetName << "\n";
     48    auto f = bufferMap.find(inputs[0]);
     49    assert(f != bufferMap.end()  && "bufferMap failure");
     50    std::tie(sourceKernel, outputIndex) = f->second;
     51    std::cerr << "outputIndex = " << outputIndex << "\n";
     52    Value * produced = sourceKernel->getProducedItemCount(sourceKernel->getInstance(), sourceKernel->getStreamOutputs()[outputIndex].name);
     53    iBuilder->CallPrintInt("produced", produced);
     54    Value * processed = kernel->getProcessedItemCount(kernel->getInstance(), inputSetName);
     55    iBuilder->CallPrintInt("processed", processed);
     56    Value * itemsToDo = iBuilder->CreateSub(produced, processed);
     57    return iBuilder->CreateUDiv(itemsToDo, iBuilder->getSize(iBuilder->getStride()));
     58}
     59                                   
     60
    3761
    3862Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels, Type * sharedStructType, int id) {
     
    215239void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
    216240   
     241   
     242    for (auto k : kernels) k->createInstance();
     243    //BufferMap bufferMap;
     244    //createStreamBufferMap(bufferMap, kernels);
     245   
    217246    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    218247    Function * main = entryBlock->getParent();
    219        
    220     const unsigned segmentSize = codegen::SegmentSize;
    221     Type * const size_ty = iBuilder->getSizeTy();
    222 
    223     // Create the basic blocks for the loop.
    224     BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
     248
     249    // Create the basic blocks. 
     250    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
    225251    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
    226     for (auto k : kernels) k->createInstance();
    227     iBuilder->CreateBr(segmentBlock);
    228     iBuilder->SetInsertPoint(segmentBlock);
    229     Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
    230     for (unsigned i = 0; i < kernels.size(); i++) {
     252    // We create vectors of loop body and final segment blocks indexed by kernel.
     253    std::vector<BasicBlock *> loopBodyBlocks;
     254    std::vector<BasicBlock *> finalSegmentBlocks;
     255
     256    loopBodyBlocks.push_back(segmentLoop);
     257    finalSegmentBlocks.push_back(nullptr); 
     258   
     259    for (unsigned i = 1; i < kernels.size(); i++) {
     260        if (kernels[i-1]->hasNoTerminateAttribute()) {
     261            // Previous kernel cannot terminate.   Continue with the previous blocks;
     262            loopBodyBlocks.push_back(loopBodyBlocks.back());
     263            finalSegmentBlocks.push_back(finalSegmentBlocks.back());
     264        }
     265        else {
     266            loopBodyBlocks.push_back(BasicBlock::Create(iBuilder->getContext(), "do_" + kernels[i]->getName(), main, 0));
     267            finalSegmentBlocks.push_back(BasicBlock::Create(iBuilder->getContext(), "finish_" + kernels[i]->getName(), main, 0));
     268        }
     269    }
     270    loopBodyBlocks.push_back(segmentLoop); // If the last kernel does not terminate, loop back.
     271    finalSegmentBlocks.push_back(exitBlock); // If the last kernel does terminate, we're done.
     272   
     273    iBuilder->CreateBr(segmentLoop);
     274    Constant * segBlocks = iBuilder->getSize(codegen::SegmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
     275    for (unsigned i = 0; i < kernels.size(); i++) {
     276        iBuilder->SetInsertPoint(loopBodyBlocks[i]);
     277        //Value * segBlocks = getSegmentBlocks(bufferMap, kernels[i]);
     278        Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
    231279        kernels[i]->createDoSegmentCall(kernels[i]->getInstance(), segBlocks);
    232         Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
    233         kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    234     }
    235     Value * endSignal = kernels.back()->getTerminationSignal(kernels.back()->getInstance());
    236     iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock);
     280        if (kernels[i]->hasNoTerminateAttribute()) {
     281            kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     282            if (i == kernels.size() - 1) {
     283                iBuilder->CreateBr(segmentLoop);
     284            }
     285        }
     286        else {
     287            Value * terminated = kernels[i]->getTerminationSignal(kernels[i]->getInstance());
     288            kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     289            iBuilder->CreateCondBr(terminated, finalSegmentBlocks[i+1], loopBodyBlocks[i+1]);
     290        }
     291        if (finalSegmentBlocks[i] != nullptr) {
     292            iBuilder->SetInsertPoint(finalSegmentBlocks[i]);
     293            Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
     294            kernels[i]->createDoSegmentCall(kernels[i]->getInstance(), segBlocks);
     295            kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     296            if (finalSegmentBlocks[i] != finalSegmentBlocks[i+1]) {
     297                iBuilder->CreateBr(finalSegmentBlocks[i+1]);
     298            }
     299        }
     300    }
    237301    iBuilder->SetInsertPoint(exitBlock);
    238 
    239 }
     302}
     303
     304
     305
     306
     307
     308
     309
     310
     311
     312
     313
     314
     315
     316
     317
     318
     319
     320
     321
     322
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5247 r5252  
    199199S2PKernel::S2PKernel(IDISA::IDISA_Builder * builder)
    200200: KernelBuilder(builder, "s2p", {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {Binding{builder->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}) {
    201 
    202 }
    203 
    204 }
     201    setNoTerminateAttribute(true);
     202
     203}
     204
     205}
Note: See TracChangeset for help on using the changeset viewer.