Changeset 5402 for icGREP/icgrep-devel


Ignore:
Timestamp:
Apr 10, 2017, 4:34:41 PM (2 years ago)
Author:
nmedfort
Message:

Moved toolchain and object_cache to kernels directory. Continued work on providing input consumed information.

Location:
icGREP/icgrep-devel/icgrep
Files:
23 edited
4 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5377 r5402  
    5858
    5959SET(KERNEL_SRC kernels/kernel.cpp kernels/pipeline.cpp kernels/streamset.cpp kernels/interface.cpp)
     60SET(KERNEL_SRC ${KERNEL_SRC} kernels/toolchain.cpp kernels/object_cache.cpp)
    6061SET(KERNEL_SRC ${KERNEL_SRC} kernels/s2p_kernel.cpp kernels/mmap_kernel.cpp kernels/deletion.cpp kernels/swizzle.cpp kernels/p2s_kernel.cpp kernels/stdin_kernel.cpp kernels/stdout_kernel.cpp)
    6162
     
    7273ENDIF()
    7374
    74 add_library(CodeGen ${KERNEL_SRC} ${IDISA_SRC} object_cache.cpp)
     75add_library(CodeGen ${KERNEL_SRC} ${IDISA_SRC})
    7576add_library(PabloADT ${PABLO_SRC})
    7677add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
     
    8687target_link_libraries (RegExpCompiler RegExpADT)
    8788
    88 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp)
    89 add_executable(u8u16 u8u16.cpp toolchain.cpp)
    90 add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp)
    91 add_executable(wc wc.cpp toolchain.cpp)
    92 add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
    93 add_executable(array-test array-test.cpp toolchain.cpp kernels/alignedprint.cpp)
     89add_executable(icgrep icgrep.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp)
     90add_executable(u8u16 u8u16.cpp)
     91add_executable(base64 base64.cpp kernels/radix64.cpp)
     92add_executable(wc wc.cpp)
     93add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
     94add_executable(array-test array-test.cpp kernels/alignedprint.cpp)
    9495
    9596## IWYU detects superfluous includes and when the include can be replaced with a forward declaration.
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5398 r5402  
    1111#include <llvm/IR/TypeBuilder.h>
    1212#include <llvm/IR/MDBuilder.h>
     13#include <llvm/Support/raw_ostream.h>
     14#include <kernels/toolchain.h>
    1315#include <fcntl.h>
    1416#include <unistd.h>
    1517#include <sys/mman.h>
    16 #include <toolchain.h>
    1718#include <errno.h>
    18 #include <llvm/Support/raw_ostream.h>
    19 
    2019
    2120using namespace llvm;
     
    402401        pthreadCreateFunc->setCallingConv(CallingConv::C);
    403402    }
    404     return CreateCall(pthreadCreateFunc, {thread, attr, start_routine, arg});
     403    return CreateCall(pthreadCreateFunc, {thread, attr, start_routine, CreatePointerCast(arg, getVoidPtrTy())});
    405404}
    406405
  • icGREP/icgrep-devel/icgrep/IR_Gen/CudaDriver.h

    r5352 r5402  
    66#include <unistd.h>
    77#include <cassert>
    8 #include <toolchain.h>
     8#include <kernels/toolchain.h>
    99#include "cuda.h"
    1010
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5398 r5402  
    1313#include <llvm/Support/raw_ostream.h>
    1414#include <llvm/IR/TypeBuilder.h>
    15 #include <toolchain.h>
     15#include <kernels/toolchain.h>
    1616
    1717using namespace llvm;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5361 r5402  
    55
    66#include "idisa_target.h"
    7 #include <toolchain.h>
     7#include <kernels/toolchain.h>
    88#include <IR_Gen/idisa_avx_builder.h>
    99#include <IR_Gen/idisa_sse_builder.h>
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5401 r5402  
    2121#include <pablo/pe_zeroes.h>
    2222#include <pablo/pe_ones.h>
    23 #include <toolchain.h>                             // for JIT_to_ExecutionEn...
     23#include <kernels/toolchain.h>
    2424#include <pablo/builder.hpp>                       // for PabloBuilder
    2525#include <boost/filesystem.hpp>
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5401 r5402  
    1111#include <llvm/IR/Verifier.h>
    1212#include <llvm/Support/CommandLine.h>
    13 #include <toolchain.h>
     13#include <kernels/toolchain.h>
    1414#include <IR_Gen/idisa_builder.h>
    1515#include <IR_Gen/idisa_target.h>
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5401 r5402  
    88#include <iostream>
    99#include <fstream>
    10 #include <toolchain.h>
     10#include <kernels/toolchain.h>
    1111#include <pablo/pablo_toolchain.h>
    1212#include <llvm/IR/Function.h>
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5401 r5402  
    3030#include <re/re_cc.h>
    3131#include <re/re_toolchain.h>
    32 #include <toolchain.h>
     32#include <kernels/toolchain.h>
    3333#include <iostream>
    3434#include <sstream>
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5377 r5402  
    5252kernels/mmap_kernel.cpp
    5353kernels/mmap_kernel.h
     54kernels/object_cache.cpp
     55kernels/object_cache.h
    5456kernels/p2s_kernel.cpp
    5557kernels/p2s_kernel.h
     
    7274kernels/swizzle.cpp
    7375kernels/swizzle.h
     76kernels/toolchain.cpp
     77kernels/toolchain.h
    7478pablo/analysis/pabloverifier.cpp
    7579pablo/analysis/pabloverifier.hpp
     
    227231hrtime.h
    228232icgrep.cpp
    229 object_cache.cpp
    230 object_cache.h
    231233preprocess.cpp
    232 toolchain.cpp
    233 toolchain.h
    234234u8u16.cpp
    235235utf16_encoder.cpp
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5401 r5402  
    2121#include <string>
    2222#include <boost/uuid/sha1.hpp>
    23 #include <toolchain.h>
     23#include <kernels/toolchain.h>
    2424#include <re/re_toolchain.h>
    2525#include <pablo/pablo_toolchain.h>
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5401 r5402  
    104104    params.insert(params.end(), mStreamSetInputs.size() + mStreamSetOutputs.size(), sizeTy);
    105105
    106     Type * retType = nullptr;
    107     if (mStreamSetInputs.empty()) {
    108         retType = iBuilder->getVoidTy();
    109     } else {
    110         retType = ArrayType::get(sizeTy, mStreamSetInputs.size());
    111     }
    112 
    113     FunctionType * const doSegmentType = FunctionType::get(retType, params, false);
     106    FunctionType * const doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    114107    Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    115108    doSegment->setCallingConv(CallingConv::C);
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5401 r5402  
    159159    }
    160160   
    161 //    virtual void addAdditionalKernelDeclarations(llvm::Module * module, llvm::PointerType * selfType) {}
    162 
    163161protected:
    164162   
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5401 r5402  
    55
    66#include "kernel.h"
    7 #include <toolchain.h>
     7#include <kernels/toolchain.h>
    88#include <kernels/streamset.h>
    99#include <llvm/IR/Constants.h>
     
    208208    }
    209209    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    210     if (LLVM_UNLIKELY(mStreamSetInputs.empty())) {
    211         iBuilder->CreateRetVoid();
    212     } else {
    213         const unsigned n = mStreamSetInputs.size();
    214         Value * values[n];
    215         for (unsigned i = 0; i < n; ++i) {
    216             values[i] = getProcessedItemCount(mStreamSetInputs[i].name);
    217         }
    218         iBuilder->CreateAggregateRet(values, n);
    219     }
     210    iBuilder->CreateRetVoid();
    220211}
    221212
     
    305296}
    306297
     298llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
     299    auto arg = mCurrentMethod->arg_begin();
     300    ++arg; // self
     301    ++arg; // doFinal
     302    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
     303        if (mStreamSetInputs[i].name == name) {
     304            return &*arg;
     305        }
     306        ++arg;
     307    }
     308    return nullptr;
     309}
     310
    307311Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
    308312    assert ("instance cannot be null!" && instance);
     
    321325}
    322326
     327llvm::Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
     328    auto arg = mCurrentMethod->arg_begin();
     329    ++arg; // self
     330    ++arg; // doFinal
     331    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
     332        ++arg; // input
     333    }
     334    for (unsigned i = 0; i < mStreamSetOutputs.size(); ++i) {
     335        if (mStreamSetOutputs[i].name == name) {
     336            return &*arg;
     337        }
     338        ++arg;
     339    }
     340    return nullptr;
     341}
     342
    323343void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
    324344    assert ("instance cannot be null!" && instance);
     
    338358void KernelBuilder::setTerminationSignal(Value * instance) const {
    339359    assert ("instance cannot be null!" && instance);
    340     setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
     360    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getTrue());
    341361}
    342362
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5401 r5402  
    131131    const std::vector<const parabix::StreamSetBuffer *> & getStreamSetInputBuffers() const { return mStreamSetInputBuffers; }
    132132
     133    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const { return mStreamSetInputBuffers[i]; }
     134
    133135    const std::vector<const parabix::StreamSetBuffer *> & getStreamSetOutputBuffers() const { return mStreamSetOutputBuffers; }
     136
     137    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const { return mStreamSetOutputBuffers[i]; }
    134138
    135139    llvm::Value * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
     
    253257    }
    254258
     259    llvm::Value * getAvailableItemCount(const std::string & name) const;
     260
    255261    inline llvm::Value * getProcessedItemCount(const std::string & name) const {
    256262        return getProcessedItemCount(getSelf(), name);
     
    260266        setProcessedItemCount(getSelf(), name, value);
    261267    }
     268
     269    llvm::Value * getConsumedItemCount(const std::string & name) const;
    262270
    263271    llvm::Value * getTerminationSignal() const {
     
    333341                          std::vector<Binding> && internal_scalars);
    334342
    335     virtual ~SegmentOrientedKernel() { }
    336 
    337343};
    338344
     
    365371                        std::vector<Binding> && internal_scalars);
    366372
    367     virtual ~BlockOrientedKernel() { }
    368 
    369373private:
    370374
  • icGREP/icgrep-devel/icgrep/kernels/object_cache.cpp

    r5401 r5402  
    1 #include <string>
    2 
    31#include "object_cache.h"
    42#include <llvm/Support/raw_ostream.h>
     
    64#include <llvm/Support/Path.h>
    75#include <llvm/IR/Module.h>
    8 
    9 #ifdef OBJECT_CACHE_DEBUG
    10 #include <iostream>
    11 #endif
     6#include <string>
    127
    138using namespace llvm;
     
    5146    outfile.write(Obj.getBufferStart(), Obj.getBufferSize());
    5247    outfile.close();
    53 #ifdef OBJECT_CACHE_DEBUG
    54     std::cerr << "Cache file created: " << CacheName.c_str() << std::endl;
    55 #endif
    5648    auto s = kernelSignatureMap.find(ModuleID);  // Check for a kernel signature.
    5749    if (s != kernelSignatureMap.end()) {
     
    6153        sigfile << s->second;
    6254        sigfile.close();
    63 #ifdef OBJECT_CACHE_DEBUG
    64         std::cerr << "Signature file created: " << CacheName.c_str() << std::endl;
    65 #endif
    6655    }
    6756}
     
    7261    if (s!= kernelSignatureMap.end()) {
    7362        if (s->second != signature) {
     63
     64
    7465#ifdef OBJECT_CACHE_DEBUG
    7566            std::cerr << "loadCachedObjectFile:  conflicting signatures for the same moduleID! " << ModuleID << std::endl;
     
    7869        }
    7970        // A cached entry exists if it has already been loaded.
    80         return cachedObjectMap.find(ModuleID) != cachedObjectMap.end();
     71        return cachedObjectMap.count(ModuleID) != 0;
    8172    }
    8273    // Confirm that the module is cacheable.
    8374    Path CachedObjectName(CacheDir);
    84     if (!getCacheFilename(ModuleID, CachedObjectName)) return false;
     75    if (!getCacheFilename(ModuleID, CachedObjectName)) {
     76        return false;
     77    }
    8578    //
    8679    // Save the signature.
     
    9689        ErrorOr<std::unique_ptr<MemoryBuffer>> SignatureBuffer = MemoryBuffer::getFile(CachedObjectName.c_str(), -1, false);
    9790        if (!SignatureBuffer) {
    98 #ifdef OBJECT_CACHE_DEBUG
    99             std::cerr << "signature file expected but not Found. " << ModuleID << std::endl;
    100 #endif
     91            report_fatal_error("signature file expected but not found: " + ModuleID);
    10192            return false;
    10293        }
    103         StringRef loadedSig = SignatureBuffer.get()->getBuffer();
    104         StringRef computedSig = signature;
    105         if (!computedSig.equals(loadedSig)) {
    106 #ifdef OBJECT_CACHE_DEBUG
    107             std::cerr << "computed signature does not match stored signature: " << ModuleID << std::endl;
    108 #endif
    109             return false;
     94        StringRef loadedSig = SignatureBuffer.get()->getBuffer(); 
     95        if (!loadedSig.equals(signature)) {
     96            report_fatal_error("computed signature does not match stored signature: " + ModuleID);
    11097        }
    111         // Signature is confirmed.
    112 #ifdef OBJECT_CACHE_DEBUG
    113         std::cerr << "loadCachedObjectFile: computed signature matches stored signature. " << ModuleID << std::endl;
    114 #endif
    11598    }
    116 #ifdef OBJECT_CACHE_DEBUG
    117     std::cerr << "Found cached object." << CachedObjectName.c_str() << std::endl;
    118 #endif
    11999    // Make a copy so that the JIT engine can freely modify it.
    120100    cachedObjectMap.emplace(ModuleID, std::move(KernelObjectBuffer.get()));
     
    140120        return nullptr;
    141121    }
    142 #ifdef OBJECT_CACHE_DEBUG
    143     std::cerr << "Object retrieved by engine. "<< ModuleID << std::endl;
    144 #endif
    145122    // Return a copy of the buffer, for MCJIT to modify, if necessary.
    146123    return MemoryBuffer::getMemBufferCopy(f->second.get()->getBuffer());
     
    154131    CacheName = CacheDir;
    155132    sys::path::append(CacheName, ModID.substr(PrefixLength) + ".o");
    156 #ifdef OBJECT_CACHE_DEBUG
    157     std::cerr << "CacheName: " << CacheName.c_str() << std::endl;
    158 #endif
    159133    return true;
    160134}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5297 r5402  
    1515public:
    1616    P2SKernel(IDISA::IDISA_Builder * iBuilder);
     17    bool moduleIDisSignature() override {return true;}
    1718private:
    1819    void generateDoBlockMethod() override;
     
    2223public:
    2324    P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);   
     25    bool moduleIDisSignature() override {return true;}
    2426private:
    2527    void generateDoBlockMethod() override;
     
    2830class P2S16Kernel : public BlockOrientedKernel {
    2931public:
    30     P2S16Kernel(IDISA::IDISA_Builder * iBuilder);   
     32    P2S16Kernel(IDISA::IDISA_Builder * iBuilder);
     33    bool moduleIDisSignature() override {return true;}
    3134private:
    3235    void generateDoBlockMethod() override;
     
    3639public:
    3740    P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder);
     41    bool moduleIDisSignature() override {return true;}
    3842private:
    3943    void generateDoBlockMethod() override;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5401 r5402  
    55
    66#include "pipeline.h"
    7 #include <toolchain.h>
     7#include <kernels/toolchain.h>
    88#include <kernels/kernel.h>
    99#include <kernels/streamset.h>
     
    8080}
    8181
    82 Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, int id) {
     82Function * makeThreadFunction(const std::string & name, Module * const m) {
     83    LLVMContext & C = m->getContext();
     84    Type * const voidTy = Type::getVoidTy(C);
     85    PointerType * const int8PtrTy = Type::getInt8PtrTy(C);
     86    Function * const f = Function::Create(FunctionType::get(voidTy, {int8PtrTy}, false), Function::InternalLinkage, name, m);
     87    f->setCallingConv(CallingConv::C);
     88    f->arg_begin()->setName("input");
     89    return f;
     90}
     91
     92Function * generateSegmentParallelPipelineThreadFunction(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, const unsigned id) {
    8393   
    8494    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     
    92102    const auto ip = iBuilder->saveIP();
    93103   
    94     Module * m = iBuilder->getModule();
    95     Type * const voidTy = iBuilder->getVoidTy();
     104    Module * const m = iBuilder->getModule();
    96105    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    97     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    98 
    99     Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
    100     threadFunc->setCallingConv(CallingConv::C);
    101     Function::arg_iterator args = threadFunc->arg_begin();
    102 
    103     Value * const input = &*(args++);
    104     input->setName("input");
    105 
     106
     107    Function * const threadFunc = makeThreadFunction("thread" + std::to_string(id), m);
    106108
    107109     // Create the basic blocks for the thread function.
    108     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
    109     BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0);
    110     BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
     110    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc);
     111    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc);
     112    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc);
    111113   
    112114    std::vector<BasicBlock *> segmentWait;
     
    114116    for (unsigned i = 0; i < kernels.size(); i++) {
    115117        auto kname = kernels[i]->getName();
    116         segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "Wait", threadFunc, 0));
    117         segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "Do", threadFunc, 0));
     118        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "Wait", threadFunc));
     119        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "Do", threadFunc));
    118120    }
    119121
    120122    iBuilder->SetInsertPoint(entryBlock);
    121123   
    122     Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
     124    Value * input = &(*threadFunc->arg_begin());
     125    Value * sharedStruct = iBuilder->CreateBitCast(input, sharedStructType->getPointerTo());
    123126    std::vector<Value *> instancePtrs;
    124127    for (unsigned k = 0; k < kernels.size(); k++) {
     
    145148        Value * ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    146149
    147         KernelBuilder * const K = kernels[k];
    148 
    149         if (K->hasNoTerminateAttribute()) {
     150        KernelBuilder * const kernel = kernels[k];
     151
     152        if (kernel->hasNoTerminateAttribute()) {
    150153            iBuilder->CreateCondBr(ready, segmentLoopBody[k], segmentWait[k]);
    151154        } else { // If the kernel was terminated in a previous segment then the pipeline is done.
    152             BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), K->getName() + "Completed", threadFunc, 0);
    153             BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), K->getName() + "Exit", threadFunc, 0);
     155            BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Completed", threadFunc, 0);
     156            BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Exit", threadFunc, 0);
    154157            iBuilder->CreateCondBr(ready, completionTest, segmentWait[k]);
    155158            iBuilder->SetInsertPoint(completionTest);
    156             Value * alreadyDone = K->getTerminationSignal(instancePtrs[k]);
     159            Value * alreadyDone = kernel->getTerminationSignal(instancePtrs[k]);
    157160            iBuilder->CreateCondBr(alreadyDone, exitBlock, segmentLoopBody[k]);
    158161            iBuilder->SetInsertPoint(exitBlock);
    159162            // Ensure that the next thread will also exit.
    160             K->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
     163            kernel->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
    161164            iBuilder->CreateBr(exitThreadBlock);
    162165        }
    163166        iBuilder->SetInsertPoint(segmentLoopBody[k]);
    164167        std::vector<Value *> args = {instancePtrs[k], doFinal};
    165         for (unsigned j = 0; j < K->getStreamInputs().size(); j++) {
     168        for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
    166169            unsigned producerKernel, outputIndex;
    167170            std::tie(producerKernel, outputIndex) = producerTable[k][j];
    168171            args.push_back(ProducerPos[producerKernel][outputIndex]);
    169172        }
    170         K->createDoSegmentCall(args);
    171          if (! (K->hasNoTerminateAttribute())) {
    172             Value * terminated = K->getTerminationSignal(instancePtrs[k]);
     173        for (unsigned i = 0; i < kernel->getStreamOutputs().size(); ++i) {
     174            args.push_back(iBuilder->getSize(0));
     175        }
     176        kernel->createDoSegmentCall(args);
     177         if (!(kernel->hasNoTerminateAttribute())) {
     178            Value * terminated = kernel->getTerminationSignal(instancePtrs[k]);
    173179            doFinal = iBuilder->CreateOr(doFinal, terminated);
    174180        }
    175181        std::vector<Value *> produced;
    176         for (unsigned i = 0; i < K->getStreamOutputs().size(); i++) {
    177             produced.push_back(K->getProducedItemCount(instancePtrs[k], K->getStreamOutputs()[i].name, doFinal));
     182        for (unsigned i = 0; i < kernel->getStreamOutputs().size(); i++) {
     183            produced.push_back(kernel->getProducedItemCount(instancePtrs[k], kernel->getStreamOutputs()[i].name, doFinal));
    178184        }
    179185        ProducerPos.push_back(produced);
    180186
    181         K->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
     187        kernel->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
    182188        if (k == last_kernel) {
    183189            segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(codegen::ThreadNum)), segmentLoopBody[last_kernel]);
     
    193199    iBuilder->CreateRetVoid();
    194200    iBuilder->restoreIP(ip);
    195 
    196201    return threadFunc;
    197202}
     
    212217    PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    213218   
    214     for (auto k : kernels) k->createInstance();
    215    
     219    for (auto k : kernels) {
     220        k->createInstance();
     221    }
     222
    216223    const ProducerTable producerTable = createProducerTable(kernels);
    217224   
     
    243250    const auto ip = iBuilder->saveIP();
    244251    for (int i = 0; i < codegen::ThreadNum; i++) {
    245         thread_functions.push_back(generateSegmentParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, producerTable, i));
     252        thread_functions.push_back(generateSegmentParallelPipelineThreadFunction(iBuilder, kernels, sharedStructType, producerTable, i));
    246253    }
    247254    iBuilder->restoreIP(ip);
    248255   
    249256    for (int i = 0; i < codegen::ThreadNum; i++) {
    250         iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], iBuilder->CreateBitCast(sharedStruct, int8PtrTy));
     257        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], sharedStruct);
    251258    }
    252259   
     
    262269}
    263270
    264 Function * generateParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, const ConsumerTable & consumerTable, const unsigned id) {
     271Function * generateParallelPipelineThreadFunction(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, const ConsumerTable & consumerTable, const unsigned id) {
    265272       
    266273    const auto ip = iBuilder->saveIP();
    267274   
    268275    Module * m = iBuilder->getModule();
    269     Type * const voidTy = iBuilder->getVoidTy();
    270     IntegerType * const size_ty = iBuilder->getSizeTy();
    271     PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    272     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    273     IntegerType * const int1ty = iBuilder->getInt1Ty();
    274 
    275     Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
    276     threadFunc->setCallingConv(CallingConv::C);
    277     Function::arg_iterator args = threadFunc->arg_begin();
    278 
    279     Value * const input = &*(args++);
    280     input->setName("input");
    281 
    282     KernelBuilder * const targetK = kernels[id];
    283     Value * bufferSegments = ConstantInt::get(size_ty, codegen::BufferSegments - 1);
     276    Function * const threadFunc = makeThreadFunction("thread" + std::to_string(id), m);
     277
     278    KernelBuilder * const kernel = kernels[id];
     279    Value * bufferSegments = ConstantInt::get(iBuilder->getSizeTy(), codegen::BufferSegments - 1);
    284280    ConstantInt * segmentItems = iBuilder->getSize(codegen::SegmentSize * iBuilder->getBitBlockWidth());
    285     Value * waitCondTest = nullptr;
    286281
    287282     // Create the basic blocks for the thread function.
    288     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
    289     BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
    290     BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
    291     BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
     283    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc);
     284    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc);
     285    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc);
     286    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc);
    292287
    293288    iBuilder->SetInsertPoint(entryBlock);
    294289   
    295     Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
    296     std::vector<Value *> instancePtrs;
    297     std::vector<std::vector<Value *>> ProducerPos;
    298     for (unsigned k = 0; k < kernels.size(); k++) {
    299         KernelBuilder * K = kernels[k];
    300 
    301         Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    302         instancePtrs.push_back(iBuilder->CreateLoad(ptr));
    303 
    304         std::vector<Value *> produced;
    305         for (unsigned i = 0; i < K->getStreamOutputs().size(); i++) {
    306             produced.push_back(K->getProducedItemCount(instancePtrs[k], K->getStreamOutputs()[i].name));
    307         }
    308         ProducerPos.push_back(produced);
     290    Value * input = &(*threadFunc->arg_begin());
     291    Value * sharedStruct = iBuilder->CreateBitCast(input, sharedStructType->getPointerTo());
     292
     293    const unsigned n = kernels.size();
     294
     295    Value * instancePtrs[n];
     296    for (unsigned k = 0; k < n; k++) {
     297        Value * const ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
     298        instancePtrs[k] = iBuilder->CreateLoad(ptr);
    309299    }
    310300
     
    312302
    313303    iBuilder->SetInsertPoint(outputCheckBlock);
    314     PHINode * segNo = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "segNo");
     304    PHINode * segNo = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "segNo");
    315305    segNo->addIncoming(iBuilder->getSize(0), entryBlock);
    316306    segNo->addIncoming(segNo, outputCheckBlock);
    317307
    318     waitCondTest = ConstantInt::getTrue(int1ty);
    319     for (unsigned j = 0; j < targetK->getStreamOutputs().size(); j++) {
     308    Value * outputWaitCond = iBuilder->getTrue();
     309    for (unsigned j = 0; j < kernel->getStreamOutputs().size(); j++) {
    320310        const auto & consumerKernels = consumerTable[id][j];
    321311        for (unsigned k = 0; k < consumerKernels.size(); k++) {
    322312            Value * consumerSegNo = kernels[consumerKernels[k]]->acquireLogicalSegmentNo(instancePtrs[consumerKernels[k]]);
    323             waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(segNo, iBuilder->CreateAdd(consumerSegNo, bufferSegments)));
    324         } 
    325     }
    326 
    327     if (targetK->getStreamInputs().empty()) {
    328 
    329         iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, outputCheckBlock);
     313            outputWaitCond = iBuilder->CreateAnd(outputWaitCond, iBuilder->CreateICmpULE(segNo, iBuilder->CreateAdd(consumerSegNo, bufferSegments)));
     314        }
     315    }
     316
     317    if (kernel->getStreamInputs().empty()) {
     318
     319        iBuilder->CreateCondBr(outputWaitCond, doSegmentBlock, outputCheckBlock);
    330320
    331321        iBuilder->SetInsertPoint(doSegmentBlock);
    332322
    333         Value * terminated = targetK->getTerminationSignal(instancePtrs[id]);
    334         std::vector<Value *> doSegmentArgs = {instancePtrs[id], terminated};       
    335         targetK->createDoSegmentCall(doSegmentArgs);
     323        std::vector<Value *> args = {instancePtrs[id], iBuilder->getFalse()};
     324        for (unsigned i = 0; i < kernel->getStreamOutputs().size(); ++i) {
     325            args.push_back(iBuilder->getSize(0));
     326        }
     327        kernel->createDoSegmentCall(args);
    336328        Value * nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    337329        segNo->addIncoming(nextSegNo, doSegmentBlock);
    338         targetK->releaseLogicalSegmentNo(instancePtrs[id], nextSegNo);
    339 
     330        Value * const terminated = kernel->getTerminationSignal(instancePtrs[id]);
     331        kernel->releaseLogicalSegmentNo(instancePtrs[id], nextSegNo);
    340332        iBuilder->CreateCondBr(terminated, exitThreadBlock, outputCheckBlock);
    341333
    342334    } else {
    343335
    344         BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
    345 
    346         iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
     336        BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, doSegmentBlock);
     337
     338        iBuilder->CreateCondBr(outputWaitCond, inputCheckBlock, outputCheckBlock);
    347339
    348340        iBuilder->SetInsertPoint(inputCheckBlock);
    349341       
    350         waitCondTest = ConstantInt::getTrue(int1ty);
    351         for (unsigned j = 0; j < targetK->getStreamInputs().size(); j++) {
     342        Value * inputWaitCond = iBuilder->getTrue();
     343        for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
    352344            unsigned producerKernel, outputIndex;
    353345            std::tie(producerKernel, outputIndex) = producerTable[id][j];
    354346            Value * producerSegNo = kernels[producerKernel]->acquireLogicalSegmentNo(instancePtrs[producerKernel]);
    355             waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULT(segNo, producerSegNo)); 
    356         }
    357 
    358         iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, inputCheckBlock);
     347            inputWaitCond = iBuilder->CreateAnd(inputWaitCond, iBuilder->CreateICmpULT(segNo, producerSegNo));
     348        }
     349
     350        iBuilder->CreateCondBr(inputWaitCond, doSegmentBlock, inputCheckBlock);
    359351
    360352        iBuilder->SetInsertPoint(doSegmentBlock);
    361353
    362354        Value * nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    363         Value * terminated = ConstantInt::get(int1ty, 1);
    364         for (unsigned j = 0; j < targetK->getStreamInputs().size(); j++) {
     355
     356        Value * terminated = iBuilder->getTrue();
     357        for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
    365358            unsigned producerKernel, outputIndex;
    366359            std::tie(producerKernel, outputIndex) = producerTable[id][j];
     
    370363        }
    371364       
    372         std::vector<Value *> doSegmentArgs = {instancePtrs[id], terminated};
    373         for (unsigned j = 0; j < targetK->getStreamInputs().size(); j++) {
     365        std::vector<Value *> args = {instancePtrs[id], terminated};
     366        for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
    374367            unsigned producerKernel, outputIndex;
    375368            std::tie(producerKernel, outputIndex) = producerTable[id][j];
    376             doSegmentArgs.push_back(iBuilder->CreateMul(segmentItems, segNo));
    377         }
    378         targetK->createDoSegmentCall(doSegmentArgs);
     369            args.push_back(iBuilder->CreateMul(segmentItems, segNo));
     370        }
     371        for (unsigned i = 0; i < kernel->getStreamOutputs().size(); ++i) {
     372            args.push_back(iBuilder->getSize(0));
     373        }
     374        kernel->createDoSegmentCall(args);
    379375        segNo->addIncoming(nextSegNo, doSegmentBlock);
    380         targetK->releaseLogicalSegmentNo(instancePtrs[id], nextSegNo);
     376        kernel->releaseLogicalSegmentNo(instancePtrs[id], nextSegNo);
    381377
    382378        iBuilder->CreateCondBr(terminated, exitThreadBlock, outputCheckBlock);
     
    384380
    385381    iBuilder->SetInsertPoint(exitThreadBlock);
    386 
    387     Value * nullVal = Constant::getNullValue(voidPtrTy);
    388     iBuilder->CreatePThreadExitCall(nullVal);
     382    iBuilder->CreatePThreadExitCall(ConstantPointerNull::getNullValue(iBuilder->getVoidPtrTy()));
    389383    iBuilder->CreateRetVoid();
    390384    iBuilder->restoreIP(ip);
    391 
    392385    return threadFunc;
    393 
    394386}
    395387
    396388void generateParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    397     const unsigned threadNum = kernels.size();
    398    
    399     Module * m = iBuilder->getModule();
    400    
     389   
     390    Module * const m = iBuilder->getModule();
    401391    IntegerType * const size_ty = iBuilder->getSizeTy();
    402392    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
     
    406396        k->createInstance();
    407397    }
    408    
     398
    409399    const ProducerTable producerTable = createProducerTable(kernels);
    410400    const ConsumerTable consumerTable = createConsumerTable(kernels);
    411    
    412     Type * const pthreadsTy = ArrayType::get(size_ty, threadNum);
    413     AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
    414     std::vector<Value *> pthreadsPtrs;
    415     for (unsigned i = 0; i < threadNum; i++) {
    416         pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
    417     }
    418     Value * nullVal = Constant::getNullValue(voidPtrTy);
    419     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     401    const unsigned n = kernels.size();
     402
     403    Type * const pthreadsTy = ArrayType::get(size_ty, n);
     404    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);   
     405    Value * pthreadsPtrs[n];
     406    for (unsigned i = 0; i < n; i++) {
     407        pthreadsPtrs[i] = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
     408    }
    420409   
    421410    std::vector<Type *> structTypes;
    422     for (unsigned i = 0; i < kernels.size(); i++) {
     411    for (unsigned i = 0; i < n; i++) {
    423412        structTypes.push_back(kernels[i]->getInstance()->getType());
    424413    }
    425     Type * sharedStructType = StructType::get(m->getContext(), structTypes);
    426    
     414    Type * const sharedStructType = StructType::get(m->getContext(), structTypes);
    427415    AllocaInst * sharedStruct = iBuilder->CreateAlloca(sharedStructType);
    428     for (unsigned i = 0; i < kernels.size(); i++) {
     416    for (unsigned i = 0; i < n; i++) {
    429417        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    430418        iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
    431419    }
    432     for (unsigned i = 0; i < kernels.size(); i++) {
     420    for (unsigned i = 0; i < n; i++) {
    433421        KernelBuilder * const K = kernels[i];
    434422        K->releaseLogicalSegmentNo(K->getInstance(), iBuilder->getSize(0));
    435423    }
    436424
    437     std::vector<Function *> thread_functions;
    438     const auto ip = iBuilder->saveIP();
    439     for (unsigned i = 0; i < threadNum; i++) {
    440         thread_functions.push_back(generateParallelPipelineThreadFunction("thread" + std::to_string(i), iBuilder, kernels, sharedStructType, producerTable, consumerTable, i));
    441     }
    442     iBuilder->restoreIP(ip);
    443    
    444     for (unsigned i = 0; i < threadNum; i++) {
    445         iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], iBuilder->CreateBitCast(sharedStruct, int8PtrTy));
    446     }
    447    
    448     std::vector<Value *> threadIDs;
    449     for (unsigned i = 0; i < threadNum; i++) {
    450         threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
    451     }
    452    
    453     for (unsigned i = 0; i < threadNum; i++) {
    454         iBuilder->CreatePThreadJoinCall(threadIDs[i], status);
     425    Function * thread_functions[n];
     426    for (unsigned i = 0; i < n; i++) {
     427        thread_functions[i] = generateParallelPipelineThreadFunction(iBuilder, kernels, sharedStructType, producerTable, consumerTable, i);
     428    }
     429   
     430    Value * nullVal = Constant::getNullValue(voidPtrTy);
     431    for (unsigned i = 0; i < n; i++) {
     432        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], sharedStruct);
     433    }
     434
     435    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     436    for (unsigned i = 0; i < n; i++) {
     437        Value * threadId = iBuilder->CreateLoad(pthreadsPtrs[i]);
     438        iBuilder->CreatePThreadJoinCall(threadId, status);
    455439    }
    456440
     
    458442
    459443void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
     444
    460445    for (auto k : kernels) {
    461446        k->createInstance();
     
    465450
    466451    // Create the basic blocks for the loop.
    467     BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
    468     BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
    469    
    470     const ProducerTable producer = createProducerTable(kernels);
    471 
    472  //   const ConsumerTable consumer = createConsumerTable(kernels);
    473    
    474     // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
    475     // set of the k^th kernel.  These values will be loaded immediately after the
    476     // doSegment and finalSegment calls for kernel k and later used as the
    477     // producer position arguments for later doSegment/finalSegment calls.
    478    
    479     std::vector<std::vector<Value *>> ProducerPos;
    480    
    481     iBuilder->CreateBr(segmentLoop);
    482     iBuilder->SetInsertPoint(segmentLoop);
    483 
    484     Value * terminated = ConstantInt::getFalse(iBuilder->getContext());
     452    BasicBlock * pipelineLoop = BasicBlock::Create(iBuilder->getContext(), "pipelineLoop", main);
     453    BasicBlock * pipelineExit = BasicBlock::Create(iBuilder->getContext(), "pipelineExit", main);
     454
     455    StreamSetBufferMap<Value *> producedPos;
     456    StreamSetBufferMap<std::pair<PHINode *, Value *>> consumedPos;
     457
     458    iBuilder->CreateBr(pipelineLoop);
     459    iBuilder->SetInsertPoint(pipelineLoop);
     460
     461    // Gather all of our output stream set buffers and create a consumed-position phi node for each
     462    for (unsigned k = 0; k < kernels.size(); k++) {
     463        KernelBuilder * const kernel = kernels[k];
     464        const auto & outputs = kernel->getStreamOutputs();
     465        for (unsigned i = 0; i < outputs.size(); ++i) {
     466            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
     467            if (LLVM_UNLIKELY(consumedPos.count(buf) != 0)) {
     468                report_fatal_error(kernel->getName() + " redefines stream set " + outputs[i].name);
     469            }
     470            PHINode * phi = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     471            phi->addIncoming(iBuilder->getSize(0), entryBlock);
     472            consumedPos.emplace(buf, std::make_pair(phi, nullptr));
     473        }
     474    }
     475
     476    Value * terminated = iBuilder->getFalse();
    485477    for (unsigned k = 0; k < kernels.size(); k++) {
    486478        KernelBuilder * const kernel = kernels[k];
    487479        Value * const instance = kernel->getInstance();
     480        const auto & inputs = kernel->getStreamInputs();
     481        const auto & outputs = kernel->getStreamOutputs();
    488482        std::vector<Value *> args = {instance, terminated};
    489         for (unsigned i = 0; i < kernel->getStreamInputs().size(); ++i) {
    490             unsigned producerKernel, outputIndex;
    491             std::tie(producerKernel, outputIndex) = producer[k][i];
    492             args.push_back(ProducerPos[producerKernel][outputIndex]);
    493         }
    494         for (unsigned i = 0; i < kernel->getStreamOutputs().size(); ++i) {
    495             args.push_back(iBuilder->getSize(0));
     483        for (unsigned i = 0; i < inputs.size(); ++i) {
     484            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
     485            if (LLVM_UNLIKELY(f == producedPos.end())) {
     486                report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].name + " prior to its definition");
     487            }
     488            args.push_back(f->second);
     489        }
     490        for (unsigned i = 0; i < outputs.size(); ++i) {
     491            const auto f = consumedPos.find(kernel->getStreamSetOutputBuffer(i));
     492            assert (f != consumedPos.end());
     493            args.push_back(std::get<0>(f->second));
    496494        }
    497495        kernel->createDoSegmentCall(args);
     
    499497            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal(instance));
    500498        }
    501         std::vector<Value *> produced;
    502         const auto & streamOutputs = kernel->getStreamOutputs();
    503         for (unsigned i = 0; i < streamOutputs.size(); i++) {
    504             produced.push_back(kernel->getProducedItemCount(instance, streamOutputs[i].name, terminated));
    505         }
    506         ProducerPos.push_back(produced);
     499        for (unsigned i = 0; i < outputs.size(); i++) {
     500            Value * const produced = kernel->getProducedItemCount(instance, outputs[i].name, terminated);
     501            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
     502            assert (producedPos.count(buf) == 0);
     503            producedPos.emplace(buf, produced);
     504        }
     505        for (unsigned i = 0; i < inputs.size(); i++) {
     506            Value * const processed = kernel->getProcessedItemCount(instance, inputs[i].name);
     507            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
     508            const auto f = consumedPos.find(buf);
     509            assert (f != consumedPos.end());
     510            Value *& consumed = std::get<1>(f->second);
     511            if (consumed) {
     512                consumed = iBuilder->CreateSelect(iBuilder->CreateICmpULT(processed, consumed), processed, consumed);
     513            } else {
     514                consumed = processed;
     515            }
     516        }
    507517        Value * const segNo = kernel->acquireLogicalSegmentNo(instance);
    508518        kernel->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    509519    }
    510 
    511     iBuilder->CreateCondBr(terminated, exitBlock, segmentLoop);
    512     iBuilder->SetInsertPoint(exitBlock);
    513 }
     520    // update the consumed position phi nodes with the last min processed count of each input stream
     521    for (const auto entry : consumedPos) {
     522        PHINode * const phi = std::get<0>(entry.second);
     523        Value * const value = std::get<1>(entry.second);
     524        phi->addIncoming(value ? value : phi, pipelineLoop);
     525    }
     526    iBuilder->CreateCondBr(terminated, pipelineExit, pipelineLoop);
     527    iBuilder->SetInsertPoint(pipelineExit);
     528}
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5396 r5402  
    4141// a continuous buffer for the full segment (number of blocks).
    4242
    43 void expand3_4Kernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     43void expand3_4Kernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
    4444
    4545    BasicBlock * expand2_3entry = iBuilder->GetInsertBlock();
     
    8787
    8888    Value * processed = getProcessedItemCount("sourceStream");
    89     Value * itemsAvail = iBuilder->CreateSub(producerPos[0], processed);
     89    Value * available = getAvailableItemCount("sourceStream");
     90    Value * itemsAvail = iBuilder->CreateSub(available, processed);
    9091   
    9192    //
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5399 r5402  
    7474}
    7575
    76 void FileSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     76void FileSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
    7777
    7878    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5377 r5402  
    9494
    9595    Value * IOstreamPtr = getScalarField("IOstreamPtr");
     96    Value * available = getAvailableItemCount("codeUnitBuffer");
    9697    Value * processed = getProcessedItemCount("codeUnitBuffer");
    97     Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     98    Value * itemsToDo = iBuilder->CreateSub(available, processed);
    9899    // There may be two memory areas if we are at the physical end of a circular buffer.
    99100    const auto b  = getInputStreamSetBuffer("codeUnitBuffer");
     
    127128        Value * bytePtr = iBuilder->CreatePointerCast(getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
    128129        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    129         itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     130        itemsToDo = iBuilder->CreateSub(available, processed);
    130131        iBuilder->CreateFWriteCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
    131132        processed = iBuilder->CreateAdd(processed, itemsToDo);
    132         setProcessedItemCount("codeUnitBuffer", producerPos[0]);
     133        setProcessedItemCount("codeUnitBuffer", available);
    133134        iBuilder->CreateBr(checkFinal);
    134135        iBuilder->SetInsertPoint(checkFinal);
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5401 r5402  
    1111#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
    1212#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
    13 #include <llvm/Support/FormattedStream.h>
    14 #include <llvm/ADT/SmallString.h>                  // for SmallString
    1513#include <llvm/IR/LegacyPassManager.h>             // for PassManager
    1614#include <llvm/IR/IRPrintingPasses.h>
     15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
     16#ifndef NDEBUG
    1717#include <llvm/IR/Verifier.h>
    18 #include <llvm/InitializePasses.h>                 // for initializeCodeGen
     18#endif
    1919#include <llvm/PassRegistry.h>                     // for PassRegistry
    2020#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
     
    2525#include <llvm/Transforms/Utils/Local.h>
    2626#include <llvm/IR/Module.h>
    27 #include <object_cache.h>
     27#include <kernels/object_cache.h>
    2828#include <kernels/pipeline.h>
    2929#include <kernels/interface.h>
     
    165165, mTarget(nullptr)
    166166, mEngine(nullptr)
     167, mCache(nullptr)
    167168{
    168169    InitializeNativeTarget();
     
    202203    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
    203204        if (codegen::ObjectCacheDir.empty()) {
    204             mCache = llvm::make_unique<ParabixObjectCache>();
     205            mCache = new ParabixObjectCache();
    205206        } else {
    206             mCache = llvm::make_unique<ParabixObjectCache>(codegen::ObjectCacheDir);
     207            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
    207208        }
    208209        assert (mCache);
    209         mEngine->setObjectCache(mCache.get());
     210        mEngine->setObjectCache(mCache);
    210211    }
    211212}
     
    313314    return mEngine->getPointerToNamedFunction("Main");
    314315}
     316
     317ParabixDriver::~ParabixDriver() {
     318    delete mCache;
     319}
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.h

    r5401 r5402  
    1111#include <llvm/IR/TypeBuilder.h>
    1212#include <boost/container/flat_map.hpp>
    13 #include <object_cache.h>
    1413
    1514namespace llvm { class ExecutionEngine; }
     
    2120namespace kernel { class KernelBuilder; }
    2221namespace parabix { class StreamSetBuffer; }
     22class ParabixObjectCache;
    2323
    2424namespace codegen {
     
    6262public:
    6363    ParabixDriver(IDISA::IDISA_Builder * iBuilder);
     64
     65    ~ParabixDriver();
    6466   
    6567    IDISA::IDISA_Builder * getIDISA_Builder() {return iBuilder;}
     
    8385    llvm::TargetMachine *                   mTarget;
    8486    llvm::ExecutionEngine *                 mEngine;
    85     std::unique_ptr<ParabixObjectCache>     mCache;
     87    ParabixObjectCache *                    mCache;
    8688    std::vector<kernel::KernelBuilder *>    mKernelList;
    8789    ModuleMap                               mModuleMap;
  • icGREP/icgrep-devel/icgrep/preprocess.cpp

    r5377 r5402  
    2222#include <kernels/pipeline.h>
    2323#include <boost/filesystem.hpp>
    24 #include <toolchain.h>
     24#include <kernels/toolchain.h>
    2525#include <boost/iostreams/device/mapped_file.hpp>
    2626
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5401 r5402  
    2323#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
    2424#include <pablo/pe_zeroes.h>
    25 #include <toolchain.h>                             // for JIT_to_ExecutionEn...
     25#include <kernels/toolchain.h>
    2626#include <boost/iostreams/device/mapped_file.hpp>  // for mapped_file_source
    2727#include <boost/filesystem.hpp>
     
    317317    DeleteByPEXTkernel delK(iBuilder, 64, 16, true);
    318318    pxDriver.addKernelCall(delK, {&U8u16Bits, &DelMask}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3, &DeletionCounts});
    319 ;
     319
    320320    //  Produce fully compressed swizzled UTF-16 bit streams
    321321    SwizzledCopybackBuffer u16Swizzle0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     
    506506
    507507void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
    508     std::string mFileName = fileName;
    509     size_t fileSize;
    510     char * fileBuffer;
    511    
    512     const boost::filesystem::path file(mFileName);
     508
     509    const boost::filesystem::path file(fileName);
    513510    if (exists(file)) {
    514511        if (is_directory(file)) {
     
    516513        }
    517514    } else {
    518         std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
     515        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    519516        return;
    520517    }
    521518   
    522     fileSize = file_size(file);
    523     boost::iostreams::mapped_file_source mFile;
    524     if (fileSize == 0) {
    525         fileBuffer = nullptr;
    526     }
    527     else {
     519    size_t fileSize = file_size(file);
     520    boost::iostreams::mapped_file_source input;
     521
     522    char * fileBuffer = nullptr;
     523    if (fileSize) {
    528524        try {
    529             mFile.open(mFileName);
    530         } catch (std::exception &e) {
    531             std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
    532             return;
    533         }
    534         fileBuffer = const_cast<char *>(mFile.data());
     525            input.open(fileName);
     526            fileBuffer = const_cast<char *>(input.data());
     527        } catch (std::exception & e) {
     528            throw std::runtime_error("Boost mmap error: " + fileName + ": " + e.what());
     529        }       
    535530    }
    536531
     
    540535        outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
    541536        fn_ptr(fileBuffer, static_cast<char*>(outputBuffer.get_address()), fileSize);
    542     }
    543     else if (memAlignBuffering) {
     537    } else if (memAlignBuffering) {
    544538        char * outputBuffer;
    545539        const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*fileSize);
     
    549543        fn_ptr(fileBuffer, outputBuffer, fileSize);
    550544        free(reinterpret_cast<void *>(outputBuffer));
    551     }
    552     else {
     545    } else {
    553546        /* No external output buffer */
    554547        fn_ptr(fileBuffer, nullptr, fileSize);
    555548    }
    556     mFile.close();
     549    input.close();
    557550   
    558551}
    559 
    560552
    561553int main(int argc, char *argv[]) {
     
    563555    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    564556    cl::ParseCommandLineOptions(argc, argv);
    565 
    566     u8u16FunctionType fn_ptr = u8u16CodeGen();
    567 
    568     u8u16(fn_ptr, inputFile);
    569 
     557    u8u16(u8u16CodeGen(), inputFile);
    570558    return 0;
    571559}
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5401 r5402  
    88#include <iomanip>
    99#include <sstream>
    10 #include <toolchain.h>
     10#include <kernels/toolchain.h>
    1111#include <llvm/IR/Function.h>
    1212#include <llvm/IR/Module.h>
Note: See TracChangeset for help on using the changeset viewer.