Ignore:
Timestamp:
Apr 22, 2017, 4:03:25 PM (2 years ago)
Author:
nmedfort
Message:

Removed non-functional CUDA code from icgrep and consolidated the grep and multigrep modes into a single function; allowed the segment-parallel pipeline to use the process itself as its initial thread; modified MMapSourceKernel to perform the mmap directly and to advise the OS to drop consumed data streams.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
16 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5411 r5418  
    1111#include <llvm/IR/Module.h>
    1212#include <IR_Gen/idisa_builder.h>
    13 namespace llvm { class Module; }
    14 namespace llvm { class Type; }
    1513
    1614static const auto INIT_SUFFIX = "_Init";
    1715
    18 static const auto TERMINATE_SUFFIX = "_Terminate";
    19 
    2016static const auto DO_SEGMENT_SUFFIX = "_DoSegment";
    2117
    22 static const auto ACCUMULATOR_INFIX = "_get_";
     18static const auto TERMINATE_SUFFIX = "_Terminate";
    2319
    2420using namespace llvm;
     
    7369
    7470void KernelInterface::addKernelDeclarations(Module * const client) {
    75     Module * saveModule = iBuilder->getModule();
    76     auto savePoint = iBuilder->saveIP();
     71    Module * const saveModule = iBuilder->getModule();
    7772    iBuilder->setModule(client);
    7873    if (mKernelStateType == nullptr) {
     
    8277    IntegerType * const sizeTy = iBuilder->getSizeTy();
    8378    PointerType * const consumerTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
     79    Type * const voidTy = iBuilder->getVoidTy();
    8480
    8581    // Create the initialization function prototype
     
    9086    initParameters.insert(initParameters.end(), mStreamSetOutputs.size(), consumerTy);
    9187
    92     FunctionType * initType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    93     Function * init = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
    94     init->setCallingConv(CallingConv::C);
    95     init->setDoesNotThrow();
    96     auto args = init->arg_begin();
     88    FunctionType * const initType = FunctionType::get(voidTy, initParameters, false);
     89    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
     90    initFunc->setCallingConv(CallingConv::C);
     91    initFunc->setDoesNotThrow();
     92    auto args = initFunc->arg_begin();
    9793    args->setName("self");
    9894    for (auto binding : mScalarInputs) {
     
    10096    }
    10197    for (auto binding : mStreamSetOutputs) {
    102         args->setName(binding.name + "ConsumerLogicalSegments");       
    103         ++args;
     98        (args++)->setName(binding.name + "ConsumerLocks");
    10499    }
    105100
     
    108103    params.insert(params.end(), mStreamSetInputs.size(), sizeTy);
    109104
    110     FunctionType * const doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    111     Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
     105    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
     106    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    112107    doSegment->setCallingConv(CallingConv::C);
    113108    doSegment->setDoesNotThrow();
    114     doSegment->setDoesNotCapture(1); // for self parameter only.
     109    doSegment->setDoesNotCapture(1); // for self parameter only.   
    115110    args = doSegment->arg_begin();
    116111    args->setName("self");
     
    121116
    122117    // Create the terminate function prototype
    123     FunctionType * terminateType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
    124     Function * terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
     118    Type * resultType = nullptr;
     119    if (mScalarOutputs.empty()) {
     120        resultType = iBuilder->getVoidTy();
     121    } else {
     122        const auto n = mScalarOutputs.size();
     123        Type * outputType[n];
     124        for (unsigned i = 0; i < n; ++i) {
     125            outputType[i] = mScalarOutputs[i].type;
     126        }
     127        if (n == 1) {
     128            resultType = outputType[0];
     129        } else {
     130            resultType = StructType::get(iBuilder->getContext(), ArrayRef<Type *>(outputType, n));
     131        }
     132    }
     133    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
     134    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
    125135    terminateFunc->setCallingConv(CallingConv::C);
    126136    terminateFunc->setDoesNotThrow();
     
    129139    args->setName("self");
    130140
    131     /// INVESTIGATE: replace the accumulator methods with a single Exit method that handles any clean up and returns
    132     /// a struct containing all scalar outputs?
    133 
    134     // Create the accumulator get function prototypes
    135     for (const auto & binding : mScalarOutputs) {
    136         FunctionType * accumFnType = FunctionType::get(binding.type, {selfType}, false);
    137         Function * accumFn = Function::Create(accumFnType, GlobalValue::ExternalLinkage, getName() + ACCUMULATOR_INFIX + binding.name, client);
    138         accumFn->setCallingConv(CallingConv::C);
    139         accumFn->setDoesNotThrow();
    140         accumFn->setDoesNotCapture(1);
    141         auto args = accumFn->arg_begin();
    142         args->setName("self");
    143     }
    144 
    145141    iBuilder->setModule(saveModule);
    146     iBuilder->restoreIP(savePoint);
    147142}
    148143
    149144void KernelInterface::setInitialArguments(std::vector<Value *> args) {
    150145    mInitialArguments = args;
    151 }
    152 
    153 llvm::Function * KernelInterface::getAccumulatorFunction(const std::string & accumName) const {
    154     const auto name = getName() + ACCUMULATOR_INFIX + accumName;
    155     Function * f = iBuilder->getModule()->getFunction(name);
    156     if (LLVM_UNLIKELY(f == nullptr)) {
    157         llvm::report_fatal_error("Cannot find " + name);
    158     }
    159     return f;
    160146}
    161147
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5411 r5418  
    77#define KERNEL_INTERFACE_H
    88
    9 #include <string>  // for string
    10 #include <vector>  // for vector
     9#include <llvm/IR/Constants.h>
     10#include <string>
     11#include <vector>
     12
    1113namespace IDISA { class IDISA_Builder; }
    12 //namespace llvm { class ConstantInt; }
    13 #include <llvm/IR/Constants.h>
    14 namespace llvm { class Function; }
    15 namespace llvm { class Module; }
    16 namespace llvm { class PointerType; }
    17 namespace llvm { class StructType; }
    18 namespace llvm { class Type; }
    19 namespace llvm { class Value; }
    20 
    2114
    2215// Processing rate attributes are required for all stream set bindings for a kernel.
     
    10497    virtual void initializeInstance() = 0;
    10598
    106     virtual void terminateInstance() = 0;
     99    virtual void finalizeInstance() = 0;
    107100
    108101    void setInitialArguments(std::vector<llvm::Value *> args);
     
    128121    virtual void setProcessedItemCount(const std::string & name, llvm::Value * value) const = 0;
    129122
     123    virtual llvm::Value * getConsumedItemCount(const std::string & name) const = 0;
     124
     125    virtual void setConsumedItemCount(const std::string & name, llvm::Value * value) const = 0;
     126
    130127    virtual llvm::Value * getTerminationSignal() const = 0;
    131128
     
    139136
    140137    llvm::Function * getDoSegmentFunction() const;
    141 
    142     llvm::Function * getAccumulatorFunction(const std::string & accumName) const;
    143138
    144139    llvm::Function * getTerminateFunction() const;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5411 r5418  
    3434static const std::string BUFFER_PTR_SUFFIX = "_bufferPtr";
    3535
    36 static const std::string CONSUMER_LOGICAL_SEGMENT_SUFFIX = "_cls";
     36static const std::string CONSUMER_SUFFIX = "_cls";
    3737
    3838using namespace llvm;
     
    5555unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
    5656    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    57         report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
     57        report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
    5858    }
    5959    const auto index = mKernelFields.size();
     
    7272   
    7373void KernelBuilder::prepareKernel() {
    74 
    7574    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    7675        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    7776    }
    78 
    7977    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    8078        std::string tmp;
     
    9795        }
    9896        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    99         if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
     97        if ((i == 0) || mStreamSetInputs[i].rate.isUnknown()) {
    10098            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    10199        }       
     
    124122    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    125123    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    126         addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     124        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
    127125    }
    128126
     
    130128    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
    131129
     130    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     131        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
     132    }
     133
    132134    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    133135}
    134136
    135 Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    136 
     137void KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     138
     139    assert (mModule == nullptr);
    137140    assert (mStreamSetInputBuffers.empty());
    138141    assert (mStreamSetOutputBuffers.empty());
     
    188191    prepareKernel();
    189192
    190     Module * const m = new Module(cacheName.str(), iBuilder->getContext());
    191     m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    192     return m;
     193    mModule = new Module(cacheName.str(), iBuilder->getContext());
     194    mModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    193195}
    194196
     
    213215        auto savePoint = iBuilder->saveIP();
    214216        addKernelDeclarations(iBuilder->getModule());
    215         callGenerateInitMethod();
     217        callGenerateInitializeMethod();
    216218        callGenerateDoSegmentMethod();       
    217         // Implement the accumulator get functions
    218         for (auto binding : mScalarOutputs) {
    219             Function * f = getAccumulatorFunction(binding.name);
    220             iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
    221             Value * self = &*(f->arg_begin());
    222             Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
    223             Value * retVal = iBuilder->CreateLoad(ptr);
    224             iBuilder->CreateRet(retVal);
    225         }
    226         callGenerateTerminateMethod();
     219        callGenerateFinalizeMethod();
    227220        iBuilder->restoreIP(savePoint);
    228221        setInstance(saveInstance);
     
    231224}
    232225
    233 void KernelBuilder::callGenerateDoSegmentMethod() {
    234     mCurrentMethod = getDoSegmentFunction();
    235     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    236     auto args = mCurrentMethod->arg_begin();
    237     setInstance(&*(args++));
    238     Value * doFinal = &*(args++);
    239     std::vector<Value *> producerPos;
    240     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    241         producerPos.push_back(&*(args++));
    242     }
    243     generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    244     iBuilder->CreateRetVoid();
    245 }
    246 
    247 void KernelBuilder::callGenerateInitMethod() {
     226inline void KernelBuilder::callGenerateInitializeMethod() {
    248227    mCurrentMethod = getInitFunction();
    249228    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     
    257236        setConsumerState(binding.name, &*(args++));
    258237    }
    259     generateInitMethod();
     238    generateInitializeMethod();
    260239    iBuilder->CreateRetVoid();
    261240}
    262241
    263 void KernelBuilder::callGenerateTerminateMethod() {
    264     mCurrentMethod = getTerminateFunction();
    265     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
     242inline void KernelBuilder::callGenerateDoSegmentMethod() {
     243    mCurrentMethod = getDoSegmentFunction();
     244    BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
     245    iBuilder->SetInsertPoint(entry);
    266246    auto args = mCurrentMethod->arg_begin();
    267247    setInstance(&*(args++));
    268     generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
     248    mIsFinal = &*(args++);
     249    const auto n = mStreamSetInputs.size();
     250    mAvailableItemCount.resize(n, nullptr);
     251    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     252        mAvailableItemCount[i] = &*(args++);
     253    }
     254    generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
     255    mIsFinal = nullptr;
     256    mAvailableItemCount.clear();
    269257    iBuilder->CreateRetVoid();
     258}
     259
     260inline void KernelBuilder::callGenerateFinalizeMethod() {
     261    mCurrentMethod = getTerminateFunction();
     262    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     263    auto args = mCurrentMethod->arg_begin();
     264    setInstance(&*(args++));
     265    generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
     266    const auto n = mScalarOutputs.size();
     267    if (n == 0) {
     268        iBuilder->CreateRetVoid();
     269    } else {
     270        Value * outputs[n];
     271        for (unsigned i = 0; i < n; ++i) {
     272            outputs[i] = getScalarField(mScalarOutputs[i].name);
     273        }
     274        if (n == 1) {
     275            iBuilder->CreateRet(outputs[0]);
     276        } else {
     277            iBuilder->CreateAggregateRet(outputs, n);
     278        }
     279    }
    270280}
    271281
     
    307317
    308318llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
    309     auto arg = mCurrentMethod->arg_begin();
    310     ++arg; // self
    311     ++arg; // doFinal
    312319    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    313320        if (mStreamSetInputs[i].name == name) {
    314             return &*arg;
    315         }
    316         ++arg;
     321            return mAvailableItemCount[i];
     322        }
    317323    }
    318324    return nullptr;
     
    334340}
    335341
     342Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
     343    return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
     344}
     345
    336346void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
    337347    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
     
    342352}
    343353
     354void KernelBuilder::setConsumedItemCount(const std::string & name, Value * value) const {
     355    setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
     356}
     357
    344358Value * KernelBuilder::getTerminationSignal() const {
    345359    return getScalarField(TERMINATION_SIGNAL);
     
    359373
    360374llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
    361     return getScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     375    return getScalarField(name + CONSUMER_SUFFIX);
    362376}
    363377
    364378void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
    365     setScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX, value);
     379    setScalarField(name + CONSUMER_SUFFIX, value);
    366380}
    367381
     
    443457}
    444458
    445 void KernelBuilder::setBaseAddress(const std::string & name, llvm::Value * addr) const {
    446     unsigned index; Port port;
    447     std::tie(port, index) = getStreamPort(name);
    448     const StreamSetBuffer * buf = nullptr;
    449     if (port == Port::Input) {
    450         assert (index < mStreamSetInputBuffers.size());
    451         buf = mStreamSetInputBuffers[index];
    452     } else {
    453         assert (index < mStreamSetOutputBuffers.size());
    454         buf = mStreamSetOutputBuffers[index];
    455     }
    456     return buf->setBaseAddress(getStreamSetBufferPtr(name), addr);
     459Value * KernelBuilder::getBaseAddress(const std::string & name) const {
     460    return getAnyStreamSetBuffer(name)->getBaseAddress(getStreamSetBufferPtr(name));
     461}
     462
     463void KernelBuilder::setBaseAddress(const std::string & name, Value * const addr) const {
     464    return getAnyStreamSetBuffer(name)->setBaseAddress(getStreamSetBufferPtr(name), addr);
    457465}
    458466
    459467Value * KernelBuilder::getBufferedSize(const std::string & name) const {
    460     unsigned index; Port port;
    461     std::tie(port, index) = getStreamPort(name);
    462     const StreamSetBuffer * buf = nullptr;
    463     if (port == Port::Input) {
    464         assert (index < mStreamSetInputBuffers.size());
    465         buf = mStreamSetInputBuffers[index];
    466     } else {
    467         assert (index < mStreamSetOutputBuffers.size());
    468         buf = mStreamSetOutputBuffers[index];
    469     }
    470     return buf->getBufferedSize(getStreamSetBufferPtr(name));
     468    return getAnyStreamSetBuffer(name)->getBufferedSize(getStreamSetBufferPtr(name));
    471469}
    472470
     
    491489}
    492490
    493 KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
    494     const auto f = mStreamMap.find(name);
    495     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    496         report_fatal_error(getName() + " does not contain stream set: " + name);
    497     }
    498     return f->second;
    499 }
    500 
    501491Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
    502492    return getScalarField(name + BUFFER_PTR_SUFFIX);
     
    513503
    514504CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
    515     assert (getDoSegmentFunction()->getArgumentList().size() == args.size());
    516     return iBuilder->CreateCall(getDoSegmentFunction(), args);
    517 }
    518 
    519 CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
    520     return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
     505    Function * const doSegment = getDoSegmentFunction();
     506    assert (doSegment->getArgumentList().size() == args.size());
     507    return iBuilder->CreateCall(doSegment, args);
     508}
     509
     510Value * KernelBuilder::getAccumulator(const std::string & accumName) const {
     511    if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
     512        report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
     513    }
     514    const auto n = mScalarOutputs.size();
     515    if (LLVM_UNLIKELY(n == 0)) {
     516        report_fatal_error(getName() + " has no output scalars.");
     517    } else {
     518        for (unsigned i = 0; i < n; ++i) {
     519            const Binding & b = mScalarOutputs[i];
     520            if (b.name == accumName) {
     521                if (n == 1) {
     522                    return mOutputScalarResult;
     523                } else {
     524                    return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
     525                }
     526            }
     527        }
     528        report_fatal_error(getName() + " has no output scalar named " + accumName);
     529    }
    521530}
    522531
     
    576585    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
    577586    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    578         const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     587        const auto output = mStreamSetOutputBuffers[i];
     588        const auto & consumers = output->getConsumers();
    579589        const auto n = consumers.size();
    580590        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
     
    582592        for (unsigned i = 0; i < n; ++i) {
    583593            KernelBuilder * const consumer = consumers[i];
    584             assert (consumer->getInstance());
    585             Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
    586             iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
     594            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
     595            Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
     596            iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
    587597        }
    588598        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    592602        args.push_back(outputConsumers);
    593603    }
     604
     605
    594606    iBuilder->CreateCall(getInitFunction(), args);
    595 }
    596 
    597 void KernelBuilder::terminateInstance() {
    598     iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
    599607}
    600608
     
    602610//  each block of the given number of blocksToDo, and then updates counts.
    603611
    604 void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
     612void BlockOrientedKernel::generateDoSegmentMethod() {
    605613
    606614    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    613621    Value * baseTarget = nullptr;
    614622    if (useIndirectBr()) {
    615         baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
     623        baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
    616624    }
    617625
    618626    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    619     Value * availablePos = producerPos[0];
     627    Value * availablePos = mAvailableItemCount[0];
    620628    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    621629    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
     
    674682        mStrideLoopBranch->addDestination(segmentDone);
    675683    } else {
    676         iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
     684        iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
    677685    }
    678686
     
    681689    iBuilder->SetInsertPoint(doFinalBlock);
    682690
    683     Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
     691    Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
    684692    writeFinalBlockMethod(remainingItems);
    685693
    686     itemsDone = producerPos[0];
     694    itemsDone = mAvailableItemCount[0];
    687695    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    688696    setTerminationSignal();
     
    839847}
    840848
     849void KernelBuilder::finalizeInstance() {
     850    mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
     851}
     852
     853KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
     854    const auto f = mStreamMap.find(name);
     855    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
     856        report_fatal_error(getName() + " does not contain stream set " + name);
     857    }
     858    return f->second;
     859}
    841860
    842861// CONSTRUCTOR
     
    849868                             std::vector<Binding> && internal_scalars)
    850869: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     870, mModule(nullptr)
    851871, mCurrentMethod(nullptr)
    852872, mNoTerminateAttribute(false)
    853 , mIsGenerated(false) {
     873, mIsGenerated(false)
     874, mIsFinal(nullptr)
     875, mOutputScalarResult(nullptr) {
    854876
    855877}
     
    886908
    887909}
     910
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5411 r5418  
    77#define KERNEL_BUILDER_H
    88
    9 #include <string>           // for string
    10 #include <memory>           // for unique_ptr
    119#include "interface.h"      // for KernelInterface
    1210#include <boost/container/flat_map.hpp>
    1311#include <IR_Gen/idisa_builder.h>
    1412#include <kernels/pipeline.h>
    15 
    16 //namespace llvm { class ConstantInt; }
    1713#include <llvm/IR/Constants.h>
     14
     15//#include <string>           // for string
     16//#include <memory>           // for unique_ptr
     17
    1818namespace llvm { class Function; }
    1919namespace llvm { class IntegerType; }
     
    3232    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
    3333    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
    34 
    35     friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
    36     friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
    37     friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
     34    using Kernels = std::vector<KernelBuilder *>;
     35
     36    friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
     37    friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const Kernels &);
     38    friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
    3839public:
    3940   
     
    7172    // Create a module stub for the kernel, populated only with its Module ID.     
    7273    //
    73     llvm::Module * createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    74      
     74    void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     75
     76    llvm::Module * getModule() const {
     77        return mModule;
     78    }
     79
    7580    // Generate the Kernel to the current module (iBuilder->getModule()).
    7681    void generateKernel();
     
    8085    void initializeInstance() final;
    8186
    82     void terminateInstance() final;
     87    void finalizeInstance() final;
    8388
    8489    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
     
    8994
    9095    void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
     96
     97    llvm::Value * getConsumedItemCount(const std::string & name) const final;
     98
     99    void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
    91100
    92101    bool hasNoTerminateAttribute() const {
     
    164173    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    165174
    166     llvm::CallInst * createGetAccumulatorCall(const std::string & accumName) const;
     175    llvm::Value * getAccumulator(const std::string & accumName) const;
    167176
    168177    virtual ~KernelBuilder() = 0;
     
    198207    virtual void prepareKernel();
    199208
    200     virtual void generateInitMethod() { }
    201    
    202     virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    203 
    204     virtual void generateTerminateMethod() { }
     209    virtual void generateInitializeMethod() { }
     210   
     211    virtual void generateDoSegmentMethod() = 0;
     212
     213    virtual void generateFinalizeMethod() { }
    205214
    206215    // Add an additional scalar field to the KernelState struct.
     
    242251    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    243252
     253    llvm::Value * getBaseAddress(const std::string & name) const;
     254
    244255    void setBaseAddress(const std::string & name, llvm::Value * addr) const;
    245256
     
    251262
    252263    llvm::Value * getAvailableItemCount(const std::string & name) const;
     264
     265    llvm::Value * getIsFinal() const {
     266        return mIsFinal;
     267    }
     268
    253269
    254270    llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
     
    266282        return getScalarFieldPtr(instance, getScalarIndex(fieldName));
    267283    }
     284
     285    void callGenerateInitializeMethod();
     286
     287    void callGenerateDoSegmentMethod();
     288
     289    void callGenerateFinalizeMethod();
    268290
    269291    StreamPort getStreamPort(const std::string & name) const;
     
    283305    }
    284306
    285     void callGenerateInitMethod();
    286 
    287     void callGenerateDoSegmentMethod();
    288 
    289     void callGenerateTerminateMethod();
     307    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
     308        unsigned index; Port port;
     309        std::tie(port, index) = getStreamPort(name);
     310        if (port == Port::Input) {
     311            assert (index < mStreamSetInputBuffers.size());
     312            return mStreamSetInputBuffers[index];
     313        } else {
     314            assert (index < mStreamSetOutputBuffers.size());
     315            return mStreamSetOutputBuffers[index];
     316        }
     317    }
    290318
    291319private:
     
    297325protected:
    298326
    299     llvm::Function *                mCurrentMethod;
    300     std::vector<llvm::Type *>       mKernelFields;
    301     KernelMap                       mKernelMap;
    302     StreamMap                       mStreamMap;
    303     StreamSetBuffers                mStreamSetInputBuffers;
    304     StreamSetBuffers                mStreamSetOutputBuffers;
    305     bool                            mNoTerminateAttribute;
    306     bool                            mIsGenerated;
     327    llvm::Module *                      mModule;
     328    llvm::Function *                    mCurrentMethod;
     329    bool                                mNoTerminateAttribute;
     330    bool                                mIsGenerated;
     331
     332    llvm::Value *                       mIsFinal;
     333    std::vector<llvm::Value *>          mAvailableItemCount;
     334    llvm::Value *                       mOutputScalarResult;
     335
     336
     337    std::vector<llvm::Type *>           mKernelFields;
     338    KernelMap                           mKernelMap;
     339    StreamMap                           mStreamMap;
     340    StreamSetBuffers                    mStreamSetInputBuffers;
     341    StreamSetBuffers                    mStreamSetOutputBuffers;
    307342
    308343};
     
    339374    virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
    340375
    341     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     376    void generateDoSegmentMethod() override final;
    342377
    343378    BlockOrientedKernel(IDISA::IDISA_Builder * builder,
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5414 r5418  
    44 */
    55#include "mmap_kernel.h"
    6 #include <llvm/IR/Function.h>  // for Function, Function::arg_iterator
    76#include <llvm/IR/Module.h>
    87#include <IR_Gen/idisa_builder.h>
    98#include <kernels/streamset.h>
    10 namespace llvm { class BasicBlock; }
    11 namespace llvm { class Constant; }
    12 namespace llvm { class Module; }
    13 namespace llvm { class Value; }
    149
    1510using namespace llvm;
     
    1712namespace kernel {
    1813
    19 void MMapSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     14void MMapSourceKernel::generateInitializeMethod() {
     15    Value * fd = getScalarField("fileDescriptor");
     16    Value * fileSize = iBuilder->CreateFileSize(fd);
     17    if (mCodeUnitWidth > 8) {
     18        fileSize = iBuilder->CreateUDiv(fileSize, iBuilder->getSize(mCodeUnitWidth / 8));
     19    }
     20    Value * buffer = iBuilder->CreateFileSourceMMap(fd, fileSize);
     21    setBaseAddress("sourceBuffer", buffer);
     22    setBufferedSize("sourceBuffer", fileSize);   
     23    setScalarField("readableBuffer", buffer);
     24    setScalarField("fileSize", fileSize);
     25    iBuilder->CreateMAdvise(buffer, fileSize, CBuilder::MMAP_WILLNEED);
     26}
    2027
    21     BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     28void MMapSourceKernel::generateDoSegmentMethod() {
     29
     30    BasicBlock * dropPages = CreateBasicBlock("dropPages");
     31    BasicBlock * produceData = CreateBasicBlock("produceData");
    2232    BasicBlock * setTermination = CreateBasicBlock("setTermination");
    2333    BasicBlock * mmapSourceExit = CreateBasicBlock("mmapSourceExit");
     34
     35    // instruct the OS that it can safely drop any fully consumed pages
     36    Value * consumed = getConsumedItemCount("sourceBuffer");
     37    Type * const consumedTy = consumed->getType();
     38    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
     39
     40    // multiply the consumed count by the code unit size then mask off any partial pages
     41    if (mCodeUnitWidth > 8) {
     42        consumed = iBuilder->CreateMul(consumed, iBuilder->getSize(mCodeUnitWidth / 8));
     43    }
     44    const auto pageSize = getpagesize();
     45    if (LLVM_LIKELY((pageSize & (pageSize - 1)) == 0)) {
     46        consumed = iBuilder->CreateAnd(consumed, ConstantExpr::getNot(ConstantInt::get(consumedTy, pageSize - 1)));
     47    } else {
     48        consumed = iBuilder->CreateSub(consumed, iBuilder->CreateURem(consumed, ConstantInt::get(consumedTy, pageSize)));
     49    }
     50    Value * sourceBuffer = getBaseAddress("sourceBuffer");
     51    sourceBuffer = iBuilder->CreatePtrToInt(sourceBuffer, consumedTy);
     52    Value * consumedBuffer = iBuilder->CreateAdd(sourceBuffer, consumed);
     53    Value * readableBuffer = getScalarField("readableBuffer");
     54    readableBuffer = iBuilder->CreatePtrToInt(readableBuffer, consumedTy);
     55    Value * unnecessaryBytes = iBuilder->CreateSub(consumedBuffer, readableBuffer);
     56    // avoid calling madvise unless an actual page table change could occur
     57    Value * hasPagesToDrop = iBuilder->CreateICmpEQ(unnecessaryBytes, ConstantInt::getNullValue(unnecessaryBytes->getType()));
     58    iBuilder->CreateLikelyCondBr(hasPagesToDrop, produceData, dropPages);
     59
     60    iBuilder->SetInsertPoint(dropPages);
     61    iBuilder->CreateMAdvise(iBuilder->CreateIntToPtr(readableBuffer, voidPtrTy), unnecessaryBytes, CBuilder::MMAP_DONTNEED);   
     62    readableBuffer = iBuilder->CreateIntToPtr(iBuilder->CreateAdd(readableBuffer, unnecessaryBytes), voidPtrTy);
     63    setScalarField("readableBuffer", readableBuffer);
     64    iBuilder->CreateBr(produceData);
     65
     66    // determine whether or not we've exhausted the file buffer
     67    iBuilder->SetInsertPoint(produceData);
    2468    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    25     Value * fileItems = getScalarField("fileSize");
    26     if (mCodeUnitWidth > 8) {
    27         fileItems = iBuilder->CreateUDiv(fileItems, iBuilder->getSize(mCodeUnitWidth / 8));
    28     }
     69    Value * const fileSize = getBufferedSize("sourceBuffer");
    2970    Value * produced = getProducedItemCount("sourceBuffer");
    3071    produced = iBuilder->CreateAdd(produced, segmentItems);
    31     Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, produced);
     72
     73    Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileSize, produced);
    3274    iBuilder->CreateCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
    3375    iBuilder->SetInsertPoint(setTermination);
     76
    3477    setTerminationSignal();
    3578    iBuilder->CreateBr(mmapSourceExit);
    3679
     80    // finally, set the "produced" count to reflect current position in the file
    3781    iBuilder->SetInsertPoint(mmapSourceExit);
     82    PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
     83    itemsRead->addIncoming(produced, produceData);
     84    itemsRead->addIncoming(fileSize, setTermination);
     85    setProducedItemCount("sourceBuffer", itemsRead);
     86}
    3887
    39     PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
    40     itemsRead->addIncoming(produced, entryBlock);
    41     itemsRead->addIncoming(fileItems, setTermination);
    42     setProducedItemCount("sourceBuffer", itemsRead);
     88void MMapSourceKernel::generateFinalizeMethod() {
     89    Value * buffer = getBaseAddress("sourceBuffer");
     90    Value * fileSize = getBufferedSize("sourceBuffer");
     91    iBuilder->CreateMUnmap(buffer, fileSize);
    4392}
    4493
     
    4796    {},
    4897    {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}},
    49     {Binding{iBuilder->getSizeTy(), "fileSize"}}, {}, {})
     98    {Binding{iBuilder->getInt32Ty(), "fileDescriptor"}}, {Binding{iBuilder->getSizeTy(), "fileSize"}}, {Binding{iBuilder->getVoidPtrTy(), "readableBuffer"}})
    5099, mSegmentBlocks(blocksPerSegment)
    51100, mCodeUnitWidth(codeUnitWidth) {
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5398 r5418  
    2020    bool moduleIDisSignature() override {return true;}
    2121private:
    22     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     22    void generateInitializeMethod() override;
     23    void generateDoSegmentMethod() override;
     24    void generateFinalizeMethod() override;
    2325private:
    2426    const unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5411 r5418  
    4444    IntegerType * const sizeTy = iBuilder->getSizeTy();
    4545    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    46     const unsigned threads = codegen::ThreadNum;
    4746    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    48 
    4947    std::vector<Type *> structTypes;
    5048
     
    8381    segNo->addIncoming(segOffset, entryBlock);
    8482
    85     Value * doFinal = iBuilder->getFalse();
     83    Value * terminated = iBuilder->getFalse();
    8684    Value * const nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    8785
     
    9088
    9189    StreamSetBufferMap<Value *> producedPos;
     90    StreamSetBufferMap<Value *> consumedPos;
    9291
    9392    for (unsigned k = 0; k < n; ++k) {
     
    124123        iBuilder->SetInsertPoint(segmentLoopBody);
    125124        const auto & inputs = kernel->getStreamInputs();
    126         std::vector<Value *> args = {kernel->getInstance(), doFinal};
     125        std::vector<Value *> args = {kernel->getInstance(), terminated};
    127126        for (unsigned i = 0; i < inputs.size(); ++i) {
    128127            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
    129             if (LLVM_UNLIKELY(f == producedPos.end())) {
    130                 report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].name + " prior to its definition");
    131             }
     128            assert (f != producedPos.end());
    132129            args.push_back(f->second);
    133130        }
    134131
    135         kernel->createDoSegmentCall(args);
     132        kernel->createDoSegmentCall(args);       
    136133        if (!kernel->hasNoTerminateAttribute()) {
    137             doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal());
     134            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal());
    138135        }
    139136
    140137        const auto & outputs = kernel->getStreamOutputs();
    141138        for (unsigned i = 0; i < outputs.size(); ++i) {
    142             Value * const produced = kernel->getProducedItemCount(outputs[i].name, doFinal);
     139            Value * const produced = kernel->getProducedItemCount(outputs[i].name, terminated);
    143140            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    144141            assert (producedPos.count(buf) == 0);
    145142            producedPos.emplace(buf, produced);
    146143        }
     144        for (unsigned i = 0; i < inputs.size(); ++i) {
     145            Value * const processedItemCount = kernel->getProcessedItemCount(inputs[i].name);
     146            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
     147            auto f = consumedPos.find(buf);
     148            if (f == consumedPos.end()) {
     149                consumedPos.emplace(buf, processedItemCount);
     150            } else {
     151                Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
     152                f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
     153            }
     154        }
    147155
    148156        kernel->releaseLogicalSegmentNo(nextSegNo);
     
    151159    assert (segmentLoopBody);
    152160    exitThreadBlock->moveAfter(segmentLoopBody);
    153     segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(threads)), segmentLoopBody);
    154     iBuilder->CreateCondBr(doFinal, exitThreadBlock, segmentLoop);
     161
     162    for (const auto consumed : consumedPos) {
     163        const StreamSetBuffer * const buf = consumed.first;
     164        KernelBuilder * k = buf->getProducer();
     165        const auto & outputs = k->getStreamSetOutputBuffers();
     166        for (unsigned i = 0; i < outputs.size(); ++i) {
     167            if (outputs[i] == buf) {
     168                k->setConsumedItemCount(k->getStreamOutputs()[i].name, consumed.second);
     169                break;
     170            }
     171        }
     172    }
     173
     174    segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(codegen::ThreadNum)), segmentLoopBody);
     175    iBuilder->CreateCondBr(terminated, exitThreadBlock, segmentLoop);
    155176
    156177    iBuilder->SetInsertPoint(exitThreadBlock);
     178
     179    // only call pthread_exit() within spawned threads; otherwise it'll be equivalent to calling exit() within the process
     180    BasicBlock * const exitThread = BasicBlock::Create(iBuilder->getContext(), "ExitThread", threadFunc);
     181    BasicBlock * const exitFunction = BasicBlock::Create(iBuilder->getContext(), "ExitProcessFunction", threadFunc);
     182
     183    Value * const exitCond = iBuilder->CreateICmpEQ(segOffset, ConstantInt::getNullValue(segOffset->getType()));
     184    iBuilder->CreateCondBr(exitCond, exitFunction, exitThread);
     185    iBuilder->SetInsertPoint(exitThread);
    157186    iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
     187    iBuilder->CreateBr(exitFunction);
     188    iBuilder->SetInsertPoint(exitFunction);
    158189    iBuilder->CreateRetVoid();
    159190
     
    168199    // MAKE SEGMENT PARALLEL PIPELINE DRIVER
    169200    // -------------------------------------------------------------------------------------------------------------------------
     201    const unsigned threads = codegen::ThreadNum - 1;
     202    assert (codegen::ThreadNum > 1);
    170203    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    171204    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     
    186219    }
    187220
     221    // use the process thread to handle the initial segment function after spawning (n - 1) threads to handle the subsequent offsets
    188222    for (unsigned i = 0; i < threads; ++i) {
    189         AllocaInst * threadState = iBuilder->CreateAlloca(threadStructType);
    190         Value * const sharedStatePtr = iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    191         iBuilder->CreateStore(sharedStruct, sharedStatePtr);
    192         Value * const segmentOffsetPtr = iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    193         iBuilder->CreateStore(iBuilder->getSize(i), segmentOffsetPtr);
     223        AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
     224        iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     225        iBuilder->CreateStore(iBuilder->getSize(i + 1), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    194226        iBuilder->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
    195227    }
     228
     229    AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
     230    iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     231    iBuilder->CreateStore(iBuilder->getSize(0), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     232    iBuilder->CreateCall(threadFunc, iBuilder->CreatePointerCast(threadState, voidPtrTy));
    196233
    197234    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
     
    369406
    370407        iBuilder->SetInsertPoint(exitThreadBlock);
     408
    371409        iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
     410
    372411        iBuilder->CreateRetVoid();
    373412
     
    406445
    407446    StreamSetBufferMap<Value *> producedPos;
     447    StreamSetBufferMap<Value *> consumedPos;
    408448
    409449    iBuilder->CreateBr(pipelineLoop);
     
    412452    Value * terminated = iBuilder->getFalse();
    413453    for (auto & kernel : kernels) {
     454
    414455        const auto & inputs = kernel->getStreamInputs();
     456        const auto & outputs = kernel->getStreamOutputs();
     457
    415458        std::vector<Value *> args = {kernel->getInstance(), terminated};
    416459        for (unsigned i = 0; i < inputs.size(); ++i) {
     
    421464            args.push_back(f->second);
    422465        }
    423         Value * const segNo = kernel->acquireLogicalSegmentNo();
     466
    424467        kernel->createDoSegmentCall(args);
    425468        if (!kernel->hasNoTerminateAttribute()) {
    426469            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal());
    427470        }
    428         const auto & outputs = kernel->getStreamOutputs();
    429471        for (unsigned i = 0; i < outputs.size(); ++i) {
    430472            Value * const produced = kernel->getProducedItemCount(outputs[i].name, terminated);
     
    434476        }
    435477
     478        for (unsigned i = 0; i < inputs.size(); ++i) {
     479            Value * const processedItemCount = kernel->getProcessedItemCount(inputs[i].name);
     480            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
     481            auto f = consumedPos.find(buf);
     482            if (f == consumedPos.end()) {
     483                consumedPos.emplace(buf, processedItemCount);
     484            } else {
     485                Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
     486                f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
     487            }
     488        }
     489
     490        Value * const segNo = kernel->acquireLogicalSegmentNo();
    436491        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     492    }
     493
     494    for (const auto consumed : consumedPos) {
     495        const StreamSetBuffer * const buf = consumed.first;
     496        KernelBuilder * k = buf->getProducer();
     497        const auto & outputs = k->getStreamSetOutputBuffers();
     498        for (unsigned i = 0; i < outputs.size(); ++i) {
     499            if (outputs[i] == buf) {
     500                k->setConsumedItemCount(k->getStreamOutputs()[i].name, consumed.second);
     501                break;
     502            }
     503        }
    437504    }
    438505
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5402 r5418  
    4141// a continous buffer for the full segment (number of blocks).
    4242
    43 void expand3_4Kernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
     43void expand3_4Kernel::generateDoSegmentMethod() {
    4444
    4545    BasicBlock * expand2_3entry = iBuilder->GetInsertBlock();
     
    9595    // process in multiples of 3 full blocks of data.
    9696    //
    97     Value * loopDivisor = iBuilder->CreateSelect(doFinal, triplePackSize, tripleBlockSize);
     97    Value * loopDivisor = iBuilder->CreateSelect(getIsFinal(), triplePackSize, tripleBlockSize);
    9898    Value * excessItems = iBuilder->CreateURem(itemsAvail, loopDivisor);
    9999    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
     
    162162
    163163    // Except for final segment processing, we are done.
    164     iBuilder->CreateCondBr(doFinal, expand3_4_final, expand3_4_exit);
     164    iBuilder->CreateCondBr(getIsFinal(), expand3_4_final, expand3_4_exit);
    165165
    166166    // Final segment processing.   Less than a triplePack remains.
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5297 r5418  
    2323    expand3_4Kernel(IDISA::IDISA_Builder * iBuilder);
    2424private:
    25     void generateDoSegmentMethod(llvm::Value *doFinal, const std::vector<llvm::Value *> &producerPos) override final;
     25    void generateDoSegmentMethod() override final;
    2626};
    2727
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5411 r5418  
    1717namespace kernel {
    1818
    19 void StdInKernel::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
     19void StdInKernel::generateDoSegmentMethod() {
    2020
    2121    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    3535    iBuilder->SetInsertPoint(readBlock);
    3636
     37//    Value * consumed = getConsumedItemCount("InputStream");
     38//    Value * remaining = iBuilder->CreateSub(itemsAlreadyRead, consumed);
     39
    3740    // how many pages are required to have enough data for the segment plus one overflow block?
    3841    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     
    4043    reserveBytes("InputStream", bytesToRead);
    4144    BasicBlock * const readExit = iBuilder->GetInsertBlock();
     45
     46
    4247    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
    4348    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
     
    7580}
    7681
    77 void FileSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
     82void FileSourceKernel::generateDoSegmentMethod() {
    7883
    7984    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     
    101106}
    102107
    103 void FileSourceKernel::generateInitMethod() {
     108void FileSourceKernel::generateInitializeMethod() {
    104109    setBaseAddress("sourceBuffer", getScalarField("fileSource"));
    105110    setBufferedSize("sourceBuffer", getScalarField("fileSize"));
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.h

    r5398 r5418  
    1919    bool moduleIDisSignature() override { return true; }
    2020protected:
    21     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     21    void generateDoSegmentMethod() override;
    2222private:
    2323    unsigned mSegmentBlocks;
     
    3030    bool moduleIDisSignature() override { return true; }
    3131protected:
    32     void generateInitMethod() override;
    33     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     32    void generateInitializeMethod() override;
     33    void generateDoSegmentMethod() override;
    3434private:
    3535    unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5415 r5418  
    1818// However, if the segment spans two memory areas (e.g., because of wraparound),
    1919// then two write calls are made.
    20 void StdOutKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     20void StdOutKernel::generateDoSegmentMethod() {
    2121    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    2222
     
    2424    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth / 8);
    2525    Value * processed = getProcessedItemCount("codeUnitBuffer");
    26     Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     26    Value * itemsToDo = iBuilder->CreateSub(mAvailableItemCount[0], processed);
    2727    // There may be two memory areas if we are at the physical end of a circular buffer.
    2828    const auto b  = getInputStreamSetBuffer("codeUnitBuffer");
     
    5656        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    5757
    58         itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     58        itemsToDo = iBuilder->CreateSub(mAvailableItemCount[0], processed);
    5959        iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    6060        processed = iBuilder->CreateAdd(processed, itemsToDo);
    61         setProcessedItemCount("codeUnitBuffer", producerPos[0]);
     61        setProcessedItemCount("codeUnitBuffer", mAvailableItemCount[0]);
    6262        iBuilder->CreateBr(stdoutExit);
    6363        iBuilder->SetInsertPoint(stdoutExit);
     
    7171}
    7272
    73 void FileSink::generateInitMethod() {
     73void FileSink::generateInitializeMethod() {
    7474    BasicBlock * setTerminationOnFailure = CreateBasicBlock("setTerminationOnFailure");
    7575    BasicBlock * fileSinkInitExit = CreateBasicBlock("fileSinkInitExit");
     
    9999}
    100100
    101 void FileSink::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     101void FileSink::generateDoSegmentMethod() {
    102102
    103103    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
     
    148148        iBuilder->SetInsertPoint(checkFinal);
    149149    }
    150     iBuilder->CreateCondBr(doFinal, closeFile, fileOutExit);
     150    iBuilder->CreateCondBr(mIsFinal, closeFile, fileOutExit);
    151151
    152152    iBuilder->SetInsertPoint(closeFile);
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5292 r5418  
    1616    StdOutKernel(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    1717private:
    18     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     18    void generateDoSegmentMethod() override final;
    1919private:
    2020    const unsigned mCodeUnitWidth;
     
    2727    FileSink(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    2828protected:
    29     void generateInitMethod() override final;
    30     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     29    void generateInitializeMethod() override final;
     30    void generateDoSegmentMethod() override final;
    3131private:
    3232    const unsigned mCodeUnitWidth;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5411 r5418  
    178178void SourceFileBuffer::setBaseAddress(Value * self, Value * addr) const {
    179179    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    180     iBuilder->CreateStore(addr, ptr);
     180    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
    181181}
    182182
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5414 r5418  
    2929#include <kernels/pipeline.h>
    3030#include <kernels/kernel.h>
    31 #ifdef CUDA_ENABLED
    32 #include <IR_Gen/llvm2ptx.h>
    33 #endif
     31#include <sys/stat.h>
    3432
    3533using namespace llvm;
     
    229227}
    230228
    231 
    232229void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    233     assert (mModuleMap.count(&kb) == 0);
     230    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    234231    mPipeline.push_back(&kb);
    235     mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
     232    kb.createKernelStub(inputs, outputs);
    236233}
    237234
    238235void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    239     assert (mModuleMap.count(kb) == 0);
     236    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    240237    mPipeline.push_back(kb);
    241     mModuleMap.emplace(kb, kb->createKernelStub(inputs, outputs));
     238    kb->createKernelStub(inputs, outputs);
    242239}
    243240
     
    272269    }
    273270    for (const auto & k : mPipeline) {
    274         k->terminateInstance();
    275     }
    276 }
    277 
    278 void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
    279     const auto f = mModuleMap.find(&kb);
    280     assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
    281     mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
     271        k->finalizeInstance();
     272    }
     273}
     274
     275void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
     276    assert ("addKernelCall or makeKernelCall must be called before addExternalLink" && (kb.getModule() != nullptr));
     277    mEngine->addGlobalMapping(cast<Function>(kb.getModule()->getOrInsertFunction(name, type)), functionPtr);
     278}
     279
     280uint64_t file_size(const uint32_t fd) {
     281    struct stat st;
     282    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
     283        st.st_size = 0;
     284    }
     285    return st.st_size;
    282286}
    283287
     
    322326    #endif
    323327
     328    FunctionType * fileSizeType = FunctionType::get(iBuilder->getInt64Ty(), { iBuilder->getInt32Ty() });
     329    mEngine->addGlobalMapping(cast<Function>(mMainModule->getOrInsertFunction("file_size", fileSizeType)), (void *)&file_size);
     330
    324331    PM.run(*m);
    325     for (auto pair : mModuleMap) {
    326         kernel::KernelBuilder * const kb = std::get<0>(pair);
    327         m = std::get<1>(pair);
     332    for (kernel::KernelBuilder * const kb : mPipeline) {
     333        m = kb->getModule();
    328334        bool uncachedObject = true;
    329335        if (mCache) {
     
    352358    } catch (...) { m->dump(); throw; }
    353359    #endif
    354     mModuleMap.clear();
    355360}
    356361
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.h

    r5414 r5418  
    1010#include <IR_Gen/idisa_builder.h>
    1111#include <llvm/IR/TypeBuilder.h>
     12#include <kernels/kernel.h>
     13#include <kernels/streamset.h>
    1214#include <boost/container/flat_map.hpp>
    1315
     
    1921namespace IDISA { class IDISA_Builder; }
    2022namespace kernel { class KernelBuilder; }
    21 //namespace parabix { class StreamSetBuffer; }
    22 #include <kernels/streamset.h>
     23
    2324class ParabixObjectCache;
    2425
     
    8283    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
    8384
    84     void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    85 
    8685    void linkAndFinalize();
    8786   
    8887    void * getPointerToMain();
     88
     89private:
     90
     91
     92    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
     93
    8994
    9095private:
     
    98103    std::vector<std::unique_ptr<kernel::KernelBuilder>> mOwnedKernels;
    99104    std::vector<std::unique_ptr<parabix::StreamSetBuffer>> mOwnedBuffers;
    100     ModuleMap                               mModuleMap;
    101105};
    102106
Note: See TracChangeset for help on using the changeset viewer.