Ignore:
Timestamp:
Apr 22, 2017, 4:03:25 PM (2 years ago)
Author:
nmedfort
Message:

Removed non-functional CUDA code from icgrep and consolidated the grep and multigrep modes into a single function; allowed the segment-parallel pipeline to use the process itself as its initial thread; modified MMapSourceKernel to do the mapping itself by performing the mmap directly, and to advise the OS to drop consumed data streams.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5411 r5418  
    3434static const std::string BUFFER_PTR_SUFFIX = "_bufferPtr";
    3535
    36 static const std::string CONSUMER_LOGICAL_SEGMENT_SUFFIX = "_cls";
     36static const std::string CONSUMER_SUFFIX = "_cls";
    3737
    3838using namespace llvm;
     
    5555unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
    5656    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    57         report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
     57        report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
    5858    }
    5959    const auto index = mKernelFields.size();
     
    7272   
    7373void KernelBuilder::prepareKernel() {
    74 
    7574    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    7675        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    7776    }
    78 
    7977    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    8078        std::string tmp;
     
    9795        }
    9896        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    99         if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
     97        if ((i == 0) || mStreamSetInputs[i].rate.isUnknown()) {
    10098            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    10199        }       
     
    124122    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    125123    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    126         addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     124        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
    127125    }
    128126
     
    130128    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
    131129
     130    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     131        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
     132    }
     133
    132134    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    133135}
    134136
    135 Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    136 
     137void KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     138
     139    assert (mModule == nullptr);
    137140    assert (mStreamSetInputBuffers.empty());
    138141    assert (mStreamSetOutputBuffers.empty());
     
    188191    prepareKernel();
    189192
    190     Module * const m = new Module(cacheName.str(), iBuilder->getContext());
    191     m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    192     return m;
     193    mModule = new Module(cacheName.str(), iBuilder->getContext());
     194    mModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    193195}
    194196
     
    213215        auto savePoint = iBuilder->saveIP();
    214216        addKernelDeclarations(iBuilder->getModule());
    215         callGenerateInitMethod();
     217        callGenerateInitializeMethod();
    216218        callGenerateDoSegmentMethod();       
    217         // Implement the accumulator get functions
    218         for (auto binding : mScalarOutputs) {
    219             Function * f = getAccumulatorFunction(binding.name);
    220             iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
    221             Value * self = &*(f->arg_begin());
    222             Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
    223             Value * retVal = iBuilder->CreateLoad(ptr);
    224             iBuilder->CreateRet(retVal);
    225         }
    226         callGenerateTerminateMethod();
     219        callGenerateFinalizeMethod();
    227220        iBuilder->restoreIP(savePoint);
    228221        setInstance(saveInstance);
     
    231224}
    232225
    233 void KernelBuilder::callGenerateDoSegmentMethod() {
    234     mCurrentMethod = getDoSegmentFunction();
    235     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    236     auto args = mCurrentMethod->arg_begin();
    237     setInstance(&*(args++));
    238     Value * doFinal = &*(args++);
    239     std::vector<Value *> producerPos;
    240     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    241         producerPos.push_back(&*(args++));
    242     }
    243     generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    244     iBuilder->CreateRetVoid();
    245 }
    246 
    247 void KernelBuilder::callGenerateInitMethod() {
     226inline void KernelBuilder::callGenerateInitializeMethod() {
    248227    mCurrentMethod = getInitFunction();
    249228    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     
    257236        setConsumerState(binding.name, &*(args++));
    258237    }
    259     generateInitMethod();
     238    generateInitializeMethod();
    260239    iBuilder->CreateRetVoid();
    261240}
    262241
    263 void KernelBuilder::callGenerateTerminateMethod() {
    264     mCurrentMethod = getTerminateFunction();
    265     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
     242inline void KernelBuilder::callGenerateDoSegmentMethod() {
     243    mCurrentMethod = getDoSegmentFunction();
     244    BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
     245    iBuilder->SetInsertPoint(entry);
    266246    auto args = mCurrentMethod->arg_begin();
    267247    setInstance(&*(args++));
    268     generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
     248    mIsFinal = &*(args++);
     249    const auto n = mStreamSetInputs.size();
     250    mAvailableItemCount.resize(n, nullptr);
     251    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     252        mAvailableItemCount[i] = &*(args++);
     253    }
     254    generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
     255    mIsFinal = nullptr;
     256    mAvailableItemCount.clear();
    269257    iBuilder->CreateRetVoid();
     258}
     259
     260inline void KernelBuilder::callGenerateFinalizeMethod() {
     261    mCurrentMethod = getTerminateFunction();
     262    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     263    auto args = mCurrentMethod->arg_begin();
     264    setInstance(&*(args++));
     265    generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
     266    const auto n = mScalarOutputs.size();
     267    if (n == 0) {
     268        iBuilder->CreateRetVoid();
     269    } else {
     270        Value * outputs[n];
     271        for (unsigned i = 0; i < n; ++i) {
     272            outputs[i] = getScalarField(mScalarOutputs[i].name);
     273        }
     274        if (n == 1) {
     275            iBuilder->CreateRet(outputs[0]);
     276        } else {
     277            iBuilder->CreateAggregateRet(outputs, n);
     278        }
     279    }
    270280}
    271281
     
    307317
    308318llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
    309     auto arg = mCurrentMethod->arg_begin();
    310     ++arg; // self
    311     ++arg; // doFinal
    312319    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    313320        if (mStreamSetInputs[i].name == name) {
    314             return &*arg;
    315         }
    316         ++arg;
     321            return mAvailableItemCount[i];
     322        }
    317323    }
    318324    return nullptr;
     
    334340}
    335341
     342Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
     343    return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
     344}
     345
    336346void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
    337347    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
     
    342352}
    343353
     354void KernelBuilder::setConsumedItemCount(const std::string & name, Value * value) const {
     355    setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
     356}
     357
    344358Value * KernelBuilder::getTerminationSignal() const {
    345359    return getScalarField(TERMINATION_SIGNAL);
     
    359373
    360374llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
    361     return getScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     375    return getScalarField(name + CONSUMER_SUFFIX);
    362376}
    363377
    364378void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
    365     setScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX, value);
     379    setScalarField(name + CONSUMER_SUFFIX, value);
    366380}
    367381
     
    443457}
    444458
    445 void KernelBuilder::setBaseAddress(const std::string & name, llvm::Value * addr) const {
    446     unsigned index; Port port;
    447     std::tie(port, index) = getStreamPort(name);
    448     const StreamSetBuffer * buf = nullptr;
    449     if (port == Port::Input) {
    450         assert (index < mStreamSetInputBuffers.size());
    451         buf = mStreamSetInputBuffers[index];
    452     } else {
    453         assert (index < mStreamSetOutputBuffers.size());
    454         buf = mStreamSetOutputBuffers[index];
    455     }
    456     return buf->setBaseAddress(getStreamSetBufferPtr(name), addr);
     459Value * KernelBuilder::getBaseAddress(const std::string & name) const {
     460    return getAnyStreamSetBuffer(name)->getBaseAddress(getStreamSetBufferPtr(name));
     461}
     462
     463void KernelBuilder::setBaseAddress(const std::string & name, Value * const addr) const {
     464    return getAnyStreamSetBuffer(name)->setBaseAddress(getStreamSetBufferPtr(name), addr);
    457465}
    458466
    459467Value * KernelBuilder::getBufferedSize(const std::string & name) const {
    460     unsigned index; Port port;
    461     std::tie(port, index) = getStreamPort(name);
    462     const StreamSetBuffer * buf = nullptr;
    463     if (port == Port::Input) {
    464         assert (index < mStreamSetInputBuffers.size());
    465         buf = mStreamSetInputBuffers[index];
    466     } else {
    467         assert (index < mStreamSetOutputBuffers.size());
    468         buf = mStreamSetOutputBuffers[index];
    469     }
    470     return buf->getBufferedSize(getStreamSetBufferPtr(name));
     468    return getAnyStreamSetBuffer(name)->getBufferedSize(getStreamSetBufferPtr(name));
    471469}
    472470
     
    491489}
    492490
    493 KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
    494     const auto f = mStreamMap.find(name);
    495     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    496         report_fatal_error(getName() + " does not contain stream set: " + name);
    497     }
    498     return f->second;
    499 }
    500 
    501491Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
    502492    return getScalarField(name + BUFFER_PTR_SUFFIX);
     
    513503
    514504CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
    515     assert (getDoSegmentFunction()->getArgumentList().size() == args.size());
    516     return iBuilder->CreateCall(getDoSegmentFunction(), args);
    517 }
    518 
    519 CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
    520     return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
     505    Function * const doSegment = getDoSegmentFunction();
     506    assert (doSegment->getArgumentList().size() == args.size());
     507    return iBuilder->CreateCall(doSegment, args);
     508}
     509
     510Value * KernelBuilder::getAccumulator(const std::string & accumName) const {
     511    if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
     512        report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
     513    }
     514    const auto n = mScalarOutputs.size();
     515    if (LLVM_UNLIKELY(n == 0)) {
     516        report_fatal_error(getName() + " has no output scalars.");
     517    } else {
     518        for (unsigned i = 0; i < n; ++i) {
     519            const Binding & b = mScalarOutputs[i];
     520            if (b.name == accumName) {
     521                if (n == 1) {
     522                    return mOutputScalarResult;
     523                } else {
     524                    return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
     525                }
     526            }
     527        }
     528        report_fatal_error(getName() + " has no output scalar named " + accumName);
     529    }
    521530}
    522531
     
    576585    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
    577586    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    578         const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     587        const auto output = mStreamSetOutputBuffers[i];
     588        const auto & consumers = output->getConsumers();
    579589        const auto n = consumers.size();
    580590        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
     
    582592        for (unsigned i = 0; i < n; ++i) {
    583593            KernelBuilder * const consumer = consumers[i];
    584             assert (consumer->getInstance());
    585             Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
    586             iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
     594            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
     595            Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
     596            iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
    587597        }
    588598        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    592602        args.push_back(outputConsumers);
    593603    }
     604
     605
    594606    iBuilder->CreateCall(getInitFunction(), args);
    595 }
    596 
    597 void KernelBuilder::terminateInstance() {
    598     iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
    599607}
    600608
     
    602610//  each block of the given number of blocksToDo, and then updates counts.
    603611
    604 void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
     612void BlockOrientedKernel::generateDoSegmentMethod() {
    605613
    606614    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    613621    Value * baseTarget = nullptr;
    614622    if (useIndirectBr()) {
    615         baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
     623        baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
    616624    }
    617625
    618626    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    619     Value * availablePos = producerPos[0];
     627    Value * availablePos = mAvailableItemCount[0];
    620628    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    621629    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
     
    674682        mStrideLoopBranch->addDestination(segmentDone);
    675683    } else {
    676         iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
     684        iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
    677685    }
    678686
     
    681689    iBuilder->SetInsertPoint(doFinalBlock);
    682690
    683     Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
     691    Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
    684692    writeFinalBlockMethod(remainingItems);
    685693
    686     itemsDone = producerPos[0];
     694    itemsDone = mAvailableItemCount[0];
    687695    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    688696    setTerminationSignal();
     
    839847}
    840848
     849void KernelBuilder::finalizeInstance() {
     850    mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
     851}
     852
     853KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
     854    const auto f = mStreamMap.find(name);
     855    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
     856        report_fatal_error(getName() + " does not contain stream set " + name);
     857    }
     858    return f->second;
     859}
    841860
    842861// CONSTRUCTOR
     
    849868                             std::vector<Binding> && internal_scalars)
    850869: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     870, mModule(nullptr)
    851871, mCurrentMethod(nullptr)
    852872, mNoTerminateAttribute(false)
    853 , mIsGenerated(false) {
     873, mIsGenerated(false)
     874, mIsFinal(nullptr)
     875, mOutputScalarResult(nullptr) {
    854876
    855877}
     
    886908
    887909}
     910
Note: See TracChangeset for help on using the changeset viewer.