Ignore:
Timestamp:
Apr 18, 2017, 12:51:26 PM (2 years ago)
Author:
nmedfort
Message:

Potential bug fix for 32-bit. Modified MRemap to check for Linux OS support. Added MMapAdvise to CBuilder.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5408 r5411  
    1515
    1616static const auto INIT_SUFFIX = "_Init";
     17
     18static const auto TERMINATE_SUFFIX = "_Terminate";
    1719
    1820static const auto DO_SEGMENT_SUFFIX = "_DoSegment";
     
    99101    for (auto binding : mStreamSetOutputs) {
    100102        args->setName(binding.name + "ConsumerLogicalSegments");       
    101 //        args->addAttr(Attribute::NoCapture);
    102 //        args->addAttr(Attribute::ReadOnly);
    103103        ++args;
    104104    }
     
    119119        (++args)->setName(input.name + "AvailableItems");
    120120    }
     121
     122    // Create the terminate function prototype
     123    FunctionType * terminateType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
     124    Function * terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
     125    terminateFunc->setCallingConv(CallingConv::C);
     126    terminateFunc->setDoesNotThrow();
     127    terminateFunc->setDoesNotCapture(1);
     128    args = terminateFunc->arg_begin();
     129    args->setName("self");
    121130
    122131    /// INVESTIGATE: replace the accumulator methods with a single Exit method that handles any clean up and returns
     
    168177    return f;
    169178}
     179
     180Function * KernelInterface::getTerminateFunction() const {
     181    const auto name = getName() + TERMINATE_SUFFIX;
     182    Function * f = iBuilder->getModule()->getFunction(name);
     183    if (LLVM_UNLIKELY(f == nullptr)) {
     184        llvm::report_fatal_error("Cannot find " + name);
     185    }
     186    return f;
     187}
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5408 r5411  
    104104    virtual void initializeInstance() = 0;
    105105
     106    virtual void terminateInstance() = 0;
     107
    106108    void setInitialArguments(std::vector<llvm::Value *> args);
    107109
     
    139141
    140142    llvm::Function * getAccumulatorFunction(const std::string & accumName) const;
     143
     144    llvm::Function * getTerminateFunction() const;
    141145
    142146protected:
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5408 r5411  
    187187
    188188    prepareKernel();
    189     return new Module(cacheName.str(), iBuilder->getContext());
     189
     190    Module * const m = new Module(cacheName.str(), iBuilder->getContext());
     191    m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
     192    return m;
    190193}
    191194
     
    204207
    205208void KernelBuilder::generateKernel() {
    206     // If the module id is cannot uniquely identify this kernel, "generateKernelSignature()" will have already
     209    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
    207210    // generated the unoptimized IR.
    208211    if (!mIsGenerated) {
     212        auto saveInstance = getInstance();
    209213        auto savePoint = iBuilder->saveIP();
    210214        addKernelDeclarations(iBuilder->getModule());
    211215        callGenerateInitMethod();
    212         callGenerateDoSegmentMethod();
     216        callGenerateDoSegmentMethod();       
    213217        // Implement the accumulator get functions
    214218        for (auto binding : mScalarOutputs) {
     
    220224            iBuilder->CreateRet(retVal);
    221225        }
     226        callGenerateTerminateMethod();
    222227        iBuilder->restoreIP(savePoint);
     228        setInstance(saveInstance);
    223229        mIsGenerated = true;       
    224230    }
     
    252258    }
    253259    generateInitMethod();
     260    iBuilder->CreateRetVoid();
     261}
     262
     263void KernelBuilder::callGenerateTerminateMethod() {
     264    mCurrentMethod = getTerminateFunction();
     265    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
     266    auto args = mCurrentMethod->arg_begin();
     267    setInstance(&*(args++));
     268    generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
    254269    iBuilder->CreateRetVoid();
    255270}
     
    502517}
    503518
    504 CallInst * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
    505     return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
     519CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
     520    return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
    506521}
    507522
     
    519534
    520535void KernelBuilder::initializeInstance() {
     536
     537
    521538    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
    522539        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
     
    558575    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
    559576    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
    560     Constant * const sizeOfSizePtrTy = ConstantExpr::getSizeOf(sizePtrTy);
    561 
    562577    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    563578        const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     579        const auto n = consumers.size();
    564580        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
    565         Value * const numPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    566         const auto n = consumers.size();
    567         const auto consumerCount = iBuilder->getSize(n);
    568         iBuilder->CreateStore(consumerCount, numPtr);
    569         Value * const consumerPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    570         Value * const segNoPtrs = iBuilder->CreatePointerCast(iBuilder->CreateMalloc(ConstantExpr::getMul(consumerCount, sizeOfSizePtrTy)), sizePtrPtrTy);
    571         iBuilder->CreateStore(segNoPtrs, consumerPtr);
     581        Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
    572582        for (unsigned i = 0; i < n; ++i) {
    573583            KernelBuilder * const consumer = consumers[i];
    574584            assert (consumer->getInstance());
    575             iBuilder->CreateStore(consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR), iBuilder->CreateGEP(segNoPtrs, iBuilder->getSize(i)));
    576         }
     585            Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
     586            iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
     587        }
     588        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     589        iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
     590        Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     591        iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
    577592        args.push_back(outputConsumers);
    578593    }
    579594    iBuilder->CreateCall(getInitFunction(), args);
     595}
     596
     597void KernelBuilder::terminateInstance() {
     598    iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
    580599}
    581600
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5408 r5411  
    8080    void initializeInstance() final;
    8181
     82    void terminateInstance() final;
     83
    8284    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
    8385
     
    121123        iBuilder->CreateStore(value, getScalarFieldPtr(index));
    122124    }
    123 
    124125
    125126    // Synchronization actions for executing a kernel for a particular logical segment.
     
    163164    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    164165
    165     llvm::CallInst * createGetAccumulatorCall(llvm::Value * self, const std::string & accumName) const;
     166    llvm::CallInst * createGetAccumulatorCall(const std::string & accumName) const;
    166167
    167168    virtual ~KernelBuilder() = 0;
     
    201202    virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    202203
     204    virtual void generateTerminateMethod() { }
     205
    203206    // Add an additional scalar field to the KernelState struct.
    204207    // Must occur before any call to addKernelDeclarations or createKernelModule.
     
    284287    void callGenerateDoSegmentMethod();
    285288
     289    void callGenerateTerminateMethod();
    286290
    287291private:
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5408 r5411  
    2222using FlatSet = boost::container::flat_set<Value>;
    2323
    24 Function * makeThreadFunction(const std::string & name, Module * const m) {
    25     LLVMContext & C = m->getContext();
    26     Type * const voidTy = Type::getVoidTy(C);
    27     PointerType * const int8PtrTy = Type::getInt8PtrTy(C);
    28     Function * const f = Function::Create(FunctionType::get(voidTy, {int8PtrTy}, false), Function::InternalLinkage, name, m);
     24Function * makeThreadFunction(IDISA::IDISA_Builder * const b, const std::string & name) {
     25    Function * const f = Function::Create(FunctionType::get(b->getVoidTy(), {b->getVoidPtrTy()}, false), Function::InternalLinkage, name, b->getModule());
    2926    f->setCallingConv(CallingConv::C);
    3027    f->arg_begin()->setName("input");
     
    4744    IntegerType * const sizeTy = iBuilder->getSizeTy();
    4845    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    49     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    5046    const unsigned threads = codegen::ThreadNum;
    5147    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    5248
    53     assert (!kernels.empty());
    54 
    5549    std::vector<Type *> structTypes;
    5650
     
    6357    StructType * const threadStructType = StructType::get(sharedStructType->getPointerTo(), sizeTy, nullptr);
    6458
    65     Function * const threadFunc = makeThreadFunction("segment", m);
     59    Function * const threadFunc = makeThreadFunction(iBuilder, "segment");
    6660
    6761    // -------------------------------------------------------------------------------------------------------------------------
     
    130124        iBuilder->SetInsertPoint(segmentLoopBody);
    131125        const auto & inputs = kernel->getStreamInputs();
    132         const auto & outputs = kernel->getStreamOutputs();
    133126        std::vector<Value *> args = {kernel->getInstance(), doFinal};
    134127        for (unsigned i = 0; i < inputs.size(); ++i) {
     
    140133        }
    141134
    142         CallInst * ci = kernel->createDoSegmentCall(args);
    143         // TODO: investigate whether this actually inlines the function call correctly despite being in a separate module.
    144         ci->addAttribute(AttributeSet::FunctionIndex, Attribute::AlwaysInline);
    145 
     135        kernel->createDoSegmentCall(args);
    146136        if (!kernel->hasNoTerminateAttribute()) {
    147137            doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal());
    148138        }
     139
     140        const auto & outputs = kernel->getStreamOutputs();
    149141        for (unsigned i = 0; i < outputs.size(); ++i) {
    150142            Value * const produced = kernel->getProducedItemCount(outputs[i].name, doFinal);
     
    203195    }
    204196
    205     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     197    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
    206198    for (unsigned i = 0; i < threads; ++i) {
    207199        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
     
    219211    IntegerType * const sizeTy = iBuilder->getSizeTy();
    220212    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    221     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    222213    ConstantInt * bufferSegments = ConstantInt::get(sizeTy, codegen::BufferSegments - 1);
    223214    ConstantInt * segmentItems = ConstantInt::get(sizeTy, codegen::SegmentSize * iBuilder->getBitBlockWidth());
     
    295286        const auto & inputs = kernel->getStreamInputs();
    296287
    297         Function * const threadFunc = makeThreadFunction("ppt:" + kernel->getName(), m);
     288        Function * const threadFunc = makeThreadFunction(iBuilder, "ppt:" + kernel->getName());
    298289
    299290         // Create the basic blocks for the thread function.
     
    394385    }
    395386
    396     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     387    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
    397388    for (unsigned i = 0; i < n; ++i) {
    398389        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
     
    409400    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    410401    Function * main = entryBlock->getParent();
    411 
    412     assert (!kernels.empty());
    413402
    414403    // Create the basic blocks for the loop.
     
    432421            args.push_back(f->second);
    433422        }
     423        Value * const segNo = kernel->acquireLogicalSegmentNo();
    434424        kernel->createDoSegmentCall(args);
    435425        if (!kernel->hasNoTerminateAttribute()) {
     
    444434        }
    445435
    446         Value * const segNo = kernel->acquireLogicalSegmentNo();
    447436        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    448437    }
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5398 r5411  
    4545    BasicBlock * const scanWordExit = CreateBasicBlock("ScanWordExit");
    4646    IntegerType * const sizeTy = iBuilder->getSizeTy();
    47     PointerType * const codeUnitTy = iBuilder->getIntNTy(mCodeUnitWidth)->getPointerTo();
    4847    const unsigned fieldCount = iBuilder->getBitBlockWidth() / sizeTy->getBitWidth();
    4948    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
     
    5251    Value * const lastRecordStart = getProcessedItemCount("InputStream");
    5352    Value * const lastRecordNum = getScalarField("LineNum");
    54     Value * const inputStream = iBuilder->CreatePointerCast(getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0)), codeUnitTy);
    5553
    5654    Value * const matches = iBuilder->CreateBitCast(loadInputStreamBlock("matchResult", iBuilder->getInt32(0)), scanwordVectorType);
     
    127125            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
    128126            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
    129 
    130127            Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord));
    131             Function * const matcher = m->getFunction("matcher");
    132             assert (matcher);
    133             switch (mGrepType) {
    134                 case GrepType::Normal:
    135                     iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream, getBufferedSize("InputStream"), getScalarField("FileIdx")});
    136                     break;
    137                 case GrepType::NameExpression:
    138                 case GrepType::PropertyValue:
    139                     iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream});
    140                     break;
    141                 default: break;
     128
     129            Function * const matcher = m->getFunction("matcher"); assert (matcher);
     130            auto args = matcher->arg_begin();
     131            Value * const mrn = iBuilder->CreateZExtOrTrunc(matchRecordNum, args->getType());
     132            Value * const mrs = iBuilder->CreateZExtOrTrunc(matchRecordStart, (++args)->getType());
     133            Value * const mre = iBuilder->CreateZExtOrTrunc(matchRecordEnd, (++args)->getType());
     134            Value * const inputStream = getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     135            Value * const is = iBuilder->CreatePointerCast(inputStream, (++args)->getType());
     136            if (mGrepType == GrepType::Normal) {
     137                Value * const sz = iBuilder->CreateZExtOrTrunc(getBufferedSize("InputStream"), (++args)->getType());
     138                Value * const fi = iBuilder->CreateZExtOrTrunc(getScalarField("FileIdx"), (++args)->getType());
     139                iBuilder->CreateCall(matcher, {mrn, mrs, mre, is, sz, fi});
     140            } else {
     141                iBuilder->CreateCall(matcher, {mrn, mrs, mre, is});
    142142            }
    143143
     
    204204    {},
    205205    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    206 , mGrepType(grepType)
    207 , mCodeUnitWidth(codeUnitWidth) {
    208 
    209 }
    210 
    211 }
     206, mGrepType(grepType) {
     207
     208}
     209
     210}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5398 r5411  
    1616class ScanMatchKernel final : public BlockOrientedKernel {
    1717public:
    18     ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType, unsigned codeUnitWidth);
    19     bool moduleIDisSignature() override {return true;}
     18    ScanMatchKernel(IDISA::IDISA_Builder * const iBuilder, const GrepType grepType, const unsigned codeUnitWidth);
     19    bool moduleIDisSignature() override { return true; }
    2020protected:
    2121    void generateDoBlockMethod() override;
     
    2525private:
    2626    const GrepType      mGrepType;
    27     const unsigned      mCodeUnitWidth;
    2827};
    2928}
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5408 r5411  
    3535    iBuilder->SetInsertPoint(readBlock);
    3636
    37 
    38 
    39 
    4037    // how many pages are required to have enough data for the segment plus one overflow block?
    4138    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     
    4340    reserveBytes("InputStream", bytesToRead);
    4441    BasicBlock * const readExit = iBuilder->GetInsertBlock();
    45 
    4642    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
    47 
    4843    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    4944    Value * const bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, bytesToRead);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5408 r5411  
    1515#include <llvm/IR/CFG.h>
    1616#include <kernels/kernel.h>
     17#include <kernels/toolchain.h>
    1718
    1819namespace llvm { class Constant; }
     
    200201}
    201202
     203Value * ExtensibleBuffer::roundUpToPageSize(Value * const value) const {
     204    const auto pageSize = getpagesize();
     205    assert ((pageSize & (pageSize - 1)) == 0);
     206    Constant * const pageMask = ConstantInt::get(value->getType(), pageSize - 1);
     207    return iBuilder->CreateAnd(iBuilder->CreateAdd(value, pageMask), iBuilder->CreateNot(pageMask));
     208}
     209
    202210void ExtensibleBuffer::allocateBuffer() {
    203211    Type * ty = getType();
    204212    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
    205213    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    206     Constant * initialSize = ConstantExpr::getSizeOf(ty->getStructElementType(1)->getPointerElementType());
    207     initialSize = ConstantExpr::getMul(initialSize, iBuilder->getSize(mBufferBlocks));
    208     initialSize = ConstantExpr::getIntegerCast(initialSize, iBuilder->getSizeTy(), false);
     214
     215    Type * const elementType = ty->getStructElementType(1)->getPointerElementType();
     216    Constant * size = ConstantExpr::getSizeOf(elementType);
     217    size = ConstantExpr::getMul(size, iBuilder->getSize(mBufferBlocks));
     218    size = ConstantExpr::getIntegerCast(size, iBuilder->getSizeTy(), false);
     219    Value * const initialSize = roundUpToPageSize(size);
     220
    209221    iBuilder->CreateStore(initialSize, capacityPtr);
    210     Value * addr = iBuilder->CreateAnonymousMMap(initialSize);
     222    Value * addr = iBuilder->CreateAnonymousMMap(size);
    211223    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    212224    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
     
    222234
    223235void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const requiredSize) const {
    224 
    225     // TODO: tweak this function to allow AlignedMalloc to begin copying prior to waiting for the
    226     // consumers to finish. MRemap could be used with the "do not move" flag set safely.
    227236
    228237    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    241250    kernel::KernelBuilder * const kernel = getProducer();
    242251    auto consumers = kernel->getStreamOutputs();
    243     if (LLVM_UNLIKELY(consumers.empty())) {
     252    if (consumers.empty()) {
    244253        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, expand);
    245254    } else { // we cannot risk expanding this buffer until all of the consumers have finished reading the data
    246255
    247         ConstantInt * const zeroSz = iBuilder->getSize(0);
     256        ConstantInt * const size0 = iBuilder->getSize(0);
    248257        Value * const segNo = kernel->acquireLogicalSegmentNo();
    249258        const auto n = consumers.size();
     
    262271            iBuilder->SetInsertPoint(load[i]);
    263272            Value * const outputConsumers = kernel->getConsumerState(consumers[i].name);
     273
    264274            Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
    265275            Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
    266             Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, zeroSz);
     276            Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
    267277            iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
    268278
    269279            iBuilder->SetInsertPoint(wait[i]);
    270280            PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
    271             consumerPhi->addIncoming(zeroSz, load[i]);
     281            consumerPhi->addIncoming(size0, load[i]);
    272282
    273283            Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
    274284            Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
    275285            Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
     286            assert (ready->getType() == iBuilder->getInt1Ty());
    276287            Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
    277288            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
     
    284295    }
    285296    iBuilder->SetInsertPoint(expand);
    286     Value * const reservedSize = iBuilder->CreateShl(requiredSize, 1);
    287 #ifdef __APPLE__
    288     Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
     297    Value * const reservedSize = roundUpToPageSize(iBuilder->CreateShl(requiredSize, 1));
    289298    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
    290     Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    291     iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
    292     iBuilder->CreateAlignedFree(baseAddr);
    293     Value * const remainingSize = iBuilder->CreateSub(reservedSize, currentSize);
    294     iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), remainingSize, iBuilder->getBitBlockWidth() / 8);
    295     newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    296 #else
    297     Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
     299
    298300    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    299301    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
    300302    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    301 #endif
    302303    iBuilder->CreateStore(newAddr, baseAddrPtr);
    303304    iBuilder->CreateStore(reservedSize, capacityPtr);
     
    425426: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
    426427    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
    427     if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
    428     if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    429 
     428    if (mOverflowBlocks != 1) {
     429        mUniqueID += "_" + std::to_string(mOverflowBlocks);
     430    }
     431    if (AddressSpace > 0) {
     432        mUniqueID += "@" + std::to_string(AddressSpace);
     433    }
    430434}
    431435
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5408 r5411  
    207207protected:
    208208
     209    llvm::Value * roundUpToPageSize(llvm::Value * const value) const;
     210
    209211    llvm::Value * getBaseAddress(llvm::Value * self) const override;
    210212
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5409 r5411  
    1616#ifndef NDEBUG
    1717#include <llvm/IR/Verifier.h>
     18#include <boost/container/flat_set.hpp>
    1819#endif
    1920#include <llvm/PassRegistry.h>                     // for PassRegistry
     
    3132#include <IR_Gen/llvm2ptx.h>
    3233#endif
    33  
    34 
    3534
    3635using namespace llvm;
     
    233232
    234233void ParabixDriver::generatePipelineIR() {
     234    #ifndef NDEBUG
     235    if (LLVM_UNLIKELY(mKernelList.empty())) {
     236        report_fatal_error("Pipeline must contain at least one kernel");
     237    } else {
     238        boost::container::flat_set<kernel::KernelBuilder *> K(mKernelList.begin(), mKernelList.end());
     239        if (LLVM_UNLIKELY(K.size() != mKernelList.size())) {
     240            report_fatal_error("Kernel definitions can only occur once in the pipeline");
     241        }
     242    }
     243    #endif
    235244    // note: instantiation of all kernels must occur prior to initialization
    236245    for (const auto & k : mKernelList) {
     
    250259        codegen::ThreadNum = 1;
    251260        generatePipelineLoop(iBuilder, mKernelList);
     261    }
     262    for (const auto & k : mKernelList) {
     263        k->terminateInstance();
    252264    }
    253265}
Note: See TracChangeset for help on using the changeset viewer.