Changeset 5597


Ignore:
Timestamp:
Aug 4, 2017, 12:31:21 PM (4 months ago)
Author:
nmedfort
Message:

Modified stream set buffers to use heap memory.

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5543 r5597  
    295295}
    296296
     297llvm::Value * CBuilder::CreateCacheAlignedMalloc(llvm::Value * size) {
     298    const auto alignment = getCacheAlignment();
     299    if (LLVM_LIKELY(isa<Constant>(size))) {
     300        Constant * const align = ConstantInt::get(size->getType(), alignment, false);
     301        Constant * offset = ConstantExpr::getURem(cast<Constant>(size), align);
     302        if (!offset->isNullValue()) {
     303            size = ConstantExpr::getAdd(cast<Constant>(size), ConstantExpr::getSub(align, offset));
     304        }
     305    }
     306    return CreateAlignedMalloc(size, alignment);
     307}
     308
    297309Value * CBuilder::CreateAlignedMalloc(Value * size, const unsigned alignment) {
    298310    if (LLVM_UNLIKELY((alignment & (alignment - 1)) != 0)) {
     
    308320        CreateAssertZero(CreateURem(size, align), "CreateAlignedMalloc: size must be an integral multiple of alignment.");
    309321    }
    310 
    311322    Value * ptr = nullptr;
    312323    if (hasAlignedAlloc()) {
     
    363374    }
    364375    CallInst * const ci = CreateCall(f, {CreatePointerCast(ptr, voidPtrTy), CreateZExtOrTrunc(size, sizeTy)});
    365 
    366376    return CreatePointerCast(ci, ptr->getType());
    367377}
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5543 r5597  
    7878    }
    7979
    80     llvm::Value * CreateCacheAlignedMalloc(llvm::Value * size) {
    81         return CreateAlignedMalloc(size, getCacheAlignment());
    82     }
     80    llvm::Value * CreateCacheAlignedMalloc(llvm::Value * size);
    8381
    8482    // stdio.h functions
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5547 r5597  
    8585   
    8686    pxDriver.generatePipelineIR();
    87 
     87    pxDriver.deallocateBuffers();
    8888    iBuilder->CreateRetVoid();
    8989
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5591 r5597  
    265265
    266266    pxDriver.generatePipelineIR();
    267 
     267    pxDriver.deallocateBuffers();
    268268    idb->CreateRetVoid();
    269269
     
    337337
    338338    pxDriver.generatePipelineIR();
    339 
     339    pxDriver.deallocateBuffers();
    340340    iBuilder->CreateRetVoid();
    341341
     
    398398
    399399    pxDriver.generatePipelineIR();
    400 
     400    pxDriver.deallocateBuffers();
    401401    idb->CreateRetVoid();
    402402
     
    532532
    533533    pxDriver.generatePipelineIR();
    534 
     534    pxDriver.deallocateBuffers();
    535535    iBuilder->CreateRetVoid();
    536536
     
    632632       
    633633    pxDriver.generatePipelineIR();
     634    pxDriver.deallocateBuffers();
    634635    iBuilder->CreateRetVoid();
    635636
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5590 r5597  
    447447    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
    448448    idb->CreateStore(matchedLineCount, outputThreadPtr);
     449    mGrepDriver->deallocateBuffers();
    449450    idb->CreateRetVoid();
    450451
     
    599600    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_codepoints);
    600601    pxDriver.generatePipelineIR();
     602    pxDriver.deallocateBuffers();
    601603    idb->CreateRetVoid();
    602604    pxDriver.finalizeObject();
     
    680682    pxDriver.LinkFunction(*scanMatchK, "matcher", &insert_property_values);
    681683    pxDriver.generatePipelineIR();
     684    pxDriver.deallocateBuffers();
    682685    idb->CreateRetVoid();
    683686    pxDriver.finalizeObject();
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5544 r5597  
    3030    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
    3131        Type * const ty = getType();
    32         mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
     32        if (mAddressSpace == 0) {
     33            Constant * size = ConstantExpr::getSizeOf(ty);
     34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
     35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
     36        } else {
     37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
     38        }
    3339        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
    3440    } else {
    3541        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
     42    }
     43}
     44
     45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
     46    if (mAddressSpace == 0) {
     47        iBuilder->CreateFree(mStreamSetBufferPtr);
    3648    }
    3749}
     
    156168}
    157169
    158 void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & /* kb */) const {
    159     /* do nothing: memory is stack allocated */
    160 }
    161 
    162170void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
    163171    Type * i8ptr = iBuilder->getInt8PtrTy();
     
    257265}
    258266
     267void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     268    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
     269        Type * const ty = getType();
     270        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
     271        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
     272    } else {
     273        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
     274    }
     275}
     276
     277void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
     278
     279}
    259280
    260281// External File Buffer
     
    263284}
    264285
     286void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
     287
     288}
     289
    265290Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
    266291    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
    267292}
    268293
    269 Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
     294Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *) const {
    270295    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
    271296}
     
    292317// CircularCopybackBuffer Buffer
    293318void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    294     mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
     319    Type * const ty = getType();
     320    Constant * size = ConstantExpr::getSizeOf(ty);
     321    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
     322    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
    295323}
    296324
     
    311339
    312340void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    313     mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
     341    Type * const ty = getType();
     342    Constant * size = ConstantExpr::getSizeOf(ty);
     343    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
     344    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
    314345}
    315346
     
    607638}
    608639
     640
    609641Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition) const {
    610642    Constant * blockSize = b->getSize(b->getBitBlockWidth());
     
    612644        return b->CreateSub(blockSize, b->CreateURem(fromPosition, blockSize));
    613645    } else {
    614         Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
     646        Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
    615647        Value * bufSize = b->CreateMul(bufBlocks, blockSize);
    616648        return b->CreateSub(bufSize, b->CreateURem(fromPosition, bufSize, "linearItems"));
     
    619651
    620652Value * DynamicBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const b, Value * handle, Value * fromBlock) const {
    621     Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
     653    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
    622654    return b->CreateSub(bufBlocks, b->CreateURem(fromBlock, bufBlocks), "linearBlocks");
    623655}
     
    632664    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
    633665    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
    634     Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
     666    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
    635667    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufBasePtrField->getType()->getPointerElementType());
    636668    b->CreateStore(bufPtr, bufBasePtrField);
    637     b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))}));
    638     b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
    639     b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::Length))}));
    640     b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::ProducedPosition))}));
    641     b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::ConsumedPosition))}));
     669    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
     670    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
     671    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
     672    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
     673    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
    642674    mStreamSetBufferPtr = handle;
    643675}
     
    645677void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    646678    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
    647     b->CreateFree(b->CreateLoad(b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))})));
     679    b->CreateFree(b->CreateLoad(b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))})));
    648680}
    649681
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5544 r5597  
    162162    llvm::Value * getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromBlock) const override;
    163163
    164     virtual llvm::Type * getStreamSetBlockType() const override;
     164    llvm::Type * getStreamSetBlockType() const override;
     165
     166    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) override;
     167
     168    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    165169
    166170protected:
     
    182186    ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, llvm::Value * addr, unsigned AddressSpace = 0);
    183187
    184     // Can't allocate - raise an error. */
     188    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromPosition) const override;
     189
    185190    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    186191
    187     llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromPosition) const override;
     192    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    188193
    189194protected:
     
    221226   
    222227    CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace = 0);
    223 
    224     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    225228   
    226229    // Generate copyback code for the given number of overflowItems.
     
    230233   
    231234    llvm::Value * getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromBlock) const override;
    232    
     235
     236    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     237
    233238private:
    234239    size_t mOverflowBlocks;
     
    241246   
    242247    SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth = 64, unsigned AddressSpace = 0);
    243    
    244     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    245    
     248       
    246249    void createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy) const override;
    247250
     
    253256    llvm::Value * getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromBlock) const override;
    254257   
     258    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     259
    255260protected:
    256261    llvm::Value * getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * blockIndex) const override;
     
    276281    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromPosition) const override;
    277282
     283    llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self) const override;
     284
    278285    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    279 
    280     llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self) const override;
    281286
    282287    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r5493 r5597  
    105105    pxDriver.generatePipelineIR();
    106106
     107    pxDriver.deallocateBuffers();
     108
    107109    iBuilder->CreateRetVoid();
    108110 
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5591 r5597  
    4646void generateSegmentParallelPipeline(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Kernel *> & kernels) {
    4747
    48     assert (codegen::BufferSegments >= codegen::ThreadNum);
    49 
    5048    const unsigned n = kernels.size();
    5149    Module * const m = iBuilder->getModule();
     
    5452    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    5553    std::vector<Type *> structTypes;
     54
     55    codegen::BufferSegments = std::max(codegen::BufferSegments, codegen::ThreadNum);
    5656
    5757    Value * instance[n];
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5540 r5597  
    345345    pxDriver.generatePipelineIR();
    346346   
     347    pxDriver.deallocateBuffers();
     348
    347349    iBuilder->CreateRetVoid();
    348350   
     
    418420    pxDriver.generatePipelineIR();
    419421   
     422    pxDriver.deallocateBuffers();
     423
    420424    iBuilder->CreateRetVoid();
    421425
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5486 r5597  
    189189
    190190    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
    191    
     191    pxDriver.deallocateBuffers();
    192192    iBuilder->CreateRetVoid();
    193193
Note: See TracChangeset for help on using the changeset viewer.