Changeset 5386 for icGREP/icgrep-devel


Ignore:
Timestamp:
Mar 31, 2017, 3:05:41 PM (2 years ago)
Author:
nmedfort
Message:

Replaced stdin input stream with mmap'ed buffer and aligned each read call to the page size.

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5374 r5386  
    1212#include <llvm/IR/MDBuilder.h>
    1313#include <fcntl.h>
     14#include <unistd.h>
     15#include <sys/mman.h>
    1416#include <toolchain.h>
    1517
     
    165167    }
    166168    return CreateCall(aligned_malloc, {CreateZExtOrTrunc(size, intTy)});
     169}
     170
     171Value * CBuilder::CreateAnonymousMMap(Value * size) {
     172    DataLayout DL(mMod);
     173    PointerType * const voidPtrTy = getVoidPtrTy();
     174    IntegerType * const intTy = getIntPtrTy(DL);
     175    IntegerType * const sizeTy = getSizeTy();
     176    Type * const offTy = TypeBuilder<off_t, false>::get(getContext());
     177    Function * fMMap = mMod->getFunction("mmap");
     178    if (LLVM_UNLIKELY(fMMap == nullptr)) {
     179        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, intTy, intTy, intTy, offTy}, false);
     180        fMMap = Function::Create(fty, Function::ExternalLinkage, "mmap", mMod);
     181    }
     182    ConstantInt * const prot =  ConstantInt::get(intTy, PROT_READ | PROT_WRITE);
     183    ConstantInt * const flags =  ConstantInt::get(intTy, MAP_PRIVATE | MAP_ANONYMOUS);
     184    ConstantInt * const fd =  ConstantInt::get(intTy, -1);
     185    ConstantInt * const offset = ConstantInt::get(intTy, 0); // getCacheAlignment()
     186    Value * const ptr = CreateCall(fMMap, {Constant::getNullValue(voidPtrTy), size, prot, flags, fd, offset});
     187    CreateAssert(CreateICmpNE(CreatePtrToInt(ptr, getSizeTy()), getSize((size_t)MAP_FAILED)), "CreateAnonymousMMap: mmap failed to allocate memory");
     188    return ptr;
     189}
     190
     191Value * CBuilder::CreateFileSourceMMap(Value * const fd, Value * const size) {
     192    DataLayout DL(mMod);
     193    PointerType * const voidPtrTy = getVoidPtrTy();
     194    IntegerType * const intTy = getIntPtrTy(DL);
     195    IntegerType * const sizeTy = getSizeTy();
     196    Type * const offTy = TypeBuilder<off_t, false>::get(getContext());
     197    Function * fMMap = mMod->getFunction("mmap");
     198    if (LLVM_UNLIKELY(fMMap == nullptr)) {
     199        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, intTy, intTy, intTy, offTy}, false);
     200        fMMap = Function::Create(fty, Function::ExternalLinkage, "mmap", mMod);
     201    }
     202    ConstantInt * const prot =  ConstantInt::get(intTy, PROT_READ);
     203    ConstantInt * const flags =  ConstantInt::get(intTy, MAP_PRIVATE);
     204    ConstantInt * const offset = ConstantInt::get(intTy, 0); // getCacheAlignment()
     205    Value * const ptr = CreateCall(fMMap, {Constant::getNullValue(voidPtrTy), size, prot, flags, fd, offset});
     206    CreateAssert(CreateICmpNE(CreatePtrToInt(ptr, getSizeTy()), getSize((size_t)MAP_FAILED)), "CreateFileSourceMMap: mmap failed to allocate memory");
     207    return ptr;
     208}
     209
     210Value * CBuilder::CreateMRemap(Value * addr, Value * oldSize, Value * newSize, const bool mayMove) {
     211    DataLayout DL(mMod);
     212    PointerType * const voidPtrTy = getVoidPtrTy();
     213    IntegerType * const intTy = getIntPtrTy(DL);
     214    IntegerType * const sizeTy = getSizeTy();
     215    Function * fMRemap = mMod->getFunction("mremap");
     216    if (LLVM_UNLIKELY(fMRemap == nullptr)) {
     217        //    void * mremap (void *addr, size_t old_size,
     218        //                   size_t new_size, unsigned long flags);
     219
     220        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, sizeTy, intTy}, false);
     221        fMRemap = Function::Create(fty, Function::ExternalLinkage, "mremap", mMod);
     222    }   
     223    addr = CreatePointerCast(addr, voidPtrTy);
     224    CreateAssert(addr, "CreateMRemap: initial addr is null");
     225    Value * ptr = CreateCall(fMRemap, {addr, oldSize, newSize, ConstantInt::get(intTy, mayMove ? MREMAP_MAYMOVE : 0)});
     226    CreateAssert(addr, "CreateMRemap: mremap failed to allocate memory");
     227    return ptr;
     228}
     229
     230Value * CBuilder::CreateMUnmap(Value * addr, Value * size) {
     231    DataLayout DL(mMod);
     232    IntegerType * const intTy = getIntPtrTy(DL);
     233    IntegerType * const sizeTy = getSizeTy();
     234    Function * fMUnmap = mMod->getFunction("munmap");
     235    if (LLVM_UNLIKELY(fMUnmap == nullptr)) {
     236        // int munmap (void *addr, size_t len);
     237
     238        PointerType * const voidPtrTy = getVoidPtrTy();
     239        FunctionType * fty = FunctionType::get(intTy, {voidPtrTy, sizeTy}, false);
     240        fMUnmap = Function::Create(fty, Function::ExternalLinkage, "munmap", mMod);
     241    }
     242    return CreateCall(fMUnmap, {addr, size});
    167243}
    168244
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5368 r5386  
    1919   
    2020public:
    21    
     21
    2222    CBuilder(llvm::Module * m, const unsigned GeneralRegisterWidthInBits, const bool SupportsIndirectBr, const unsigned CacheLineAlignmentInBytes = 64);
    2323   
     
    5151        return instr;
    5252    }
    53    
     53
    5454    // stdio.h functions
    5555    //
     
    7575    //  Create a call to:  int close(int filedes);
    7676    llvm::Value * CreateCloseCall(llvm::Value * fildes);
     77
     78
     //  Create a call to:  void * mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     79    llvm::Value * CreateAnonymousMMap(llvm::Value * size);
     80
     //  Create a call to:  void * mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
     81    llvm::Value * CreateFileSourceMMap(llvm::Value * fd, llvm::Value * size);
     82
     //  Create a call to:  void * mremap(void * addr, size_t old_size, size_t new_size, mayMove ? MREMAP_MAYMOVE : 0);
     83    llvm::Value * CreateMRemap(llvm::Value * addr, llvm::Value * oldSize, llvm::Value * newSize, const bool mayMove = true);
     84
     //  Create a call to:  int munmap(void * addr, size_t len);
     85    llvm::Value * CreateMUnmap(llvm::Value * addr, llvm::Value * size);
     86
     87
    7788
    7889    //  Posix thread (pthread.h) functions.
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r5278 r5386  
    513513        runs.push_back(std::make_pair(type, lo_index));
    514514        if (type == Mixed) {
    515             assert (std::distance(qi, mQuads.cend()) >= lo_index);
     515            assert (static_cast<codepoint_t>(std::distance(qi, mQuads.cend())) >= lo_index);
    516516            qi += lo_index;
    517517        }
     
    566566        } else {
    567567            if (type == Mixed) {
    568                 assert (std::distance(qi, mQuads.cend()) > hi_index);
     568                assert (static_cast<codepoint_t>(std::distance(qi, mQuads.cend())) > hi_index);
    569569                qi += hi_index;
    570570                hi_quad |= *qi++;
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5380 r5386  
    3636
    3737#include <llvm/Support/raw_ostream.h>
     38#include <sys/stat.h>
     39
    3840
    3941#ifdef CUDA_ENABLED
     
    302304        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    303305        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    304         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     306        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize * bufferSegments);
    305307        sourceK->generateKernel({}, {byteStream});
    306308        sourceK->setInitialArguments({fileSize});
     
    483485    kernel::KernelBuilder * sourceK = nullptr;
    484486    if (usingStdIn) {
     487        // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
     488
    485489        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments);
    486490        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     
    633637template<typename CodeUnit>
    634638void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const int fileIdx) {
    635 
    636  //   errs() << lineNum << " : (" << line_start << ", " << line_end << ") -- " << filesize << "\n";
    637 
    638639    assert (buffer);
    639640    assert (line_start <= line_end);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5379 r5386  
    249249
    250250    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
    251         return mStreamSetInputBuffers[getStreamSetIndex(name)];
     251        const auto index = getStreamSetIndex(name);
     252        assert (index < mStreamSetInputBuffers.size());
     253        return mStreamSetInputBuffers[index];
    252254    }
    253255
    254256    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
    255         return mStreamSetOutputBuffers[getStreamSetIndex(name)];
     257        const auto index = getStreamSetIndex(name);
     258        assert (index < mStreamSetOutputBuffers.size());
     259        return mStreamSetOutputBuffers[index];
    256260    }
    257261
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5379 r5386  
    1010using namespace llvm;
    1111
     12inline static size_t round_up_to_nearest(const size_t x, const size_t y) {
     13    return (((x - 1) | (y - 1)) + 1);
     14}
     15
    1216namespace kernel {
    1317
    1418void StdInKernel::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
    1519
    16     BasicBlock * setTermination = CreateBasicBlock("setTermination");
    17     BasicBlock * stdInExit = CreateBasicBlock("stdInExit");
    18     ConstantInt * segmentBytes = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    19     ConstantInt * segmentBytes2 = iBuilder->getSize(2 * mSegmentBlocks * iBuilder->getBitBlockWidth());
    20     // on the first segment, we buffer twice the data to ensure the ScanMatch kernel can safely check for a non-LF line break
     20    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     21    BasicBlock * const readBlock = CreateBasicBlock("ReadMoreData");
     22    BasicBlock * const setTermination = CreateBasicBlock("SetTermination");
     23    BasicBlock * const stdInExit = CreateBasicBlock("StdInExit");
     24
     25    ConstantInt * const segmentSize = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
     26    Value * bufferedSize = getScalarField("BufferedSize");
    2127    Value * const itemsAlreadyRead = getProducedItemCount("codeUnitBuffer");
    22     Value * const bytesToRead = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(itemsAlreadyRead, iBuilder->getSize(0)), segmentBytes2, segmentBytes);
     28    Value * const bytesAlreadyRead = iBuilder->CreateMul(itemsAlreadyRead, iBuilder->getSize(mCodeUnitWidth / 8));
     29    Value * unreadSize = iBuilder->CreateSub(bufferedSize, bytesAlreadyRead);
     30    Value * const exaustedBuffer = iBuilder->CreateICmpULT(unreadSize, segmentSize);
     31    iBuilder->CreateUnlikelyCondBr(exaustedBuffer, readBlock, stdInExit);
     32
     33    iBuilder->SetInsertPoint(readBlock);
     34    // how many pages are required to have enough data for the segment plus one overflow block?
     35    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     36    ConstantInt * const bytesToRead = iBuilder->getSize(PageAlignedSegmentSize);
    2337    reserveBytes("codeUnitBuffer", bytesToRead);
    24     Value * const bytePtr = iBuilder->CreatePointerCast(getOutputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0)), iBuilder->getInt8PtrTy());
     38    BasicBlock * const readExit = iBuilder->GetInsertBlock();
     39
     40    Value * const ptr = getRawOutputPointer("codeUnitBuffer", iBuilder->getInt32(0), bufferedSize);
     41    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    2542    Value * const bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, bytesToRead);
    26     Value * const itemsRead = iBuilder->CreateAdd(itemsAlreadyRead, iBuilder->CreateUDiv(bytesRead, iBuilder->getSize(mCodeUnitWidth / 8)));
    27     setProducedItemCount("codeUnitBuffer", itemsRead);
    28     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(bytesRead, ConstantInt::getNullValue(bytesRead->getType())), setTermination, stdInExit);
     43
     44    unreadSize = iBuilder->CreateAdd(unreadSize, bytesRead);
     45    bufferedSize = iBuilder->CreateAdd(bufferedSize, bytesRead);
     46    setScalarField("BufferedSize", bufferedSize);
     47    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(unreadSize, segmentSize), setTermination, stdInExit);
     48
    2949    iBuilder->SetInsertPoint(setTermination);
     50    Value * const itemsRemaining = iBuilder->CreateUDiv(unreadSize, iBuilder->getSize(mCodeUnitWidth / 8));
    3051    setTerminationSignal();
    3152    iBuilder->CreateBr(stdInExit);
     53
    3254    stdInExit->moveAfter(iBuilder->GetInsertBlock());
     55
    3356    iBuilder->SetInsertPoint(stdInExit);
     57    PHINode * const produced = iBuilder->CreatePHI(itemsAlreadyRead->getType(), 3);
     58
     59    produced->addIncoming(segmentSize, entryBlock);
     60    produced->addIncoming(segmentSize, readExit);
     61    produced->addIncoming(itemsRemaining, setTermination);
     62    Value * const itemsRead = iBuilder->CreateAdd(itemsAlreadyRead, produced);
     63
     64    setProducedItemCount("codeUnitBuffer", itemsRead);
    3465}
    3566
    3667StdInKernel::StdInKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
    37 : SegmentOrientedKernel(iBuilder, "stdin_source", {}, {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {})
     68: SegmentOrientedKernel(iBuilder, "stdin_source", {}, {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {Binding{iBuilder->getSizeTy(), "BufferedSize"}})
    3869, mSegmentBlocks(blocksPerSegment)
    3970, mCodeUnitWidth(codeUnitWidth) {
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5381 r5386  
    123123}
    124124
     // Base-class release hook: emits no code and ignores its argument.
     // ExtensibleBuffer and ExpandableBuffer override it in this changeset to
     // release their own storage.
     125void StreamSetBuffer::releaseBuffer(Value * /* self */) {
     126    /* do nothing: memory is stack allocated */
     127}
     128
    125129// Single Block Buffer
    126130
     
    162166    initialSize = ConstantExpr::getIntegerCast(initialSize, iBuilder->getSizeTy(), false);
    163167    iBuilder->CreateStore(initialSize, capacityPtr);
    164     Value * addr = iBuilder->CreateAlignedMalloc(initialSize, iBuilder->getCacheAlignment());
    165     iBuilder->CreateMemZero(addr, initialSize, iBuilder->getCacheAlignment());
     168    Value * addr = iBuilder->CreateAnonymousMMap(initialSize);
    166169    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    167170    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
     
    185188    Value * const reserved = iBuilder->CreateAdd(iBuilder->CreateMul(position, blockSize), requested);
    186189    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(reserved, currentSize), resume, expand);
    187 
    188190    iBuilder->SetInsertPoint(expand);
    189191    Value * const reservedSize = iBuilder->CreateShl(reserved, 1);
    190     Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
    191192    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    192193    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    193     iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
    194     iBuilder->CreateAlignedFree(baseAddr);
    195     Value * const remainingSize = iBuilder->CreateSub(reservedSize, currentSize);
    196     iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), remainingSize, iBuilder->getBitBlockWidth() / 8);
     194    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
    197195    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    198196    iBuilder->CreateStore(reservedSize, capacityPtr);
     
    205203Value * ExtensibleBuffer::getBaseAddress(Value * const self) const {
    206204    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     205}
     206
     207void ExtensibleBuffer::releaseBuffer(Value * self) {
     208    Value * const sizePtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     209    Value * size = iBuilder->CreateLoad(sizePtr);
     210    iBuilder->CreateMUnmap(getBaseAddress(self), size);
    207211}
    208212
     
    447451}
    448452
     453void ExpandableBuffer::releaseBuffer(Value * self) {
     454    iBuilder->CreateAlignedFree(getBaseAddress(self));
     455}
     456
    449457Value * ExpandableBuffer::getStreamSetBlockPtr(Value *, Value *) const {
    450458    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5379 r5386  
    4646
    4747    virtual void allocateBuffer();
     48
     49    virtual void releaseBuffer(llvm::Value * self);
    4850
    4951    virtual llvm::Value * getStreamBlockPtr(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const;
     
    131133    void reserveBytes(llvm::Value * self, llvm::Value * position, llvm::Value * requested) const override;
    132134
     135    void releaseBuffer(llvm::Value * self) override;
     136
    133137protected:
    134138
     
    214218    llvm::Value * getStreamSetCount(llvm::Value * self) const override;
    215219
     220    void releaseBuffer(llvm::Value * self) override;
     221
    216222protected:
    217223
Note: See TracChangeset for help on using the changeset viewer.