Changeset 6241


Ignore:
Timestamp:
Dec 17, 2018, 12:17:07 PM (3 months ago)
Author:
nmedfort
Message:

Bug fix for kernel state malloc; minor optimization of pipeline state; grep printing bug fix

Location:
icGREP/icgrep-devel/icgrep
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6237 r6241  
    251251    return CreateCall(unlinkFunc, path);
    252252}
     253
     254Value * CBuilder::CreateFSync(Value * fileDescriptor) {
     255    Module * const m = getModule();
     256    Function * fSync = m->getFunction("fsync");
     257    if (fSync == nullptr) {
     258        IntegerType * int32Ty = getInt32Ty();
     259        FunctionType * fty = FunctionType::get(int32Ty, {int32Ty}, true);
     260        fSync = Function::Create(fty, Function::ExternalLinkage, "fsync", m);
     261    }
     262    return CreateCall(fSync, fileDescriptor);
     263}
     264
     265
    253266
    254267Value * CBuilder::CreateMkstempCall(Value * ftemplate) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r6228 r6241  
    163163    llvm::Value * CreateUnlinkCall(llvm::Value * path);
    164164
    165     llvm::Value * CreateFileSize(llvm::Value * fileDescriptor);
     165    // llvm::Value * CreateFileSize(llvm::Value * fileDescriptor);
     166
     167    llvm::Value * CreateFSync(llvm::Value * fileDescriptor);
    166168
    167169    //  Create calls to stdlib.h functions.
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r6209 r6241  
    55 */
    66
    7 #include <iostream>
    8 #include <llvm/IR/Function.h>
    9 #include <llvm/IR/Module.h>
    10 #include <llvm/ExecutionEngine/ExecutionEngine.h>
    11 #include <llvm/IR/Verifier.h>
    12 #include <llvm/Support/CommandLine.h>
    137#include <toolchain/toolchain.h>
    148#include <toolchain/cpudriver.h>
    15 #include <IR_Gen/idisa_target.h>
    169#include <kernels/source_kernel.h>
    17 #include <kernels/streamset.h>
    1810#include <kernels/radix64.h>
    1911#include <kernels/stdout_kernel.h>
    2012#include <kernels/kernel_builder.h>
    21 #include <boost/interprocess/mapped_region.hpp>
    22 #include <boost/interprocess/anonymous_shared_memory.hpp>
    23 #include <boost/math/common_factor_rt.hpp>
     13#include <kernels/pipeline_builder.h>
     14#include <llvm/Support/CommandLine.h>
     15#include <iostream>
    2416#include <sys/stat.h>
    2517#include <fcntl.h>
    26 #include <mutex>
    27 #include <kernels/pipeline_builder.h>
    2818
    2919
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6229 r6241  
    698698                }
    699699                mFileStatus[printIdx] = FileStatus::PrintComplete;
    700                 printIdx = mNextFileToPrint++;
     700                printIdx++;
    701701            } else {
    702702                sched_yield();
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6233 r6241  
    483483        }
    484484    }
    485 
    486485    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
    487486    const auto outputs = getFinalOutputScalars(b);
    488487    b->CreateFree(mHandle);
    489488    mHandle = nullptr;
    490 
    491489    if (outputs.empty()) {
    492490        b->CreateRetVoid();
     
    718716        handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
    719717    }
    720 //    mHandle = b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
    721 //    return mHandle;
    722718    return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
    723719}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6228 r6241  
    246246
    247247            // calculate overflow (copyback) and facsimile (copyforward) space
    248             overflowSpace = lcm(overflowSpace, b->getBitBlockWidth());
     248            const auto blockWidth = b->getBitBlockWidth();
     249            overflowSpace = lcm(overflowSpace, blockWidth);
    249250            assert (overflowSpace.denominator() == 1);
    250             facsimileSpace = lcm(facsimileSpace, b->getBitBlockWidth());
     251            facsimileSpace = lcm(facsimileSpace, blockWidth);
    251252            assert (facsimileSpace.denominator() == 1);
    252253            bn.Overflow = overflowSpace.numerator();
     
    255256
    256257            // compute the buffer size
    257             const auto bufferSpace = lcm(requiredSpace, b->getBitBlockWidth());
     258            const auto bufferMod = overflowSize ? overflowSize : blockWidth;
     259            const auto bufferSpace = lcm(requiredSpace, bufferMod);
    258260            assert (bufferSpace.denominator() == 1);
    259261            const auto bufferSize = bufferSpace.numerator() * mPipelineKernel->getNumOfThreads();
    260 
    261 
    262262            // A DynamicBuffer is necessary when we cannot bound the amount of unconsumed data a priori.
    263263            if (dynamic) {
     
    274274    }
    275275
    276 //    printBufferGraph(G, errs());
     276  //  printBufferGraph(G, errs());
    277277
    278278    return G;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6237 r6241  
    3434    const auto name = makeKernelName(kernelIndex);
    3535    // TODO: prove two termination signals can be fused into a single counter?
    36     mPipelineKernel->addInternalScalar(b->getInt1Ty(), name + TERMINATION_SIGNAL);
     36    mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL);
    3737    // TODO: non deferred item count for fixed rates could be calculated from seg no.
    3838    mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_NO_SCALAR);
     
    565565    Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL);
    566566    b->setKernel(mKernel);
    567     return terminated;
     567    return b->CreateICmpNE(terminated, b->getSize(0));
    568568}
    569569
     
    574574    const auto prefix = makeKernelName(mKernelIndex);
    575575    b->setKernel(mPipelineKernel);
    576     b->setScalarField(prefix + TERMINATION_SIGNAL, value);
     576    b->setScalarField(prefix + TERMINATION_SIGNAL, b->CreateZExtOrTrunc(value, b->getSizeTy()));
    577577    #ifdef PRINT_DEBUG_MESSAGES
    578578    b->CallPrintInt("*** " + prefix + "_terminated ***", value);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6240 r6241  
    195195
    196196    LLVM_READNONE StructType * getLocalStateType(BuilderRef b);
    197     Value * allocateThreadLocalState(BuilderRef b, StructType * localStateType);
     197    void allocateThreadLocalState(BuilderRef b, Value * const localState);
    198198    void setThreadLocalState(BuilderRef b, Value * const localState);
    199199    void deallocateThreadLocalState(BuilderRef b, Value * const localState);
     
    284284    LLVM_READNONE unsigned getPopCountReferenceBuffer(const Kernel * kernel, const ProcessingRate & rate) const;
    285285
    286     StructType * getPopCountThreadLocalStateType(BuilderRef b);
     286    LLVM_READNONE StructType * getPopCountThreadLocalStateType(BuilderRef b);
    287287    void allocatePopCountArrays(BuilderRef b, Value * base);
    288288    void deallocatePopCountArrays(BuilderRef b, Value * base);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6240 r6241  
    1010 ** ------------------------------------------------------------------------------------------------------------- */
    1111void PipelineCompiler::generateSingleThreadKernelMethod(BuilderRef b) {
    12     StructType * const localStateType = getLocalStateType(b);
    13     Value * const localState = allocateThreadLocalState(b, localStateType);
     12    Value * const localState = b->CreateCacheAlignedAlloca(getLocalStateType(b));
     13    allocateThreadLocalState(b, localState);
    1414    setThreadLocalState(b, localState);
    1515    start(b, b->getSize(0));
     
    6868    Value * const processState = allocateThreadState(b, 0);
    6969    b->CreateCall(threadFunc, b->CreatePointerCast(processState, voidPtrTy));
    70     deallocateThreadLocalState(b, processState);
     70    deallocateThreadState(b, processState);
    7171
    7272    // wait for all other threads to complete
     
    7575        Value * threadId = b->CreateLoad(threadIdPtr[i]);
    7676        b->CreatePThreadJoinCall(threadId, status);
    77         deallocateThreadLocalState(b, threadState[i]);
     77        deallocateThreadState(b, threadState[i]);
    7878    }
    7979
     
    118118 ** ------------------------------------------------------------------------------------------------------------- */
    119119inline StructType * PipelineCompiler::getThreadStateType(BuilderRef b) {
    120 
    121     StructType * const localStateTy = getLocalStateType(b);
    122120    std::vector<Type *> threadStructFields;
    123121    threadStructFields.push_back(mPipelineKernel->getHandle()->getType());
    124122    threadStructFields.push_back(b->getSizeTy());
    125     threadStructFields.push_back(localStateTy->getPointerTo());
     123    threadStructFields.push_back(getLocalStateType(b));
    126124    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    127125    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    137135    }
    138136    return StructType::get(b->getContext(), threadStructFields);
    139 
    140 }
     137}
     138
     139enum : int {
     140    POP_COUNT_STRUCT_INDEX = 0
     141};
    141142
    142143/** ------------------------------------------------------------------------------------------------------------- *
     
    154155    b->CreateStore(mPipelineKernel->getHandle(), b->CreateGEP(threadState, {ZERO, HANDLE}));
    155156    b->CreateStore(b->getSize(segOffset), b->CreateGEP(threadState, {ZERO, SEG_OFFSET}));
    156     StructType * const localStateTy = getLocalStateType(b);
    157     Value * const localState = allocateThreadLocalState(b, localStateTy);
    158     b->CreateStore(localState, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
     157    allocateThreadLocalState(b, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
     158
    159159    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    160160    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    187187    mPipelineKernel->setHandle(b, handle);
    188188    Value * const segmentOffset = b->CreateLoad(b->CreateGEP(threadState, {ZERO, SEG_OFFSET}));
    189     setThreadLocalState(b, b->CreateLoad(b->CreateGEP(threadState, {ZERO, LOCAL_STATE})));
     189    setThreadLocalState(b, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
    190190    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    191191    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    213213}
    214214
    215 
    216 enum : int {
    217     POP_COUNT_STRUCT_INDEX = 0
    218 };
    219 
    220215/** ------------------------------------------------------------------------------------------------------------- *
    221216 * @brief getLocalStateType
     
    229224 * @brief allocateThreadLocalState
    230225 ** ------------------------------------------------------------------------------------------------------------- */
    231 inline Value * PipelineCompiler::allocateThreadLocalState(BuilderRef b, StructType * localStateType) {
    232     Value * const localState = b->CreateCacheAlignedAlloca(localStateType);
     226inline void PipelineCompiler::allocateThreadLocalState(BuilderRef b, Value * const localState) {
    233227    Constant * const ZERO = b->getInt32(0);
    234228    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
    235229    allocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
    236     return localState;
    237230}
    238231
     
    243236    Constant * const ZERO = b->getInt32(0);
    244237    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
     238    assert (localState->getType()->getPointerElementType() == getLocalStateType(b));
    245239    mPopCountState = b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT});
     240    assert (mPopCountState->getType()->getPointerElementType() == getPopCountThreadLocalStateType(b));
    246241}
    247242
     
    252247    Constant * const ZERO = b->getInt32(0);
    253248    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
     249    assert (localState->getType()->getPointerElementType() == getLocalStateType(b));
    254250    deallocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
    255251}
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r6230 r6241  
    128128    Value * unconsumedBytes = b->CreateSub(readEndInt, readStartInt);
    129129    unconsumedBytes = b->CreateTrunc(unconsumedBytes, b->getSizeTy());
    130 
    131130    Value * const bufferSize = b->CreateRoundUp(b->CreateAdd(unconsumedBytes, PADDING_SIZE), STRIDE_SIZE);
    132131    Value * const buffer = b->CreateAlignedMalloc(bufferSize, b->getCacheAlignment());
    133 
    134132    b->CreateMemCpy(buffer, readStart, unconsumedBytes, 1);
    135133    b->CreateMemZero(b->CreateGEP(buffer, unconsumedBytes), b->CreateSub(bufferSize, unconsumedBytes), 1);
     
    140138    Value * const offsettedBuffer = b->CreateGEP(buffer, diff);
    141139    PointerType * const codeUnitPtrTy = b->getIntNTy(codeUnitWidth)->getPointerTo();
    142     // set the original base address as the buffer address.
    143     //b->setScalarField("buffer", b->CreatePointerCast(base, codeUnitPtrTy));
    144140    b->setScalarField("ancillaryBuffer", b->CreatePointerCast(buffer, codeUnitPtrTy));
    145141    b->setBaseAddress("sourceBuffer", b->CreatePointerCast(offsettedBuffer, codeUnitPtrTy));
    146142    b->setTerminationSignal();
     143
     144
    147145    BasicBlock * const terminationExit = b->GetInsertBlock();
    148146    b->CreateBr(exit);
     
    419417    newProducedItems->addIncoming(nextProducedItems, entry);
    420418    newProducedItems->addIncoming(fileItems, terminationExit);
    421     b->setProducedItemCount("sourceBuffer", newProducedItems);   
     419    b->setProducedItemCount("sourceBuffer", newProducedItems);
    422420}
    423421
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r6228 r6241  
    2525    }
    2626    b->CreateWriteCall(b->getInt32(STDOUT_FILENO), codeUnitBuffer, bytesToDo);
     27}
     28
     29void StdOutKernel::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
     30    b->CreateFSync(b->getInt32(STDOUT_FILENO));
    2731}
    2832
     
    110114    BasicBlock * const exit = b->CreateBasicBlock("exit");
    111115    Value * const temporaryFileName = b->getScalarField("temporaryFileName");
     116    Value * const fileDescriptor = b->getScalarField("fileDescriptor");
     117    b->CreateFSync(fileDescriptor);
    112118    b->CreateLikelyCondBr(b->CreateIsNotNull(temporaryFileName), hasTemporaryFile, exit);
    113119
    114120    b->SetInsertPoint(hasTemporaryFile);
    115     Value * const fileDescriptor = b->getScalarField("fileDescriptor");
    116121    b->CreateCloseCall(fileDescriptor);
    117122    Value * const fileName = b->getScalarField("fileName");
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r6184 r6241  
    1717private:
    1818    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
     19    void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) override;
    1920private:
    2021    const unsigned mCodeUnitWidth;
    21    
     22
    2223};
    2324
    2425class FileSink final : public SegmentOrientedKernel {
    25 public: 
     26public:
    2627    FileSink(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Scalar * outputFileName, StreamSet * codeUnitBuffer);
    2728protected:
     
    3132private:
    3233    const unsigned mCodeUnitWidth;
    33    
     34
    3435};
    3536}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r6228 r6241  
    193193}
    194194
     195LLVM_READNONE inline ConstantPointerNull * nullPointerFor(Value * ptr) {
     196    return ConstantPointerNull::get(cast<PointerType>(ptr->getType()));
     197}
     198
    195199void StaticBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    196     b->CreateFree(getBaseAddress(b.get()));
     200    Value * buffer = b->CreateLoad(mHandle);
     201    b->CreateFree(buffer);
     202    b->CreateStore(nullPointerFor(buffer), mHandle);
    197203}
    198204
     
    328334    Value * const handle = getHandle(b.get());
    329335    Value * priorAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
    330     b->CreateFree(b->CreateLoad(priorAddressField));
     336    Value * priorAddress = b->CreateLoad(priorAddressField);
     337    b->CreateFree(priorAddress);
     338    b->CreateStore(nullPointerFor(priorAddress), priorAddressField);
    331339    Value * baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    332     b->CreateFree(b->CreateLoad(baseAddressField));
     340    Value * baseAddress = b->CreateLoad(baseAddressField);
     341    b->CreateFree(baseAddress);
     342    b->CreateStore(nullPointerFor(baseAddress), baseAddressField);
    333343}
    334344
Note: See TracChangeset for help on using the changeset viewer.