Changeset 6233 for icGREP


Ignore:
Timestamp: Dec 15, 2018, 12:09:52 PM (2 months ago)
Author: nmedfort
Message: Moved termination signals into pipeline kernel

Location: icGREP/icgrep-devel/icgrep
Files: 10 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6228 r6233  
    352352    size = CreateZExtOrTrunc(size, sizeTy);
    353353    CallInst * const ptr = CreateCall(f, size);
    354     CreateAssert(ptr, "CreateMalloc: returned null pointer");
     354    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     355        CreateAssert(ptr, "CreateMalloc: returned null pointer");
     356    }
    355357    CreateMemZero(ptr, size, 1);
    356358    return ptr;
  • icGREP/icgrep-devel/icgrep/icgrep.files

    r6229 r6233  
    55grep_interface.cpp
    66grep_interface.h
    7 icgrep.cpp
    87idisa_test.cpp
    98lz4_decoder.cpp
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6228 r6233  
    103103
    104104    IntegerType * const sizeTy = b->getSizeTy();
    105     PointerType * const sizePtrPtrTy = sizeTy->getPointerTo()->getPointerTo();
    106105
    107106    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    108     addInternalScalar(sizeTy, TERMINATION_SIGNAL);
    109107
    110108    // TODO: if we had a way of easily calculating the number of processed/produced items of the
     
    119117
    120118    // If an output is a managed buffer, we need to store both the buffer and a set of consumers.
    121     Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy})->getPointerTo();
    122119    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
    123120        const Binding & output = mOutputStreamSets[i];
     
    129126            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
    130127            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
    131             addInternalScalar(consumerSetTy, output.getName() + CONSUMER_SUFFIX);
    132128            addInternalScalar(sizeTy, output.getName() + CONSUMED_ITEM_COUNT_SUFFIX);
    133129        }
     
    191187    }
    192188
    193     FunctionType * const initType = FunctionType::get(b->getVoidTy(), params, false);
     189    FunctionType * const initType = FunctionType::get(b->getInt1Ty(), params, false);
    194190    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
    195191    initFunc->setCallingConv(CallingConv::C);
     
    231227        }
    232228    }
     229    // any kernel can set termination on initialization
     230    mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
     231    b->CreateStore(b->getFalse(), mTerminationSignalPtr);
    233232    generateInitializeMethod(b);
    234233    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    235234        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
    236235    }
    237     b->CreateRetVoid();
     236    b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
     237    mTerminationSignalPtr = nullptr;
     238
    238239    b->setKernel(storedKernel);
    239240    mHandle = storedHandle;
     
    248249    IntegerType * const sizeTy = b->getSizeTy();
    249250    PointerType * const sizePtrTy = sizeTy->getPointerTo();
    250     Type * const voidTy = b->getVoidTy();
    251251
    252252    std::vector<Type *> params;
     
    279279    }
    280280
    281     FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
     281    Type * const retTy = canSetTerminateSignal() ? b->getInt1Ty() : b->getVoidTy();
     282    FunctionType * const doSegmentType = FunctionType::get(retTy, params, false);
    282283    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
    283284    doSegment->setCallingConv(CallingConv::C);
     
    399400    }
    400401
     402    // initialize the termination signal if this kernel can set it
     403    if (canSetTerminateSignal()) {
     404        mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
     405        b->CreateStore(b->getFalse(), mTerminationSignalPtr);
     406    }
     407
    401408    // Calculate and/or load the accessible and writable item counts. If they are unneeded,
    402409    // LLVM ought to recognize them as dead code and remove them.
     
    405412        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
    406413    }
    407     b->CreateRetVoid();
     414
     415    // return the termination signal (if one exists)
     416    if (canSetTerminateSignal()) {
     417        b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
     418        mTerminationSignalPtr = nullptr;
     419    } else {
     420        b->CreateRetVoid();
     421    }
    408422
    409423    // Clean up all of the constructed buffers.
     
    758772    const auto f = mScalarMap.find(name);
    759773    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
    760         assert (false && "could not find scalar!");
     774        assert (!"could not find scalar!");
    761775        report_fatal_error(getName() + " does not contain scalar: " + name);
    762776    }
     
    792806    const auto f = mStreamSetMap.find(name);
    793807    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
    794         assert (!mStreamSetMap.empty());
     808        assert (!"could not find stream set!");
    795809        report_fatal_error(getName() + " does not contain stream set " + name);
    796810    }
     
    10371051, mCurrentMethod(nullptr)
    10381052, mStride(0)
     1053, mTerminationSignalPtr(nullptr)
    10391054, mIsFinal(nullptr)
    10401055, mNumOfStrides(nullptr)
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6228 r6233  
    3939const static std::string CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
    4040const static std::string NON_DEFERRED_ITEM_COUNT_SUFFIX = "_nonDeferredItemCount";
    41 const static std::string TERMINATION_SIGNAL = "terminationSignal";
    4241const static std::string BUFFER_HANDLE_SUFFIX = "_buffer";
    43 const static std::string CONSUMER_SUFFIX = "_consumerLocks";
    4442const static std::string CYCLECOUNT_SCALAR = "CPUcycles";
    4543
     
    434432    llvm::Value * getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const strideIndex);
    435433
     434    LLVM_READNONE bool canSetTerminateSignal() const {
     435        return hasAttribute(Attribute::KindId::CanTerminateEarly) || hasAttribute(Attribute::KindId::MustExplicitlyTerminate);
     436    }
     437
     438    llvm::Value * getTerminationSignalPtr() const {
     439        return mTerminationSignalPtr;
     440    }
     441
     442    llvm::Value * isFinal() const {
     443        return mIsFinal;
     444    }
     445
    436446    // Constructor
    437447    Kernel(std::string && kernelName,
     
    483493    unsigned                        mStride;
    484494
     495    llvm::Value *                   mTerminationSignalPtr;
    485496    llvm::Value *                   mIsFinal;
    486497    llvm::Value *                   mNumOfStrides;
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r6228 r6233  
    124124 ** ------------------------------------------------------------------------------------------------------------- */
    125125Value * KernelBuilder::getTerminationSignal() {
    126     return CreateICmpNE(getScalarField(TERMINATION_SIGNAL), getSize(0));
     126    Value * const ptr = mKernel->getTerminationSignalPtr();
     127    if (ptr) {
     128        return CreateLoad(ptr);
     129    } else {
     130        return getFalse();
     131    }
    127132}
    128133
     
    131136 ** ------------------------------------------------------------------------------------------------------------- */
    132137void KernelBuilder::setTerminationSignal(Value * const value) {
     138    assert (value);
    133139    assert (value->getType() == getInt1Ty());
    134140    if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
    135141        CallPrintInt(mKernel->getName() + ": setTerminationSignal", value);
    136142    }
    137     setScalarField(TERMINATION_SIGNAL, CreateZExt(value, getSizeTy()));
    138 }
    139 
    140 Value * KernelBuilder::getConsumerLock(const std::string & name) {
    141     return getScalarField(name + CONSUMER_SUFFIX);
    142 }
    143 
    144 void KernelBuilder::setConsumerLock(const std::string & name, Value * const value) {
    145     setScalarField(name + CONSUMER_SUFFIX, value);
     143    Value * const ptr = mKernel->getTerminationSignalPtr();
     144    if (LLVM_UNLIKELY(ptr == nullptr)) {
     145        llvm::report_fatal_error(mKernel->getName() + " does not have CanTerminateEarly or MustExplicitlyTerminate set.");
     146    }
     147    CreateStore(value, ptr);
    146148}
    147149
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r6228 r6233  
    163163    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args);
    164164
    165     llvm::Value * getConsumerLock(const std::string & name);
    166 
    167     void setConsumerLock(const std::string & name, llvm::Value * value);
    168 
    169165    const Kernel * getKernel() const {
    170166        return mKernel;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6228 r6233  
    11#include "pipeline_compiler.hpp"
     2
     3const static std::string TERMINATION_SIGNAL = "terminationSignal";
    24
    35namespace kernel {
     
    1012    const auto numOfKernels = mPipeline.size();
    1113    b->setKernel(mPipelineKernel);
     14    IntegerType * const boolTy = b->getInt1Ty();
    1215    for (unsigned i = 0; i < numOfKernels; ++i) {
     16        // TODO: prove two termination signals can be fused into a single counter?
     17        const auto prefix = makeKernelName(i);
     18        mPipelineKernel->addInternalScalar(boolTy, prefix + TERMINATION_SIGNAL);
    1319        addBufferHandlesToPipelineKernel(b, i);
    1420        addPopCountScalarsToPipelineKernel(b, i);
    1521    }
     22    b->setKernel(mPipelineKernel);
    1623}
    1724
     
    5057        }
    5158        b->setKernel(mKernel);
    52         b->CreateCall(getInitializationFunction(b), args);
     59        Value * const terminatedOnInit = b->CreateCall(getInitializationFunction(b), args);
     60        if (mKernel->canSetTerminateSignal()) {
     61            setTerminated(b, terminatedOnInit);
     62        }
    5363    }
    5464}
     
    106116    /// KERNEL ENTRY
    107117    /// -------------------------------------------------------------------------------------
    108     Value * const term = b->getTerminationSignal();
     118    Value * const initiallyTerminated = getInitialTerminationSignal(b);
    109119    #ifdef PRINT_DEBUG_MESSAGES
    110120    if (1) {
    111121    Constant * const MAX_INT = ConstantInt::getAllOnesValue(mSegNo->getType());
    112     Value * const round = b->CreateSelect(term, MAX_INT, mSegNo);
     122    Value * const round = b->CreateSelect(initiallyTerminated, MAX_INT, mSegNo);
    113123    b->CallPrintInt("--- " + kernelName + "_start ---", round);
    114124    }
    115125    #endif
    116     b->CreateUnlikelyCondBr(term, mKernelExit, checkProducers);
     126    b->CreateUnlikelyCondBr(initiallyTerminated, mKernelExit, checkProducers);
    117127
    118128    /// -------------------------------------------------------------------------------------
     
    218228    // If the kernel itself terminates, it must set the final processed/produced item counts.
    219229    // Otherwise, the pipeline will update any countable rates, even upon termination.
    220     b->CreateUnlikelyCondBr(terminatedExplicitly(b), terminated, incrementItemCounts);
     230    b->CreateUnlikelyCondBr(mTerminationExplicitly, terminated, incrementItemCounts);
    221231
    222232    /// -------------------------------------------------------------------------------------
     
    254264    b->SetInsertPoint(terminated);
    255265    zeroFillPartiallyWrittenOutputStreams(b);
    256     setTerminated(b);
     266    setTerminated(b, b->getTrue());
    257267    BasicBlock * const kernelTerminatedEnd = b->GetInsertBlock();
    258268    mTerminatedPhi->addIncoming(b->getTrue(), kernelTerminatedEnd);
     
    327337 ** ------------------------------------------------------------------------------------------------------------- */
    328338void PipelineCompiler::end(BuilderRef b, const unsigned step) {
    329 
     339    b->setKernel(mPipelineKernel);
    330340    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    331341        ConstantInt * const ZERO = b->getSize(0);
     
    351361        // TODO: not a very elegant way here; revise
    352362        const auto bufferVertex = source(e, mBufferGraph);
    353         mKernelIndex = parent(bufferVertex, mBufferGraph);
    354         mKernel = mPipeline[mKernelIndex];
     363        setActiveKernel(b, parent(bufferVertex, mBufferGraph));
    355364        resetMemoizedFields();
    356365        const auto outputPort = mBufferGraph[e].Port;
     
    364373        notEnoughSpace = b->CreateOr(b->CreateICmpULT(writable, strideLength), notEnoughSpace);
    365374    }
     375    b->setKernel(mPipelineKernel);
    366376    Value * const done = b->CreateOr(allTerminated, notEnoughSpace);
    367377    #ifdef PRINT_DEBUG_MESSAGES
     
    491501}
    492502
    493 #warning TODO: move termination variables into pipeline if the kernel cannot signal termination itself.
    494 
    495 /** ------------------------------------------------------------------------------------------------------------- *
    496  * @brief terminatedExplicitly
    497  ** ------------------------------------------------------------------------------------------------------------- */
    498 Value * PipelineCompiler::terminatedExplicitly(BuilderRef b) const {
    499     if (LLVM_UNLIKELY(mKernel->hasAttribute(AttrId::MustExplicitlyTerminate) || mKernel->hasAttribute(AttrId::CanTerminateEarly))) {
    500         return b->getTerminationSignal();
    501     } else {
    502         return b->getFalse();
    503     }
     503/** ------------------------------------------------------------------------------------------------------------- *
     504 * @brief getInitialTerminationSignal
     505 ** ------------------------------------------------------------------------------------------------------------- */
     506inline Value * PipelineCompiler::getInitialTerminationSignal(BuilderRef b) const {
     507    b->setKernel(mPipelineKernel);
     508    const auto prefix = makeKernelName(mKernelIndex);
     509    Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL);
     510    b->setKernel(mKernel);
     511    return terminated;
    504512}
    505513
     
    507515 * @brief setTerminated
    508516 ** ------------------------------------------------------------------------------------------------------------- */
    509 inline void PipelineCompiler::setTerminated(BuilderRef b) {
    510     if (LLVM_UNLIKELY(mKernel->hasAttribute(AttrId::MustExplicitlyTerminate))) {
    511         return;
    512     }
     517inline void PipelineCompiler::setTerminated(BuilderRef b, Value * const value) {
    513518    const auto prefix = makeKernelName(mKernelIndex);
    514     BasicBlock * const terminationIsSet = b->CreateBasicBlock(prefix + "_terminationIsSet", mKernelLoopExit);
    515     if (mKernel->hasAttribute(AttrId::CanTerminateEarly)) {
    516         BasicBlock * const setTermination = b->CreateBasicBlock(prefix + "_setTermination", terminationIsSet);
    517         Value * const terminated = b->getTerminationSignal();
    518         b->CreateCondBr(terminated, terminationIsSet, setTermination);
    519 
    520         b->SetInsertPoint(setTermination);
    521     }
    522     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    523         b->CreateMProtect(mKernel->getHandle(), CBuilder::Protect::WRITE);
    524     }
    525     b->setTerminationSignal();
    526     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    527         b->CreateMProtect(mKernel->getHandle(), CBuilder::Protect::READ);
    528     }
     519    b->setKernel(mPipelineKernel);
     520    b->setScalarField(prefix + TERMINATION_SIGNAL, value);
    529521    #ifdef PRINT_DEBUG_MESSAGES
    530     b->CallPrintInt("*** " + prefix + "_terminated ***", b->getTrue());
     522    b->CallPrintInt("*** " + prefix + "_terminated ***", value);
    531523    #endif
    532     b->CreateBr(terminationIsSet);
    533 
    534     b->SetInsertPoint(terminationIsSet);
     524    b->setKernel(mKernel);
    535525}
    536526
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6228 r6233  
    509509    #endif
    510510
    511     b->CreateCall(getDoSegmentFunction(b), arguments);
     511
     512    mTerminationExplicitly = b->CreateCall(getDoSegmentFunction(b), arguments);
     513    if (LLVM_LIKELY(mTerminationExplicitly->getType()->isVoidTy())) {
     514        mTerminationExplicitly = b->getFalse();
     515    }
    512516
    513517    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     
    659663        Constant * const strideSize = b->getSize(ceiling(lb * mKernel->getStride()));
    660664        Value * hasEnough = b->CreateICmpULE(itemCount, strideSize);
    661         hasEnough = b->CreateOr(hasEnough, terminatedExplicitly(b));
     665        hasEnough = b->CreateOr(hasEnough, mTerminationExplicitly);
    662666        b->CreateAssert(hasEnough, prefix + " " + label + " fewer items than expected");
    663667    }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6228 r6233  
    240240    Value * truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount, Value * all) const;
    241241    Value * getTotalItemCount(BuilderRef b, const unsigned inputPort) const;
    242     Value * terminatedExplicitly(BuilderRef b) const;
    243242    Value * hasProducerTerminated(BuilderRef b, const unsigned inputPort) const;
    244     void setTerminated(BuilderRef b);
     243    Value * getInitialTerminationSignal(BuilderRef b) const;
     244    void setTerminated(BuilderRef b, Value * const terminated);
    245245    void resetMemoizedFields();
    246246
     
    397397    // kernel state
    398398    Value *                                     mNumOfLinearStrides = nullptr;
     399    Value *                                     mTerminationExplicitly = nullptr;
    399400
    400401    std::vector<unsigned>                       mPortOrdering;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6228 r6233  
    3737    IntegerType * const sizeTy = b->getSizeTy();
    3838    PointerType * const voidPtrTy = b->getVoidPtrTy();
    39     Constant * const nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    40     codegen::BufferSegments = std::max(codegen::BufferSegments, codegen::ThreadNum);
    4139
    42     Value * const instance = mPipelineKernel->getHandle(); assert (instance);
     40    ConstantInt * const ZERO = b->getInt32(0);
     41    ConstantInt * const ONE = b->getInt32(1);
     42    ConstantInt * const TWO = b->getInt32(2);
    4343
    44     StructType * const threadStructType = StructType::get(m->getContext(), {instance->getType(), sizeTy, voidPtrTy});
     44    // store where we'll resume compiling the DoSegment method
     45    const auto resumePoint = b->saveIP();
     46    Value * const handle = mPipelineKernel->getHandle(); assert (handle);
     47    StructType * const threadStructType = StructType::get(m->getContext(), {handle->getType(), sizeTy, voidPtrTy});
     48    FunctionType * const threadFuncType = FunctionType::get(voidPtrTy, {voidPtrTy}, false);
    4549
    46     FunctionType * const threadFuncType = FunctionType::get(b->getVoidTy(), {voidPtrTy}, false);
    47     Function * const threadFunc = Function::Create(threadFuncType, Function::InternalLinkage, "internal_thread", b->getModule());
     50    const auto threadName = mPipelineKernel->getName() + "_DoSegmentThread";
     51    Function * const threadFunc = Function::Create(threadFuncType, Function::InternalLinkage, threadName, b->getModule());
    4852    threadFunc->setCallingConv(CallingConv::C);
    4953    auto args = threadFunc->arg_begin();
    5054    args->setName("kernelStateObject");
    51 
    52     // -------------------------------------------------------------------------------------------------------------------------
    53     // MAKE PIPELINE DRIVER
    54     // -------------------------------------------------------------------------------------------------------------------------
    55     const unsigned threads = numOfThreads - 1;
    56     Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    57     AllocaInst * const pthreads = b->CreateAlloca(pthreadsTy);
    58     Value * threadIdPtr[threads];
    59     ConstantInt * const ZERO = b->getInt32(0);
    60     for (unsigned i = 0; i < threads; ++i) {
    61         threadIdPtr[i] = b->CreateGEP(pthreads, {ZERO, b->getInt32(i)});
    62     }
    63     // use the process thread to handle the initial segment function after spawning
    64     // (n - 1) threads to handle the subsequent offsets
    65     ConstantInt * const ONE = b->getInt32(1);
    66     ConstantInt * const TWO = b->getInt32(2);
    67     Value * localState[threads];
    68     for (unsigned i = 0; i < threads; ++i) {
    69         AllocaInst * const threadState = b->CreateAlloca(threadStructType);
    70         b->CreateStore(instance, b->CreateGEP(threadState, {ZERO, ZERO}));
    71         b->CreateStore(b->getSize(i + 1), b->CreateGEP(threadState, {ZERO, ONE}));
    72         localState[i] = allocateThreadLocalSpace(b);
    73         b->CreateStore(localState[i], b->CreateGEP(threadState, {ZERO, TWO}));
    74         b->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
    75     }
    76 
    77     AllocaInst * const threadState = b->CreateAlloca(threadStructType);
    78     b->CreateStore(instance, b->CreateGEP(threadState, {ZERO, ZERO}));
    79     b->CreateStore(b->getSize(0), b->CreateGEP(threadState, {ZERO, ONE}));
    80     b->CreateCall(threadFunc, b->CreatePointerCast(threadState, voidPtrTy));
    81 
    82     AllocaInst * const status = b->CreateAlloca(voidPtrTy);
    83     for (unsigned i = 0; i < threads; ++i) {
    84         Value * threadId = b->CreateLoad(threadIdPtr[i]);
    85         b->CreatePThreadJoinCall(threadId, status);
    86         deallocateThreadLocalSpace(b, localState[i]);
    87     }
    88     b->CreateRetVoid();
    8955
    9056    // -------------------------------------------------------------------------------------------------------------------------
     
    11177    b->CreateCondBr(b->CreateIsNull(segmentOffset), exitFunction, exitThread);
    11278    b->SetInsertPoint(exitThread);
     79    Constant * const nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    11380    b->CreatePThreadExitCall(nullVoidPtrVal);
    11481    b->CreateBr(exitFunction);
    11582    b->SetInsertPoint(exitFunction);
     83    b->CreateRet(nullVoidPtrVal);
     84
     85    // -------------------------------------------------------------------------------------------------------------------------
     86    // MAKE PIPELINE DRIVER
     87    // -------------------------------------------------------------------------------------------------------------------------
     88    b->restoreIP(resumePoint);
     89    mPipelineKernel->setHandle(b, handle);
     90    const unsigned threads = numOfThreads - 1;
     91    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
     92    AllocaInst * const pthreads = b->CreateAlloca(pthreadsTy);
     93    Value * threadIdPtr[threads];
     94    for (unsigned i = 0; i < threads; ++i) {
     95        threadIdPtr[i] = b->CreateGEP(pthreads, {ZERO, b->getInt32(i)});
     96    }
     97    // use the process thread to handle the initial segment function after spawning
     98    // (n - 1) threads to handle the subsequent offsets
     99    Value * localState[threads];
     100    for (unsigned i = 0; i < threads; ++i) {
     101        AllocaInst * const threadState = b->CreateAlloca(threadStructType);
     102        b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, ZERO}));
     103        b->CreateStore(b->getSize(i + 1), b->CreateGEP(threadState, {ZERO, ONE}));
     104        localState[i] = allocateThreadLocalSpace(b);
     105        b->CreateStore(localState[i], b->CreateGEP(threadState, {ZERO, TWO}));
     106        b->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
     107    }
     108
     109    AllocaInst * const threadState = b->CreateAlloca(threadStructType);
     110    b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, ZERO}));
     111    b->CreateStore(b->getSize(0), b->CreateGEP(threadState, {ZERO, ONE}));
     112    b->CreateCall(threadFunc, b->CreatePointerCast(threadState, voidPtrTy));
     113
     114    AllocaInst * const status = b->CreateAlloca(voidPtrTy);
     115    for (unsigned i = 0; i < threads; ++i) {
     116        Value * threadId = b->CreateLoad(threadIdPtr[i]);
     117        b->CreatePThreadJoinCall(threadId, status);
     118        deallocateThreadLocalSpace(b, localState[i]);
     119    }
     120
    116121
    117122}
Note: See TracChangeset for help on using the changeset viewer.