Changeset 6288


Ignore:
Timestamp:
Jan 19, 2019, 2:44:53 PM (3 months ago)
Author:
cameron
Message:

Repeat of prior check-in

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
29 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6275 r6288  
    7373SET(KERNEL_SRC kernels/attributes.cpp kernels/binding.cpp kernels/relationship.cpp kernels/processing_rate.cpp)
    7474SET(KERNEL_SRC ${KERNEL_SRC} kernels/kernel.cpp kernels/streamset.cpp kernels/multiblock_kernel.cpp kernels/block_kernel.cpp)
    75 SET(KERNEL_SRC ${KERNEL_SRC} kernels/pipeline/pipeline_kernel.cpp kernels/pipeline/pipeline_builder.cpp)
    76 SET(KERNEL_SRC ${KERNEL_SRC} kernels/optimizationbranch.cpp)
     75SET(KERNEL_SRC ${KERNEL_SRC} kernels/pipeline/pipeline_kernel.cpp)
     76SET(KERNEL_SRC ${KERNEL_SRC} kernels/optimizationbranch/optimizationbranch.cpp)
     77SET(KERNEL_SRC ${KERNEL_SRC} kernels/pipeline/pipeline_builder.cpp)
    7778SET(KERNEL_SRC ${KERNEL_SRC} kernels/callback.cpp)
    7879
     
    131132
    132133add_executable(icgrep icgrep.cpp grep_interface.cpp)
    133 add_executable(u8u16 u8u16.cpp)
     134add_executable(u8u16 u8u16.cpp kernels/zeroextend.cpp)
    134135add_executable(u32u8 u32u8.cpp kernels/pdep_kernel.cpp)
    135136add_executable(base64 base64.cpp kernels/radix64.cpp)
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r6261 r6288  
    163163#warning make a "CBuffer" class to abstract away the complexity of making these function typedefs.
    164164
    165 typedef void (*preprocessFunctionType)(char * output_data, size_t & output_produced, size_t output_size, const uint32_t fd);
     165typedef void (*preprocessFunctionType)(char * output_data, size_t * output_produced, size_t output_size, const uint32_t fd);
    166166
    167167static char * chStream;
     
    218218}
    219219
    220 #define ALIGNMENT (32UL)
    221 
    222 inline bool is_power_2(const unsigned n) {
     220#define ALIGNMENT (512 / 8)
     221
     222inline bool is_power_2(const size_t n) {
    223223    return ((n & (n - 1)) == 0) && n;
    224224}
    225225
    226 inline unsigned round_up_to(const unsigned x, const unsigned y) {
     226inline size_t round_up_to(const size_t x, const size_t y) {
    227227    assert(is_power_2(y));
    228228    return (x + y - 1) & -y;
     
    231231char * preprocess(preprocessFunctionType preprocess) {
    232232    std::string fileName = inputFiles[0];
    233     const int fd = open(inputFiles[0].c_str(), O_RDONLY);
     233    const auto fd = open(inputFiles[0].c_str(), O_RDONLY);
    234234    if (LLVM_UNLIKELY(fd == -1)) {
    235235        std::cerr << "Error: cannot open " << fileName << " for processing.\n";
     
    240240    // Given a 8-bit bytestream of length n, we need space for 4 bitstreams of length n ...
    241241    AlignedAllocator<char, ALIGNMENT> alloc;
    242     const auto n = round_up_to(size, 8 * ALIGNMENT);
     242    const size_t n = round_up_to(size, 8 * ALIGNMENT);
    243243    chStream = alloc.allocate((4 * n) / 8);
    244     size_t length;
    245     preprocess(chStream, length, n, fd);
     244    size_t length = 0;
     245    preprocess(chStream, &length, n, fd);
    246246    close(fd);
    247247    return chStream;
  • icGREP/icgrep-devel/icgrep/icgrep.files

    r6266 r6288  
    33kernels/optimizationbranch.cpp
    44kernels/optimizationbranch.h
     5kernels/optimizationbranch/optimizationbranch.cpp
     6kernels/optimizationbranch/optimizationbranch_compiler.hpp
    57kernels/pipeline/termination_logic.hpp
     8kernels/zeroextend.cpp
     9kernels/zeroextend.h
    610wc.cpp
    711base64.cpp
  • icGREP/icgrep-devel/icgrep/icgrep.includes

    r6193 r6288  
    2727pablo/passes
    2828kernels/lzparabix/decoder
     29kernels/optimizationbranch
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6275 r6288  
    2828#include <llvm/Support/Format.h>
    2929#include <sstream>
     30#include <llvm/Support/raw_ostream.h>
    3031
    3132using namespace llvm;
     
    157158
    158159/** ------------------------------------------------------------------------------------------------------------- *
     160 * @brief generateKernel
     161 ** ------------------------------------------------------------------------------------------------------------- */
     162void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
     163    if (LLVM_UNLIKELY(mIsGenerated)) return;
     164    b->setKernel(this);
     165    b->setModule(mModule);
     166    addKernelDeclarations(b);
     167    callGenerateInitializeMethod(b);
     168    callGenerateDoSegmentMethod(b);
     169    callGenerateFinalizeMethod(b);
     170    addAdditionalFunctions(b);
     171    mIsGenerated = true;
     172}
     173
     174/** ------------------------------------------------------------------------------------------------------------- *
    159175 * @brief addInitializeDeclaration
    160176 ** ------------------------------------------------------------------------------------------------------------- */
    161 void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     177inline void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
    162178
    163179    std::vector<Type *> params;
     
    187203 * @brief callGenerateInitializeMethod
    188204 ** ------------------------------------------------------------------------------------------------------------- */
    189 void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
     205inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
    190206    const Kernel * const storedKernel = b->getKernel();
    191207    b->setKernel(this);
     
    260276 * @brief addDoSegmentDeclaration
    261277 ** ------------------------------------------------------------------------------------------------------------- */
    262 void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
    263 
    264     IntegerType * const sizeTy = b->getSizeTy();
    265     PointerType * const sizePtrTy = sizeTy->getPointerTo();
    266 
    267     std::vector<Type *> params;
    268     params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
    269     if (LLVM_LIKELY(isStateful())) {
    270         params.push_back(mKernelStateType->getPointerTo());  // handle
    271     }
    272     params.push_back(sizeTy); // numOfStrides
    273     for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
    274         Type * const bufferType = mStreamSetInputBuffers[i]->getType();
    275         // logical base input address
    276         params.push_back(bufferType->getPointerTo());
    277         // processed input items
    278         const Binding & input = mInputStreamSets[i];
    279         if (isParamAddressable(input)) {
    280             params.push_back(sizePtrTy); // updatable
    281         }  else if (isParamConstant(input)) {
    282             params.push_back(sizeTy);  // constant
    283         }
    284         // accessible input items (after non-deferred processed item count)
    285         params.push_back(sizeTy);
    286         if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    287             params.push_back(sizePtrTy);
    288         }
    289         if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    290             params.push_back(sizePtrTy);
    291         }
    292     }
    293 
    294     const auto canTerminate = canSetTerminateSignal();
    295 
    296     for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
    297         const Binding & output = mOutputStreamSets[i];
    298         // logical base output address
    299         if (LLVM_LIKELY(!isLocalBuffer(output))) {
    300             Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
    301             params.push_back(bufferType->getPointerTo());
    302         }
    303         // produced output items
    304         if (canTerminate || isParamAddressable(output)) {
    305             params.push_back(sizePtrTy); // updatable
    306         } else if (isParamConstant(output)) {
    307             params.push_back(sizeTy); // constant
    308         }
    309         // If this is a local buffer, the next param is its consumed item count;
    310         // otherwise it'll hold its writable output items.
    311         params.push_back(sizeTy);
    312     }
    313 
    314 
    315     Type * const retTy = canTerminate ? b->getInt1Ty() : b->getVoidTy();
    316     FunctionType * const doSegmentType = FunctionType::get(retTy, params, false);
     278inline void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     279
     280    Type * const retTy = canSetTerminateSignal() ? b->getInt1Ty() : b->getVoidTy();
     281    FunctionType * const doSegmentType = FunctionType::get(retTy, getDoSegmentFields(b), false);
    317282    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
    318283    doSegment->setCallingConv(CallingConv::C);
     
    354319}
    355320
    356 
    357 
    358 /** ------------------------------------------------------------------------------------------------------------- *
    359  * @brief callGenerateKernelMethod
    360  ** ------------------------------------------------------------------------------------------------------------- */
    361 void Kernel::callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
     321/** ------------------------------------------------------------------------------------------------------------- *
     322 * @brief getDoSegmentFields
     323 ** ------------------------------------------------------------------------------------------------------------- */
     324std::vector<Type *> Kernel::getDoSegmentFields(const std::unique_ptr<KernelBuilder> & b) const {
     325
     326    IntegerType * const sizeTy = b->getSizeTy();
     327    PointerType * const sizePtrTy = sizeTy->getPointerTo();
     328
     329    std::vector<Type *> fields;
     330    fields.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
     331    if (LLVM_LIKELY(isStateful())) {
     332        fields.push_back(mKernelStateType->getPointerTo());  // handle
     333    }
     334    fields.push_back(sizeTy); // numOfStrides
     335    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     336        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
     337        // logical base input address
     338        fields.push_back(bufferType->getPointerTo());
     339        // processed input items
     340        const Binding & input = mInputStreamSets[i];
     341        if (isParamAddressable(input)) {
     342            fields.push_back(sizePtrTy); // updatable
     343        }  else if (isParamConstant(input)) {
     344            fields.push_back(sizeTy);  // constant
     345        }
     346        // accessible input items (after non-deferred processed item count)
     347        fields.push_back(sizeTy);
     348        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     349            fields.push_back(sizePtrTy);
     350        }
     351        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     352            fields.push_back(sizePtrTy);
     353        }
     354    }
     355
     356    const auto canTerminate = canSetTerminateSignal();
     357
     358    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
     359        const Binding & output = mOutputStreamSets[i];
     360        // logical base output address
     361        if (LLVM_LIKELY(!isLocalBuffer(output))) {
     362            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
     363            fields.push_back(bufferType->getPointerTo());
     364        }
     365        // produced output items
     366        if (canTerminate || isParamAddressable(output)) {
     367            fields.push_back(sizePtrTy); // updatable
     368        } else if (isParamConstant(output)) {
     369            fields.push_back(sizeTy); // constant
     370        }
     371        // If this is a local buffer, the next param is its consumed item count;
     372        // otherwise it'll hold its writable output items.
     373        fields.push_back(sizeTy);
     374    }
     375
     376    return fields;
     377}
     378
     379/** ------------------------------------------------------------------------------------------------------------- *
     380 * @brief callGenerateDoSegmentMethod
     381 ** ------------------------------------------------------------------------------------------------------------- */
     382inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
    362383
    363384    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
     
    369390    mCurrentMethod = getDoSegmentFunction(b->getModule());
    370391    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    371     auto args = mCurrentMethod->arg_begin();
    372     if (LLVM_LIKELY(isStateful())) {
    373         setHandle(b, &*(args++));
    374     }
    375     mNumOfStrides = &*(args++);
     392
     393    std::vector<Value *> args;
     394    args.reserve(mCurrentMethod->arg_size());
     395    for (Argument & arg : mCurrentMethod->getArgumentList()) {
     396        args.push_back(&arg);
     397    }
     398    setDoSegmentProperties(b, args);
     399
     400    generateKernelMethod(b);
     401
     402    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     403        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
     404    }
     405
     406    const auto numOfInputs = getNumOfStreamInputs();
     407
     408    for (unsigned i = 0; i < numOfInputs; i++) {
     409        if (mUpdatableProcessedInputItemPtr[i]) {
     410            Value * const items = b->CreateLoad(mProcessedInputItemPtr[i]);
     411            b->CreateStore(items, mUpdatableProcessedInputItemPtr[i]);
     412        }
     413    }
     414
     415    const auto numOfOutputs = getNumOfStreamOutputs();
     416
     417    for (unsigned i = 0; i < numOfOutputs; i++) {
     418        if (mUpdatableProducedOutputItemPtr[i]) {
     419            Value * const items = b->CreateLoad(mProducedOutputItemPtr[i]);
     420            b->CreateStore(items, mUpdatableProducedOutputItemPtr[i]);
     421        }
     422    }
     423
     424    // return the termination signal (if one exists)
     425    if (mTerminationSignalPtr) {
     426        b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
     427        mTerminationSignalPtr = nullptr;
     428    } else {
     429        b->CreateRetVoid();
     430    }
     431
     432    // Clean up all of the constructed buffers.
     433    b->setKernel(storedKernel);
     434    mHandle = storedHandle;
     435    mCurrentMethod = nullptr;
     436    mIsFinal = nullptr;
     437    mNumOfStrides = nullptr;
     438}
     439
     440/** ------------------------------------------------------------------------------------------------------------- *
     441 * @brief setDoSegmentProperties
     442 ** ------------------------------------------------------------------------------------------------------------- */
     443void Kernel::setDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b, const std::vector<Value *> & args) {
     444
     445    initializeLocalScalarValues(b);
     446
     447    auto arg = args.begin();
     448    if (LLVM_LIKELY(isStateful())) {
     449        setHandle(b, *arg++);
     450    }
     451
     452    mNumOfStrides = *arg++;
    376453    mIsFinal = b->CreateIsNull(mNumOfStrides);
    377454    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    378         b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
     455        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
    379456    }
    380457
     
    384461
    385462    const auto numOfInputs = getNumOfStreamInputs();
     463
    386464    reset(mProcessedInputItemPtr, numOfInputs);
    387465    reset(mAccessibleInputItems, numOfInputs);
     
    389467    reset(mPopCountRateArray, numOfInputs);
    390468    reset(mNegatedPopCountRateArray, numOfInputs);
    391     std::vector<Value *> updatableProcessedInputItems;
    392     reset(updatableProcessedInputItems, numOfInputs);
     469    reset(mUpdatableProcessedInputItemPtr, numOfInputs);
    393470
    394471    IntegerType * const sizeTy = b->getSizeTy();
     
    399476        /// ----------------------------------------------------
    400477        const Binding & input = mInputStreamSets[i];
    401         assert (args != mCurrentMethod->arg_end());
    402         Value * const addr = &*(args++);
     478        assert (arg != args.end());
     479        Value * const addr = *arg++;
    403480        auto & buffer = mStreamSetInputBuffers[i];
    404481        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
     
    415492        Value * processed = nullptr;
    416493        if (isParamAddressable(input)) {
    417             assert (args != mCurrentMethod->arg_end());
    418             updatableProcessedInputItems[i] = &*(args++);
    419             processed = b->CreateLoad(updatableProcessedInputItems[i]);
     494            assert (arg != args.end());
     495            mUpdatableProcessedInputItemPtr[i] = *arg++;
     496            processed = b->CreateLoad(mUpdatableProcessedInputItemPtr[i]);
    420497        } else if (LLVM_LIKELY(isParamConstant(input))) {
    421             assert (args != mCurrentMethod->arg_end());
    422             processed = &*(args++);
     498            assert (arg != args.end());
     499            processed = *arg++;
    423500        } else { // isRelative
    424501            const ProcessingRate & rate = input.getRate();
     
    436513        /// accessible item count
    437514        /// ----------------------------------------------------
    438         assert (args != mCurrentMethod->arg_end());
    439         Value * const accessible = &*(args++);
     515        assert (arg != args.end());
     516        Value * const accessible = *arg++;
    440517        mAccessibleInputItems[i] = accessible;
    441518        Value * capacity = b->CreateAdd(processed, accessible);
     
    447524
    448525        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    449             assert (args != mCurrentMethod->arg_end());
    450             mPopCountRateArray[i] = &*(args++);
     526            assert (arg != args.end());
     527            mPopCountRateArray[i] = *arg++;
    451528        }
    452529
    453530        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    454             assert (args != mCurrentMethod->arg_end());
    455             mNegatedPopCountRateArray[i] = &*(args++);
     531            assert (arg != args.end());
     532            mNegatedPopCountRateArray[i] = *arg++;
    456533        }
    457534    }
     
    462539    reset(mWritableOutputItems, numOfOutputs);
    463540    reset(mConsumedOutputItems, numOfOutputs);
    464     std::vector<Value *> updatableProducedOutputItems;
    465     reset(updatableProducedOutputItems, numOfOutputs);
     541    reset(mUpdatableProducedOutputItemPtr, numOfOutputs);
    466542
    467543    const auto canTerminate = canSetTerminateSignal();
     
    480556            buffer->setHandle(b, handle);
    481557        } else {
    482             assert (args != mCurrentMethod->arg_end());
    483             Value * const logicalBaseAddress = &*(args++);
     558            assert (arg != args.end());
     559            Value * const logicalBaseAddress = *arg++;
    484560            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
    485561            buffer->setHandle(b, localHandle);
     
    491567        Value * produced = nullptr;
    492568        if (LLVM_LIKELY(canTerminate || isParamAddressable(output))) {
    493             assert (args != mCurrentMethod->arg_end());
    494             updatableProducedOutputItems[i] = &*(args++);
    495             produced = b->CreateLoad(updatableProducedOutputItems[i]);
     569            assert (arg != args.end());
     570            mUpdatableProducedOutputItemPtr[i] = *arg++;
     571            produced = b->CreateLoad(mUpdatableProducedOutputItemPtr[i]);
    496572        } else if (LLVM_LIKELY(isParamConstant(output))) {
    497             assert (args != mCurrentMethod->arg_end());
    498             produced = &*(args++);
     573            assert (arg != args.end());
     574            produced = *arg++;
    499575        } else { // isRelative
    500576
     
    517593        /// consumed or writable item count
    518594        /// ----------------------------------------------------
    519         Value * const arg = &*(args++);
     595        Value * const items = *arg++;
    520596        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
    521             mConsumedOutputItems[i] = arg;
     597            mConsumedOutputItems[i] = items;
    522598        } else {
    523             mWritableOutputItems[i] = arg;
    524             Value * const capacity = b->CreateAdd(produced, arg);
     599            mWritableOutputItems[i] = items;
     600            Value * const capacity = b->CreateAdd(produced, items);
    525601            buffer->setCapacity(b.get(), capacity);
    526602        }
    527 
    528     }
    529     assert (args == mCurrentMethod->arg_end());
     603    }
     604    assert (arg == args.end());
    530605
    531606    // initialize the termination signal if this kernel can set it
     
    536611    }
    537612
    538     initializeLocalScalarValues(b);
    539     generateKernelMethod(b);
    540 
    541     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    542         b->CreateMProtect(mHandle, CBuilder::Protect::READ);
    543     }
    544 
     613}
     614
     615/** ------------------------------------------------------------------------------------------------------------- *
     616 * @brief getDoSegmentProperties
     617 *
     618 * Reverse of the setDoSegmentProperties operation; used by the PipelineKernel when constructing internal threads
     619 * to simplify passing of the state data.
     620 ** ------------------------------------------------------------------------------------------------------------- */
     621std::vector<Value *> Kernel::getDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b) const {
     622
     623    std::vector<Value *> props;
     624    if (LLVM_LIKELY(isStateful())) {
     625        props.push_back(mHandle);
     626    }
     627    props.push_back(mNumOfStrides);
     628
     629    const auto numOfInputs = getNumOfStreamInputs();
    545630    for (unsigned i = 0; i < numOfInputs; i++) {
    546         if (updatableProcessedInputItems[i]) {
    547             Value * const items = b->CreateLoad(mProcessedInputItemPtr[i]);
    548             b->CreateStore(items, updatableProcessedInputItems[i]);
    549         }
    550     }
     631        /// ----------------------------------------------------
     632        /// logical buffer base address
     633        /// ----------------------------------------------------
     634        const auto & buffer = mStreamSetInputBuffers[i];
     635        props.push_back(buffer->getBaseAddress(b.get()));
     636        /// ----------------------------------------------------
     637        /// processed item count
     638        /// ----------------------------------------------------
     639        const Binding & input = mInputStreamSets[i];
     640        if (isParamAddressable(input)) {
     641            props.push_back(mProcessedInputItemPtr[i]);
     642        } else if (LLVM_LIKELY(isParamConstant(input))) {
     643            props.push_back(b->CreateLoad(mProcessedInputItemPtr[i]));
     644        }
     645        /// ----------------------------------------------------
     646        /// accessible item count
     647        /// ----------------------------------------------------
     648        props.push_back(mAccessibleInputItems[i]);
     649        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     650            props.push_back(mPopCountRateArray[i]);
     651        }
     652        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     653            props.push_back(mNegatedPopCountRateArray[i]);
     654        }
     655    }
     656
     657    // set all of the output buffers
     658    const auto numOfOutputs = getNumOfStreamOutputs();
     659    const auto canTerminate = canSetTerminateSignal();
    551660
    552661    for (unsigned i = 0; i < numOfOutputs; i++) {
    553         if (updatableProducedOutputItems[i]) {
    554             Value * const items = b->CreateLoad(mProducedOutputItemPtr[i]);
    555             b->CreateStore(items, updatableProducedOutputItems[i]);
    556         }
    557     }
    558 
    559     // return the termination signal (if one exists)
    560     if (mTerminationSignalPtr) {
    561         b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
    562         mTerminationSignalPtr = nullptr;
    563     } else {
    564         b->CreateRetVoid();
    565     }
    566 
    567     // Clean up all of the constructed buffers.
    568     b->setKernel(storedKernel);
    569     mHandle = storedHandle;
    570     mCurrentMethod = nullptr;
    571     mIsFinal = nullptr;
    572     mNumOfStrides = nullptr;
     662        /// ----------------------------------------------------
     663        /// logical buffer base address
     664        /// ----------------------------------------------------
     665        const auto & buffer = mStreamSetOutputBuffers[i];
     666        const Binding & output = mOutputStreamSets[i];
     667        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     668            // If an output is a managed buffer, the address is stored within the state instead
     669            // of being passed in through the function call.
     670            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
     671            props.push_back(handle);
     672        } else {
     673            props.push_back(buffer->getBaseAddress(b.get()));
     674        }
     675        /// ----------------------------------------------------
     676        /// produced item count
     677        /// ----------------------------------------------------
     678        if (LLVM_LIKELY(canTerminate || isParamAddressable(output))) {
     679            props.push_back(mProducedOutputItemPtr[i]);
     680        } else if (LLVM_LIKELY(isParamConstant(output))) {
     681            props.push_back(b->CreateLoad(mProducedOutputItemPtr[i]));
     682        }
     683        /// ----------------------------------------------------
     684        /// consumed or writable item count
     685        /// ----------------------------------------------------
     686        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     687            props.push_back(mConsumedOutputItems[i]);
     688        } else {
     689            props.push_back(mWritableOutputItems[i]);
     690        }
     691    }
     692
     693    return props;
    573694}
    574695
     
    576697 * @brief addFinalizeDeclaration
    577698 ** ------------------------------------------------------------------------------------------------------------- */
    578 void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     699inline void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
    579700    Type * resultType = nullptr;
    580701    if (mOutputScalars.empty()) {
     
    610731 * @brief callGenerateFinalizeMethod
    611732 ** ------------------------------------------------------------------------------------------------------------- */
    612 void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
     733inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
    613734
    614735    const Kernel * const storedKernel = b->getKernel();
     
    703824    Function * f = module->getFunction(name);
    704825    if (LLVM_UNLIKELY(f == nullptr)) {
    705         report_fatal_error("Cannot find " + name);
     826        llvm_unreachable("cannot find Initialize function");
    706827    }
    707828    return f;
     
    715836    Function * f = module->getFunction(name);
    716837    if (LLVM_UNLIKELY(f == nullptr)) {
    717         report_fatal_error("Cannot find " + name);
     838        llvm_unreachable("cannot find DoSegment function");
    718839    }
    719840    return f;
     
    727848    Function * f = module->getFunction(name);
    728849    if (LLVM_UNLIKELY(f == nullptr)) {
    729         report_fatal_error("Cannot find " + name);
     850        llvm_unreachable("cannot find Terminate function");
    730851    }
    731852    return f;
    732853}
     854
     855/** ------------------------------------------------------------------------------------------------------------- *
     856 * @brief isStateful
     857 ** ------------------------------------------------------------------------------------------------------------- */
     858LLVM_READNONE bool Kernel::isStateful() const {
     859    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     860        llvm_unreachable("kernel state must be constructed prior to calling isStateful");
     861    }
     862    return !mKernelStateType->isEmptyTy();
     863}
     864
    733865
    734866/** ------------------------------------------------------------------------------------------------------------- *
     
    862994        }
    863995        return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
    864     } else {
    865         llvm_unreachable("createInstance should not be called on stateless kernels");
    866         return nullptr;
    867     }
     996    }
     997    llvm_unreachable("createInstance should not be called on stateless kernels");
     998    return nullptr;
    868999}
    8691000
     
    8811012
    8821013/** ------------------------------------------------------------------------------------------------------------- *
    883  * @brief generateKernel
    884  ** ------------------------------------------------------------------------------------------------------------- */
    885 void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
    886     if (LLVM_UNLIKELY(mIsGenerated)) return;
    887     b->setKernel(this);
    888     b->setModule(mModule);
    889     addKernelDeclarations(b);
    890     callGenerateInitializeMethod(b);
    891     callGenerateKernelMethod(b);
    892     callGenerateFinalizeMethod(b);
    893     addAdditionalFunctions(b);
    894     mIsGenerated = true;
    895 }
    896 
    897 /** ------------------------------------------------------------------------------------------------------------- *
    8981014 * @brief finalizeInstance
    8991015 ** ------------------------------------------------------------------------------------------------------------- */
    9001016Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
    901     assert (mHandle && "was not set");
    902     Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
     1017    Value * result = nullptr;
     1018    Function * const termFunc = getTerminateFunction(b->getModule());
     1019    if (LLVM_LIKELY(isStateful())) {
     1020        result = b->CreateCall(termFunc, { mHandle });
     1021    } else {
     1022        result = b->CreateCall(termFunc);
     1023    }
    9031024    mHandle = nullptr;
    9041025    if (mOutputScalars.empty()) {
     
    9351056        case ScalarType::Local:
    9361057            return mLocalScalarPtr[index];
    937             case ScalarType::Internal:
     1058        case ScalarType::Internal:
    9381059            index += mOutputScalars.size();
    9391060        case ScalarType::Output:
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6275 r6288  
    148148    virtual bool isCachable() const { return false; }
    149149
    150     LLVM_READNONE bool isStateful() const {
    151         if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    152             llvm_unreachable("kernel state must be constructed prior to calling isStateful");
    153         }
    154         return !mKernelStateType->isEmptyTy();
    155     }
     150    LLVM_READNONE bool isStateful() const;
    156151
    157152    unsigned getStride() const { return mStride; }
     
    354349
    355350    // Add ExternalLinkage method declarations for the kernel to a given client module.
    356     void addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b);
     351    virtual void addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b);
    357352
    358353    llvm::Value * createInstance(const std::unique_ptr<KernelBuilder> & b);
     
    360355    virtual void initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<llvm::Value *> & args);
    361356
    362     virtual llvm::Value * finalizeInstance(const std::unique_ptr<KernelBuilder> & b);
     357    llvm::Value * finalizeInstance(const std::unique_ptr<KernelBuilder> & b);
    363358
    364359    void generateKernel(const std::unique_ptr<KernelBuilder> & b);
     
    401396
    402397    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> &) { }
    403 
    404     void addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b);
    405 
    406     void addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b);
    407 
    408     void addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b);
    409398
    410399    virtual void linkExternalMethods(const std::unique_ptr<KernelBuilder> &) { }
     
    466455        std::tie(port, index) = getStreamPort(name);
    467456        assert (port == Port::Input);
     457        return getProcessedInputItemsPtr(index);
     458    }
     459
     460    LLVM_READNONE llvm::Value * getProcessedInputItemsPtr(const unsigned index) const {
    468461        return mProcessedInputItemPtr[index];
    469462    }
     
    473466        std::tie(port, index) = getStreamPort(name);
    474467        assert (port == Port::Output);
     468        return getProducedOutputItemsPtr(index);
     469    }
     470
     471    LLVM_READNONE llvm::Value * getProducedOutputItemsPtr(const unsigned index) const {
    475472        return mProducedOutputItemPtr[index];
     473    }
     474
     475    LLVM_READNONE llvm::Value * getWritableOutputItems(const llvm::StringRef name) const {
     476        Port port; unsigned index;
     477        std::tie(port, index) = getStreamPort(name);
     478        assert (port == Port::Output);
     479        return getWritableOutputItems(index);
     480    }
     481
     482    LLVM_READNONE llvm::Value * getWritableOutputItems(const unsigned index) const {
     483        return mWritableOutputItems[index];
    476484    }
    477485
     
    480488        std::tie(port, index) = getStreamPort(name);
    481489        assert (port == Port::Output);
     490        return getConsumedOutputItems(index);
     491    }
     492
     493    LLVM_READNONE llvm::Value * getConsumedOutputItems(const unsigned index) const {
    482494        return mConsumedOutputItems[index];
    483495    }
     496
    484497
    485498    LLVM_READNONE llvm::Value * isFinal() const {
     
    498511    void initializeLocalScalarValues(const std::unique_ptr<KernelBuilder> & b);
    499512
     513    void addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b);
     514
    500515    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
    501516
    502     void callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b);
     517    void addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b);
     518
     519    std::vector<llvm::Type *> getDoSegmentFields(const std::unique_ptr<KernelBuilder> & b) const;
     520
     521    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b);
     522
     523    void setDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b, const std::vector<llvm::Value *> & args);
     524
     525    std::vector<llvm::Value *> getDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b) const;
     526
     527    void addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b);
    503528
    504529    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b);
     
    545570    std::vector<llvm::Value *>      mLocalScalarPtr;
    546571
     572    std::vector<llvm::Value *>      mUpdatableProcessedInputItemPtr;
    547573    std::vector<llvm::Value *>      mProcessedInputItemPtr;
     574
    548575    std::vector<llvm::Value *>      mAccessibleInputItems;
    549576    std::vector<llvm::Value *>      mAvailableInputItems;
    550577    std::vector<llvm::Value *>      mPopCountRateArray;
    551578    std::vector<llvm::Value *>      mNegatedPopCountRateArray;
     579
     580    std::vector<llvm::Value *>      mUpdatableProducedOutputItemPtr;
    552581    std::vector<llvm::Value *>      mProducedOutputItemPtr;
     582
    553583    std::vector<llvm::Value *>      mWritableOutputItems;
    554584    std::vector<llvm::Value *>      mConsumedOutputItems;
     
    595625class MultiBlockKernel : public Kernel {
    596626    friend class BlockOrientedKernel;
     627    friend class OptimizationBranch;
    597628public:
    598629
  • icGREP/icgrep-devel/icgrep/kernels/multiblock_kernel.cpp

    r6261 r6288  
    3636 ** ------------------------------------------------------------------------------------------------------------- */
    3737void MultiBlockKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
     38    assert (mIsFinal);
     39    assert (mNumOfStrides);
    3840    Value * const numOfStrides = b->CreateSelect(mIsFinal, b->getSize(1), mNumOfStrides);
    3941    generateMultiBlockLogic(b, numOfStrides);
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.h

    r6273 r6288  
    88namespace kernel {
    99
    10 class OptimizationBranch final : public Kernel {
     10struct OptimizationBranchCompiler;
     11
     12class OptimizationBranch final : public MultiBlockKernel {
    1113    friend class OptimizationBranchBuilder;
    1214public:
     15
     16    static bool classof(const Kernel * const k) {
     17        switch (k->getTypeId()) {
     18            case TypeId::MultiBlock:
     19            case TypeId::OptimizationBranch:
     20                return true;
     21            default:
     22                return false;
     23        }
     24    }
    1325
    1426    const static std::string CONDITION_TAG;
     
    2840                       Bindings && scalar_outputs);
    2941
    30     void linkExternalMethods(const std::unique_ptr<KernelBuilder> & b) final;
     42    void addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) final;
    3143
    3244    void generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) final;
    3345
    34     void initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<llvm::Value *> & args) final;
    35 
    36     void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
     46    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
    3747
    3848    void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) final;
    39 
    40     void addAdditionalFunctions(const std::unique_ptr<KernelBuilder> & b) final;
    41 
    42     llvm::Value * finalizeInstance(const std::unique_ptr<KernelBuilder> & b) final;
    43 
    44     void addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & b) final;
    45 
    46     std::vector<llvm::Value *> getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) final;
    4749
    4850private:
    4951
    5052    llvm::Value * getItemCountIncrement(const std::unique_ptr<KernelBuilder> & b, const Binding & binding,
    51                                         llvm::Value * const first, llvm::Value * const last) const;
     53                                        llvm::Value * const first, llvm::Value * const last,
     54                                        llvm::Value * const defaultValue = nullptr) const;
    5255
    5356    void callKernel(const std::unique_ptr<KernelBuilder> & b,
     
    5760private:
    5861
    59     Relationship * const         mCondition;
    60     Kernel * const               mTrueKernel;
    61     Kernel * const               mFalseKernel;
    62 
    63     std::vector<llvm::Value *>   mProcessedInputItems;
    64     std::vector<llvm::PHINode *> mAccessibleInputItemPhi;
    65 
    66     std::vector<llvm::Value *>   mProducedOutputItems;
    67     std::vector<llvm::PHINode *> mWritableOrConsumedOutputItemPhi;
     62    Relationship * const                                mCondition;
     63    Kernel * const                                      mNonZeroKernel;
     64    Kernel * const                                      mAllZeroKernel;
     65    mutable std::unique_ptr<OptimizationBranchCompiler> mCompiler;
    6866};
    6967
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch/optimizationbranch.cpp

    r6286 r6288  
    1 #include "optimizationbranch.h"
     1#include <kernels/optimizationbranch.h>
     2#include "optimizationbranch_compiler.hpp"
    23#include <kernels/kernel_builder.h>
    3 #include <boost/scoped_ptr.hpp>
     4#include <boost/graph/adjacency_list.hpp>
     5#include <boost/container/flat_map.hpp>
     6#include <llvm/Support/raw_ostream.h>
     7
    48
    59#warning at compilation, this must verify that the I/O rates of the branch permits the rates of the branches
    610
     11#warning move most of this logic this into the optimizationbranch compiler
     12
    713using namespace llvm;
     14using namespace boost;
     15using namespace boost::container;
    816
    917namespace kernel {
     
    1119using AttrId = Attribute::KindId;
    1220
     21using ScalarDependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, Value *, unsigned>;
     22using ScalarVertex = ScalarDependencyGraph::vertex_descriptor;
     23using ScalarDependencyMap = flat_map<const Relationship *, ScalarVertex>;
     24
    1325const std::string OptimizationBranch::CONDITION_TAG = "@condition";
    1426
    15 /** ------------------------------------------------------------------------------------------------------------- *
    16  * @brief linkExternalMethods
    17  ** ------------------------------------------------------------------------------------------------------------- */
    18 void OptimizationBranch::linkExternalMethods(const std::unique_ptr<KernelBuilder> & b) {
    19     mTrueKernel->linkExternalMethods(b);
    20     mFalseKernel->linkExternalMethods(b);
    21 }
    22 
    23 /** ------------------------------------------------------------------------------------------------------------- *
    24  * @brief generateInitializeMethod
    25  ** ------------------------------------------------------------------------------------------------------------- */
    26 void OptimizationBranch::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
    27     mTrueKernel->generateInitializeMethod(b);
    28     mFalseKernel->generateInitializeMethod(b);
    29 }
    30 
    31 /** ------------------------------------------------------------------------------------------------------------- *
    32  * @brief initializeInstance
    33  ** ------------------------------------------------------------------------------------------------------------- */
    34 void OptimizationBranch::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<llvm::Value *> & args) {
    35     mTrueKernel->initializeInstance(b, args);
    36     mFalseKernel->initializeInstance(b, args);
    37 }
     27const static std::string BRANCH_PREFIX = "@B";
    3828
    3929/** ------------------------------------------------------------------------------------------------------------- *
     
    4939
    5040/** ------------------------------------------------------------------------------------------------------------- *
     41 * @brief isParamAddressable
     42 ** ------------------------------------------------------------------------------------------------------------- */
     43inline bool isParamAddressable(const Binding & binding) {
     44    if (binding.isDeferred()) {
     45        return true;
     46    }
     47    const ProcessingRate & rate = binding.getRate();
     48    return (rate.isBounded() || rate.isUnknown());
     49}
     50
     51/** ------------------------------------------------------------------------------------------------------------- *
    5152 * @brief isLocalBuffer
    5253 ** ------------------------------------------------------------------------------------------------------------- */
     
    5657
    5758/** ------------------------------------------------------------------------------------------------------------- *
    58  * @brief generateKernelMethod
    59  ** ------------------------------------------------------------------------------------------------------------- */
    60 void OptimizationBranch::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
     59 * @brief loadKernelHandle
     60 ** ------------------------------------------------------------------------------------------------------------- */
     61void loadHandle(const std::unique_ptr<KernelBuilder> & b, Kernel * const kernel, const std::string suffix) {
     62    if (LLVM_LIKELY(kernel->isStateful())) {
     63        Value * handle = b->getScalarField(BRANCH_PREFIX + suffix);
     64        if (kernel->hasFamilyName()) {
     65            handle = b->CreatePointerCast(handle, kernel->getKernelType()->getPointerTo());
     66        }
     67        kernel->setHandle(b, handle);
     68    }
     69}
     70
     71/** ------------------------------------------------------------------------------------------------------------- *
     72 * @brief generateMultiBlockLogic
     73 ** ------------------------------------------------------------------------------------------------------------- */
     74void OptimizationBranch::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * const numOfStrides) {
    6175
    6276    BasicBlock * const loopCond = b->CreateBasicBlock("cond");
     
    6680    BasicBlock * const exit = b->CreateBasicBlock("exit");
    6781
     82
    6883    Constant * const ZERO = b->getSize(0);
    6984    Constant * const ONE = b->getSize(1);
    7085
    71     const auto numOfConditionInputs = isa<StreamSet>(mCondition) ? 1 : 0;
    72     const auto numOfInputs = getNumOfStreamInputs() - numOfConditionInputs;
    73     std::vector<llvm::Value *> initialProcessedInputItems(numOfInputs, nullptr);
    74     for (unsigned i = 0; i < numOfInputs; ++i) {
    75         if (isParamConstant(mInputStreamSets[i])) {
    76             initialProcessedInputItems[i] = b->CreateLoad(mProcessedInputItemPtr[i]);
    77         }
    78     }
    79 
    80     const auto numOfOutputs = getNumOfStreamOutputs();
    81     std::vector<llvm::Value *> initialProducedOutputItems(numOfOutputs, nullptr);
    82     for (unsigned i = 0; i < numOfOutputs; ++i) {
    83         if (isParamConstant(mOutputStreamSets[i])) {
    84             initialProducedOutputItems[i] = b->CreateLoad(mProducedOutputItemPtr[i]);
    85         }
    86     }
     86    loadHandle(b, mAllZeroKernel, "0");
     87    loadHandle(b, mNonZeroKernel, "1");
    8788
    8889    BasicBlock * const entry = b->GetInsertBlock();
     
    9798    b->SetInsertPoint(loopCond);
    9899    IntegerType * const sizeTy = b->getSizeTy();
    99     PHINode * const first = b->CreatePHI(sizeTy, 3);
     100    PHINode * const first = b->CreatePHI(sizeTy, 3, "firstStride");
    100101    first->addIncoming(ZERO, entry);
    101     PHINode * const last = b->CreatePHI(sizeTy, 3);
    102     PHINode * const state = b->CreatePHI(b->getInt1Ty(), 3);
    103     state->addIncoming(b->getFalse(), entry);
    104 
    105     mProcessedInputItems.resize(numOfInputs);
    106     mAccessibleInputItemPhi.resize(numOfInputs);
    107     for (unsigned i = 0; i < numOfInputs; ++i) {
    108         if (initialProcessedInputItems[i]) {
    109             PHINode * const inputPhi = b->CreatePHI(sizeTy, 2);
    110             inputPhi->addIncoming(initialProcessedInputItems[i], entry);
    111             mProcessedInputItems[i] = inputPhi;
    112         } else {
    113             mProcessedInputItems[i] = mProcessedInputItemPtr[i];
    114         }
    115         PHINode * const accessiblePhi = b->CreatePHI(sizeTy, 2);
    116         accessiblePhi->addIncoming(mAccessibleInputItems[i], entry);
    117         mAccessibleInputItemPhi[i] = accessiblePhi;
    118     }
    119 
    120     mProducedOutputItems.resize(numOfOutputs);
    121     mWritableOrConsumedOutputItemPhi.resize(numOfOutputs);
    122     for (unsigned i = 0; i < numOfOutputs; ++i) {
    123         if (initialProducedOutputItems[i]) {
    124             PHINode * const outputPhi = b->CreatePHI(sizeTy, 2);
    125             outputPhi->addIncoming(initialProducedOutputItems[i], entry);
    126             mProducedOutputItems[i] = outputPhi;
    127         } else {
    128             mProducedOutputItems[i] = mProducedOutputItemPtr[i];
    129         }
    130         PHINode * const writablePhi = b->CreatePHI(sizeTy, 2);
    131         if (isLocalBuffer(mOutputStreamSets[i])) {
    132             writablePhi->addIncoming(mConsumedOutputItems[i], entry);
    133         } else {
    134             writablePhi->addIncoming(mWritableOutputItems[i], entry);
    135         }
    136         mWritableOrConsumedOutputItemPhi[i] = writablePhi;
    137     }
     102    PHINode * const last = b->CreatePHI(sizeTy, 3, "lastStride");
     103    PHINode * const currentState = b->CreatePHI(b->getInt1Ty(), 3);
     104    currentState->addIncoming(UndefValue::get(b->getInt1Ty()), entry);
     105    Value * finished = nullptr;
    138106
    139107    if (LLVM_LIKELY(isa<StreamSet>(mCondition))) {
     
    145113        BasicBlock * const processStrides = b->CreateBasicBlock("processStrides", nonZeroPath);
    146114
    147         Constant * const strideCount = b->getSize(getStride() / b->getBitBlockWidth());
    148 
    149         Value * const streamCount = b->getInputStreamSetCount(CONDITION_TAG);
    150         Value * const blocksPerStride = b->CreateMul(streamCount, strideCount);
    151 
    152         Value * const offset = b->CreateMul(last, strideCount);
     115        Constant * const blocksPerStride = b->getSize(getStride() / b->getBitBlockWidth());
     116        Value * const numOfConditionStreams = b->getInputStreamSetCount(CONDITION_TAG);
     117        Value * const numOfConditionBlocks = b->CreateMul(numOfConditionStreams, blocksPerStride);
     118
     119        Value * const offset = b->CreateMul(last, blocksPerStride);
    153120        Value * basePtr = b->getInputStreamBlockPtr(CONDITION_TAG, ZERO, offset);
    154121        Type * const BitBlockTy = b->getBitBlockType();
     
    166133        merged->addIncoming(value, summarizeOneStride);
    167134        Value * const nextIteration = b->CreateAdd(iteration, ONE);
    168         Value * const more = b->CreateICmpNE(nextIteration, blocksPerStride);
     135        Value * const more = b->CreateICmpNE(nextIteration, numOfConditionBlocks);
     136        iteration->addIncoming(nextIteration, b->GetInsertBlock());
    169137        b->CreateCondBr(more, summarizeOneStride, checkStride);
    170138
     
    174142        // ignore the first stride unless it is also our last.
    175143        b->SetInsertPoint(checkStride);
    176         Value * const nextState = b->bitblock_any(merged);
    177         Value * const sameState = b->CreateICmpEQ(nextState, state);
     144        Value * const nextState = b->bitblock_any(value);
     145        Value * const sameState = b->CreateICmpEQ(nextState, currentState);
    178146        Value * const firstStride = b->CreateICmpEQ(last, ZERO);
    179147        Value * const continuation = b->CreateOr(sameState, firstStride);
    180148        Value * const nextIndex = b->CreateAdd(last, ONE);
    181         Value * const notLastStride = b->CreateICmpNE(nextIndex, mNumOfStrides);
     149        Value * const notLastStride = b->CreateICmpNE(nextIndex, numOfStrides);
    182150        Value * const checkNextStride = b->CreateAnd(continuation, notLastStride);
    183151        last->addIncoming(nextIndex, checkStride);
    184152        first->addIncoming(first, checkStride);
    185         state->addIncoming(nextState, checkStride);
    186         for (unsigned i = 0; i < numOfInputs; ++i) {
    187             if (initialProcessedInputItems[i]) {
    188                 PHINode * const inputPhi = cast<PHINode>(mProcessedInputItems[i]);
    189                 inputPhi->addIncoming(inputPhi, checkStride);
    190             }
    191             PHINode * const accessiblePhi = mAccessibleInputItemPhi[i];
    192             accessiblePhi->addIncoming(accessiblePhi, checkStride);
    193         }
    194 
    195         for (unsigned i = 0; i < numOfOutputs; ++i) {
    196             if (initialProducedOutputItems[i]) {
    197                 PHINode * const outputPhi = cast<PHINode>(mProducedOutputItems[i]);
    198                 outputPhi->addIncoming(outputPhi, checkStride);
    199             }
    200             PHINode * const writablePhi = mWritableOrConsumedOutputItemPhi[i];
    201             writablePhi->addIncoming(writablePhi, checkStride);
    202         }
    203 
     153        currentState->addIncoming(nextState, checkStride);
    204154        b->CreateLikelyCondBr(checkNextStride, loopCond, processStrides);
    205155
     
    207157        b->SetInsertPoint(processStrides);
    208158        // state is implicitly "indeterminate" during our first stride
    209         Value * const selectedPath = b->CreateSelect(firstStride, nextState, state);
     159        Value * const selectedPath = b->CreateSelect(firstStride, nextState, currentState);
     160        finished = b->CreateNot(notLastStride);
    210161        b->CreateCondBr(selectedPath, nonZeroPath, allZeroPath);
    211162
     163        first->addIncoming(last, mergePaths);
     164        last->addIncoming(nextIndex, mergePaths);
     165        currentState->addIncoming(nextState, mergePaths);
    212166    } else {
    213         last->addIncoming(mNumOfStrides, entry);
    214 
    215167        Value * const cond = b->getScalarField(CONDITION_TAG);
    216168        b->CreateCondBr(b->CreateIsNotNull(cond), nonZeroPath, allZeroPath);
     169
     170        last->addIncoming(numOfStrides, entry);
     171        last->addIncoming(numOfStrides, mergePaths);
     172        first->addIncoming(ZERO, mergePaths);
     173        currentState->addIncoming(b->getFalse(), mergePaths);
     174        finished = b->getTrue();
    217175    }
    218176
    219177    // make the actual calls and take any potential termination signal
    220178    b->SetInsertPoint(nonZeroPath);
    221     callKernel(b, mTrueKernel, first, last, terminatedPhi);
     179    callKernel(b, mNonZeroKernel, first, last, terminatedPhi);
    222180    b->CreateBr(mergePaths);
    223181
    224182    b->SetInsertPoint(allZeroPath);
    225     callKernel(b, mFalseKernel, first, last, terminatedPhi);
     183    callKernel(b, mAllZeroKernel, first, last, terminatedPhi);
    226184    b->CreateBr(mergePaths);
    227185
    228186    b->SetInsertPoint(mergePaths);
    229     last->addIncoming(last, mergePaths);
    230     first->addIncoming(last, mergePaths);
    231     state->addIncoming(b->getFalse(), mergePaths);
    232     for (unsigned i = 0; i < numOfInputs; ++i) {
    233         const Binding & input = mInputStreamSets[i];
    234         Value * updatedInputCount = nullptr;
    235         if (isParamConstant(input)) {
    236             Value * const itemCount = getItemCountIncrement(b, input, first, last);
    237             PHINode * const inputPhi = cast<PHINode>(mProcessedInputItems[i]);
    238             updatedInputCount = b->CreateAdd(inputPhi, itemCount);
    239             inputPhi->addIncoming(updatedInputCount, mergePaths);
    240         }
    241         PHINode * const accessiblePhi = mAccessibleInputItemPhi[i];
    242         if (updatedInputCount == nullptr) {
    243             updatedInputCount = b->CreateLoad(mProducedOutputItems[i]);
    244         }
    245         Value * const remaining = b->CreateSub(accessiblePhi, updatedInputCount);
    246         accessiblePhi->addIncoming(remaining, mergePaths);
    247     }
    248 
    249     for (unsigned i = 0; i < numOfOutputs; ++i) {
    250         const Binding & output = mOutputStreamSets[i];
    251         Value * updatedOutputCount = nullptr;
    252         if (isParamConstant(output)) {
    253             Value * const itemCount = getItemCountIncrement(b, output, first, last);
    254             PHINode * const outputPhi = cast<PHINode>(mProducedOutputItems[i]);
    255             updatedOutputCount = b->CreateAdd(outputPhi, itemCount);
    256             outputPhi->addIncoming(updatedOutputCount, mergePaths);
    257         }
    258         PHINode * const writablePhi = mWritableOrConsumedOutputItemPhi[i];
    259         if (isLocalBuffer(output)) {
    260             writablePhi->addIncoming(writablePhi, mergePaths);
    261         } else {
    262             if (updatedOutputCount == nullptr) {
    263                 updatedOutputCount = b->CreateLoad(mProducedOutputItems[i]);
    264             }
    265             Value * const remaining = b->CreateSub(writablePhi, updatedOutputCount);
    266             writablePhi->addIncoming(remaining, mergePaths);
    267         }
    268     }
    269 
    270     Value * const lastStride = b->CreateICmpNE(last, mNumOfStrides);
    271     Value * const finished = b->CreateOr(lastStride, terminatedPhi);
     187    // Value * finished = b->CreateICmpEQ(last, numOfStrides);
     188    if (terminatedPhi) {
     189        finished = b->CreateOr(finished, terminatedPhi);
     190    }
    272191    b->CreateLikelyCondBr(finished, exit, loopCond);
    273192
    274193    b->SetInsertPoint(exit);
     194
     195    b->CallPrintInt("branch_exit --------------------", numOfStrides);
    275196
    276197}
     
    284205                                    PHINode * const terminatedPhi) {
    285206
     207    Function * const doSegment = kernel->getDoSegmentFunction(b->getModule());
     208
     209    BasicBlock * incrementItemCounts = nullptr;
     210    BasicBlock * kernelExit = nullptr;
     211    if (kernel->canSetTerminateSignal()) {
     212        incrementItemCounts = b->CreateBasicBlock("incrementItemCounts");
     213        kernelExit = b->CreateBasicBlock("kernelExit");
     214    }
     215
     216
     217    b->CallPrintInt("branch_first -------------------", first);
     218    b->CallPrintInt("branch_last --------------------", last);
     219
    286220    std::vector<Value *> args;
    287     args.reserve(mCurrentMethod->arg_size());
    288     args.push_back(kernel->getHandle()); // handle
    289     args.push_back(b->CreateSub(last, first)); // numOfStrides
     221    args.reserve(doSegment->arg_size());
     222    if (kernel->isStateful()) {
     223        args.push_back(kernel->getHandle()); // handle
     224    }
     225    Value * const numOfStrides = b->CreateSub(last, first);
     226    args.push_back(numOfStrides); // numOfStrides
    290227    const auto numOfInputs = kernel->getNumOfStreamInputs();
     228
    291229    for (unsigned i = 0; i < numOfInputs; i++) {
    292230
     
    296234        args.push_back(buffer->getBaseAddress(b.get()));
    297235        // processed input items
    298         args.push_back(mProcessedInputItems[i]);
     236        Value * processed = mProcessedInputItemPtr[i];
     237        if (isParamConstant(input)) {
     238            processed = b->CreateLoad(processed);
     239        }
     240        args.push_back(processed);
    299241        // accessible input items (after non-deferred processed item count)
    300         args.push_back(mAccessibleInputItemPhi[i]);
     242        args.push_back(getItemCountIncrement(b, input, first, last, mAccessibleInputItems[i]));
    301243        // TODO: What if one of the branches requires this but the other doesn't?
    302244        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     
    311253    for (unsigned i = 0; i < numOfOutputs; ++i) {
    312254        const Binding & output = kernel->getOutputStreamSetBinding(i);
    313         if (!isLocalBuffer(output)) {
    314             const auto & buffer = mStreamSetOutputBuffers[i];
    315             args.push_back(buffer->getBaseAddress(b.get()));
    316         }
    317         args.push_back(mProducedOutputItems[i]);
    318         args.push_back(mWritableOrConsumedOutputItemPhi[i]);
    319     }
    320 
    321     Value * terminated = b->CreateCall(kernel->getDoSegmentFunction(b->getModule()), args);
    322     if (terminatedPhi) {
    323         if (!kernel->canSetTerminateSignal()) {
    324             terminated = b->getFalse();
    325         }
     255        const auto & buffer = mStreamSetOutputBuffers[i];
     256        args.push_back(buffer->getBaseAddress(b.get()));
     257        // produced
     258        Value * produced = mProducedOutputItemPtr[i];
     259        if (isParamConstant(output)) {
     260            produced = b->CreateLoad(produced);
     261        }
     262        args.push_back(produced);
     263        args.push_back(getItemCountIncrement(b, output, first, last, mWritableOutputItems[i]));
     264    }
     265
     266
     267
     268    Value * const terminated = b->CreateCall(doSegment, args);
     269    if (incrementItemCounts) {
     270        b->CreateUnlikelyCondBr(terminated, kernelExit, incrementItemCounts);
     271
     272        b->SetInsertPoint(incrementItemCounts);
     273    }
     274
     275    for (unsigned i = 0; i < numOfInputs; ++i) {
     276        const Binding & input = mInputStreamSets[i];
     277        if (isParamConstant(input)) {
     278            Value * const processed = b->CreateLoad(mProcessedInputItemPtr[i]);
     279            Value * const itemCount = getItemCountIncrement(b, input, first, last);
     280            Value * const updatedInputCount = b->CreateAdd(processed, itemCount);
     281            b->CreateStore(updatedInputCount, mProcessedInputItemPtr[i]);
     282        }
     283    }
     284
     285    for (unsigned i = 0; i < numOfOutputs; ++i) {
     286        const Binding & output = mOutputStreamSets[i];
     287        if (isParamConstant(output)) {
     288            Value * const produced = b->CreateLoad(mProducedOutputItemPtr[i]);
     289            Value * const itemCount = getItemCountIncrement(b, output, first, last);
     290            Value * const updatedOutputCount = b->CreateAdd(produced, itemCount);
     291            b->CreateStore(updatedOutputCount, mProducedOutputItemPtr[i]);
     292        }
     293    }
     294
     295    if (incrementItemCounts) {
    326296        terminatedPhi->addIncoming(terminated, b->GetInsertBlock());
    327     }
     297        b->CreateBr(kernelExit);
     298        b->SetInsertPoint(kernelExit);
     299    }
     300
     301    b->CallPrintInt("branch_exec --------------", numOfStrides);
    328302
    329303}
     
    333307 ** ------------------------------------------------------------------------------------------------------------- */
    334308Value * OptimizationBranch::getItemCountIncrement(const std::unique_ptr<KernelBuilder> & b, const Binding & binding,
    335                                                   Value * const first, Value * const last) const {
    336 
     309                                                  Value * const first, Value * const last, Value * const defaultValue) const {
    337310    const ProcessingRate & rate = binding.getRate();
    338     if (rate.isFixed()) {
     311    if (rate.isFixed() || rate.isBounded()) {
    339312        Constant * const strideLength = b->getSize(ceiling(getUpperBound(binding) * getStride()));
    340313        Value * const numOfStrides = b->CreateSub(last, first);
    341314        return b->CreateMul(numOfStrides, strideLength);
    342     } else { assert (rate.isPopCount() || rate.isNegatedPopCount());
     315    } else if (rate.isPopCount() || rate.isNegatedPopCount()) {
    343316        Port refPort;
    344317        unsigned refIndex = 0;
     
    358331        return b->CreateSub(currentSum, priorSum);
    359332    }
    360 
     333    return defaultValue;
     334}
     335
     336// TODO: abstract this. it's a near copy of the pipeline kernel logic
     337
     338void enumerateScalarProducerBindings(const std::unique_ptr<KernelBuilder> & b,
     339                                     const ScalarVertex producer,
     340                                     const Bindings & bindings,
     341                                     ScalarDependencyGraph & G,
     342                                     ScalarDependencyMap & M) {
     343    const auto n = bindings.size();
     344    for (unsigned i = 0; i < n; ++i) {
     345        const Binding & binding = bindings[i];
     346        const Relationship * const rel = binding.getRelationship();
     347        assert (M.count(rel) == 0);
     348        Value * const value = b->getScalarField(binding.getName());
     349        const auto buffer = add_vertex(value, G);
     350        add_edge(producer, buffer, i, G);
     351        M.emplace(rel, buffer);
     352    }
     353}
     354
     355ScalarVertex makeIfConstant(const Binding & binding,
     356                            ScalarDependencyGraph & G,
     357                            ScalarDependencyMap & M) {
     358    const Relationship * const rel = binding.getRelationship();
     359    const auto f = M.find(rel);
     360    if (LLVM_LIKELY(f != M.end())) {
     361        return f->second;
     362    } else if (LLVM_LIKELY(isa<ScalarConstant>(rel))) {
     363        const auto bufferVertex = add_vertex(cast<ScalarConstant>(rel)->value(), G);
     364        M.emplace(rel, bufferVertex);
     365        return bufferVertex;
     366    } else {
     367        report_fatal_error("unknown scalar value");
     368    }
     369}
     370
     371void enumerateScalarConsumerBindings(const ScalarVertex consumer,
     372                                     const Bindings & bindings,
     373                                     ScalarDependencyGraph & G,
     374                                     ScalarDependencyMap & M) {
     375    const auto n = bindings.size();
     376    for (unsigned i = 0; i < n; ++i) {
     377        const auto buffer = makeIfConstant(bindings[i], G, M);
     378        assert (buffer < num_vertices(G));
     379        add_edge(buffer, consumer, i, G);
     380    }
     381}
     382
     383/** ------------------------------------------------------------------------------------------------------------- *
     384 * @brief initKernel
     385 ** ------------------------------------------------------------------------------------------------------------- */
     386Value * initKernel(const std::unique_ptr<KernelBuilder> & b,
     387                   const unsigned index,
     388                   Kernel * const kernel,
     389                   Function * const initializer,
     390                   const ScalarDependencyGraph & G) {
     391    std::vector<Value *> args;
     392    const auto hasHandle = kernel->isStateful() ? 1U : 0U;
     393    args.resize(hasHandle + in_degree(index, G));
     394    if (LLVM_LIKELY(hasHandle)) {
     395        Value * handle = kernel->createInstance(b);
     396        if (LLVM_UNLIKELY(kernel->hasFamilyName())) {
     397            handle = b->CreatePointerCast(handle, b->getVoidPtrTy());
     398        }
     399        b->setScalarField(BRANCH_PREFIX + std::to_string(index - 1), handle);
     400        args[0] = handle;
     401    }
     402    for (const auto e : make_iterator_range(in_edges(index, G))) {
     403        const auto j = hasHandle + G[e];
     404        const auto scalar = source(e, G);
     405        args[j] = G[scalar];
     406    }
     407    return b->CreateCall(initializer, args);
     408}
     409
     410/** ------------------------------------------------------------------------------------------------------------- *
     411 * @brief generateInitializeMethod
     412 ** ------------------------------------------------------------------------------------------------------------- */
     413void OptimizationBranch::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
     414
     415    ScalarDependencyGraph G(3);
     416    ScalarDependencyMap M;
     417
     418    enumerateScalarProducerBindings(b, 0, getInputScalarBindings(), G, M);
     419    enumerateScalarConsumerBindings(1, mAllZeroKernel->getInputScalarBindings(), G, M);
     420    enumerateScalarConsumerBindings(2, mNonZeroKernel->getInputScalarBindings(), G, M);
     421
     422    Module * const m = b->getModule();
     423    Value * const term2 = initKernel(b, 1, mAllZeroKernel, mAllZeroKernel->getInitFunction(m), G);
     424    Value * const term1 = initKernel(b, 2, mNonZeroKernel, mNonZeroKernel->getInitFunction(m), G);
     425    b->CreateStore(b->CreateOr(term1, term2), mTerminationSignalPtr);
    361426}
    362427
     
    364429 * @brief generateFinalizeMethod
    365430 ** ------------------------------------------------------------------------------------------------------------- */
     431inline Value * callTerminate(const std::unique_ptr<KernelBuilder> & b, Kernel * kernel, const std::string suffix) {
     432    loadHandle(b, kernel, suffix);
     433    return kernel->finalizeInstance(b);
     434}
     435
     436/** ------------------------------------------------------------------------------------------------------------- *
     437 * @brief generateFinalizeMethod
     438 ** ------------------------------------------------------------------------------------------------------------- */
    366439void OptimizationBranch::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
    367     mTrueKernel->generateFinalizeMethod(b);
    368     mFalseKernel->generateFinalizeMethod(b);
    369 }
    370 
    371 /** ------------------------------------------------------------------------------------------------------------- *
    372  * @brief addAdditionalFunctions
    373  ** ------------------------------------------------------------------------------------------------------------- */
    374 void OptimizationBranch::addAdditionalFunctions(const std::unique_ptr<KernelBuilder> & b) {
    375     mTrueKernel->addAdditionalFunctions(b);
    376     mFalseKernel->addAdditionalFunctions(b);
    377 }
    378 
    379 /** ------------------------------------------------------------------------------------------------------------- *
    380  * @brief finalizeInstance
    381  ** ------------------------------------------------------------------------------------------------------------- */
    382 Value * OptimizationBranch::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
    383 
    384     // TODO: to have a returnable result here, we need to store the
    385     // scalars in this kernel or the pipeline.
    386 
    387 //    Value * trueResult = mTrueKernel->finalizeInstance(b);
    388 //    Value * falseResult = mFalseKernel->finalizeInstance(b);
    389     return nullptr;
    390 }
    391 
    392 void OptimizationBranch::addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & b) {
    393     mTrueKernel->addInternalKernelProperties(b);
    394     mFalseKernel->addInternalKernelProperties(b);
    395 }
    396 
    397 std::vector<Value *> OptimizationBranch::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
    398     return std::vector<Value *>{};
     440    Value * allZeroResult = callTerminate(b, mAllZeroKernel, "0");
     441    Value * nonZeroResult = callTerminate(b, mNonZeroKernel, "1");
     442    if (LLVM_UNLIKELY(nonZeroResult || allZeroResult)) {
     443        report_fatal_error("OptimizationBranch does not support output scalars yet");
     444    }
     445
     446}
     447
     448/** ------------------------------------------------------------------------------------------------------------- *
     449 * @brief addKernelDeclarations
     450 ** ------------------------------------------------------------------------------------------------------------- */
     451void OptimizationBranch::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
     452    mNonZeroKernel->addKernelDeclarations(b);
     453    mAllZeroKernel->addKernelDeclarations(b);
     454    Kernel::addKernelDeclarations(b);
     455}
     456
     457void addHandle(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel, Bindings & scalars, const std::string suffix) {
     458    if (LLVM_LIKELY(kernel->isStateful())) {
     459        Type * handleType = nullptr;
     460        if (LLVM_UNLIKELY(kernel->hasFamilyName())) {
     461            handleType = b->getVoidPtrTy();
     462        } else {
     463            handleType = kernel->getKernelType()->getPointerTo();
     464        }
     465        scalars.emplace_back(handleType, BRANCH_PREFIX + suffix);
     466    }
    399467}
    400468
     
    408476    Bindings && scalar_inputs,
    409477    Bindings && scalar_outputs)
    410 : Kernel(b, TypeId::OptimizationBranch, std::move(signature),
    411          std::move(stream_inputs), std::move(stream_outputs),
    412          std::move(scalar_inputs), std::move(scalar_outputs), {})
     478: MultiBlockKernel(b, TypeId::OptimizationBranch, std::move(signature),
     479                   std::move(stream_inputs), std::move(stream_outputs),
     480                   std::move(scalar_inputs), std::move(scalar_outputs),
     481                   // internal scalar
     482                   {Binding{b->getInt8Ty(), "priorState"}})
    413483, mCondition(condition.get())
    414 , mTrueKernel(nonZeroKernel.get())
    415 , mFalseKernel(allZeroKernel.get()) {
    416 
     484, mNonZeroKernel(nonZeroKernel.get())
     485, mAllZeroKernel(allZeroKernel.get()) {
     486    addHandle(b, mAllZeroKernel, mInternalScalars, "0");
     487    addHandle(b, mNonZeroKernel, mInternalScalars, "1");
    417488}
    418489
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6261 r6288  
    33
    44#include "pipeline_compiler.hpp"
     5#include <boost/algorithm/string/replace.hpp>
    56
    67// TODO: any buffers that exist only to satisfy the output dependencies are unnecessary.
     
    4849} // end of anonymous namespace
    4950
    50 inline const Binding & PipelineCompiler::getOutputBinding(const Kernel * const producer, const unsigned index) const {
    51     if (producer == mPipelineKernel) {
    52         return mPipelineKernel->getInputStreamSetBinding(index);
    53     } else {
    54         return producer->getOutputStreamSetBinding(index);
    55     }
    56 }
    57 
    58 inline const Binding & PipelineCompiler::getInputBinding(const Kernel * const consumer, const unsigned index) const {
    59     if (consumer == mPipelineKernel) {
    60         return mPipelineKernel->getOutputStreamSetBinding(index);
    61     } else {
    62         return consumer->getInputStreamSetBinding(index);
    63     }
    64 }
    65 
    66 BufferRateData PipelineCompiler::getBufferRateData(const Kernel * const kernel, const Binding & binding, const unsigned port) const {
     51BufferRateData PipelineCompiler::getBufferRateData(const StreamPort port, const Kernel * const kernel, const Binding & binding) const {
    6752    const auto ub = upperBound(kernel, binding);
    6853    const auto lb = isConsistentRate(kernel, binding) ? ub : lowerBound(kernel, binding);
     
    7055}
    7156
    72 void PipelineCompiler::enumerateBufferProducerBindings(const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M) {
     57void PipelineCompiler::enumerateBufferProducerBindings(const Port type, const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M) const {
    7358    const auto n = bindings.size();
    7459    const Kernel * const kernel = mPipeline[producer];
     
    7762        assert (M.count(rel) == 0);
    7863        const auto buffer = add_vertex(G);
    79         add_edge(producer, buffer, getBufferRateData(kernel, bindings[i], i), G); // producer -> buffer ordering
     64        add_edge(producer, buffer, getBufferRateData(StreamPort{type, i}, kernel, bindings[i]), G); // producer -> buffer ordering
    8065        M.emplace(rel, buffer);
    8166    }
    8267}
    8368
    84 void PipelineCompiler::enumerateBufferConsumerBindings(const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M) {
     69void PipelineCompiler::enumerateBufferConsumerBindings(const Port type, const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M) const {
    8570    const auto n = bindings.size();
    8671    const Kernel * const kernel = mPipeline[consumer];
     
    8974        const auto f = M.find(rel); assert (f != M.end());
    9075        const auto buffer = f->second;
    91         add_edge(buffer, consumer, getBufferRateData(kernel, bindings[i], i), G); // buffer -> consumer ordering
     76        add_edge(buffer, consumer, getBufferRateData(StreamPort{type, i}, kernel, bindings[i]), G); // buffer -> consumer ordering
    9277    }
    9378}
     
    10287BufferGraph PipelineCompiler::makeBufferGraph(BuilderRef b) {
    10388
    104     const auto pipelineInput = 0;
    105     const auto pipelineOutput = mLastKernel;
    106     const auto firstBuffer = mLastKernel + 1;
     89    const auto firstBuffer = mPipelineOutput + 1;
    10790
    10891    BufferGraph G(mLastKernel + 1);
     
    11093
    11194    // make an edge from each producing kernel to a buffer vertex
    112     enumerateBufferProducerBindings(pipelineInput, mPipelineKernel->getInputStreamSetBindings(), G, M);
     95    enumerateBufferProducerBindings(Port::Input, mPipelineInput, mPipelineKernel->getInputStreamSetBindings(), G, M);
    11396    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    114         enumerateBufferProducerBindings(i, mPipeline[i]->getOutputStreamSetBindings(), G, M);
     97        enumerateBufferProducerBindings(Port::Output, i, mPipeline[i]->getOutputStreamSetBindings(), G, M);
    11598    }
    11699    // make an edge from each buffer to its consuming kernel(s)
    117100    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    118         enumerateBufferConsumerBindings(i, mPipeline[i]->getInputStreamSetBindings(), G, M);
     101        enumerateBufferConsumerBindings(Port::Input, i, mPipeline[i]->getInputStreamSetBindings(), G, M);
    119102    }
    120103    // make an edge from a buffer vertex to each pipeline output
    121     enumerateBufferConsumerBindings(pipelineOutput, mPipelineKernel->getOutputStreamSetBindings(), G, M);
     104    enumerateBufferConsumerBindings(Port::Output, mPipelineOutput, mPipelineKernel->getOutputStreamSetBindings(), G, M);
    122105
    123106    const auto lastBuffer = num_vertices(G);
     
    164147
    165148    // fill in any known pipeline I/O buffers
    166     for (const auto e : make_iterator_range(out_edges(pipelineInput, G))) {
     149    for (const auto e : make_iterator_range(out_edges(mPipelineInput, G))) {
    167150        const auto bufferVertex = target(e, G);
    168151        BufferNode & bn = G[bufferVertex];
    169152        assert (bn.Buffer == nullptr);
    170         const auto inputPort = G[e].Port;
    171         bn.Buffer = mPipelineKernel->getInputStreamSetBuffer(inputPort);
     153        bn.Buffer = mPipelineKernel->getInputStreamSetBuffer(G[e].inputPort());
    172154        bn.Type = BufferType::External;
    173155    }
    174156
    175     for (const auto e : make_iterator_range(in_edges(pipelineOutput, G))) {
     157    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, G))) {
    176158        const auto bufferVertex = source(e, G);
    177159        BufferNode & bn = G[bufferVertex];
    178160        assert (bn.Buffer == nullptr);
    179         const auto outputPort = G[e].Port;
    180         bn.Buffer = mPipelineKernel->getOutputStreamSetBuffer(outputPort);
     161        bn.Buffer = mPipelineKernel->getOutputStreamSetBuffer(G[e].outputPort());
    181162        bn.Type = BufferType::External;
    182163    }
     
    198179        const Kernel * const producer = mPipeline[producerVertex];
    199180        const BufferRateData & producerRate = G[pe];
    200         const Binding & output = getOutputBinding(producer, producerRate.Port);
     181        const Binding & output = getBinding(producer, producerRate.Port);
    201182
    202183        StreamSetBuffer * buffer = nullptr;
     
    224205                const BufferNode & consumerNode = G[c];
    225206                const Kernel * const consumer = mPipeline[c]; assert (consumer);
    226                 const Binding & input = getInputBinding(consumer, consumerRate.Port);
     207                const Binding & input = getBinding(consumer, consumerRate.Port);
    227208                facsimileSpace = std::max(facsimileSpace, getOutputOverflowSize(consumer, input, consumerRate));
    228209                // Could the consumption rate be less than the production rate?
     
    257238        bn.Buffer = buffer;
    258239        bn.Type = bufferType;
    259 
    260         mOwnedBuffers.emplace_back(buffer);
    261     }
    262 
    263 //    printBufferGraph(G, errs());
     240    }
     241
     242    // printBufferGraph(G, errs());
    264243
    265244    return G;
     
    272251void PipelineCompiler::printBufferGraph(const BufferGraph & G, raw_ostream & out) {
    273252
    274     const auto pipelineInput = 0;
    275     const auto pipelineOutput = mLastKernel;
    276     const auto firstBuffer = mLastKernel + 1;
    277     const auto lastBuffer = num_vertices(G);
    278 
    279     out << "digraph G {\n";
    280 
    281     out << "v" << pipelineInput << " [label=\"P_{in}\" shape=box];\n";
     253
     254    out << "digraph G {\n"
     255           "v" << mPipelineInput << " [label=\"P_{in}\" shape=box];\n";
    282256
    283257    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    284258        const Kernel * const kernel = mPipeline[i]; assert(kernel);
    285         out << "v" << i << " [label=\"" << i << ": K_{" << i << "}  " <<  kernel->getName()  << "\" shape=box];\n";
    286     }
    287 
    288     out << "v" << pipelineOutput << " [label=\"P_{out}\" shape=box];\n";
     259        std::string name = kernel->getName();
     260        boost::replace_all(name, "\"", "\\\"");
     261
     262        out << "v" << i << " [label=\"[" << i << "] " << name << "\" shape=box];\n";
     263    }
     264
     265    out << "v" << mPipelineOutput << " [label=\"P_{out}\" shape=box];\n";
     266
     267    const auto firstBuffer = mPipelineOutput + 1;
     268    const auto lastBuffer = num_vertices(G);
    289269
    290270    for (unsigned i = firstBuffer; i != lastBuffer; ++i) {
     
    312292        const BufferRateData & pd = G[e];
    313293
    314         out << " [label=\"";
     294        out << " [label=\"(" << pd.Port.second << ") ";
    315295        if (pd.Minimum.denominator() > 1 || pd.Maximum.denominator() > 1) {
    316296            out << pd.Minimum.numerator() << "/" << pd.Minimum.denominator()
     
    320300            out << pd.Minimum.numerator() << " - " << pd.Maximum.numerator();
    321301        }
    322         out << '\n';
    323 
    324         if (s < t) { // producer edge
    325             const Kernel * const k = mPipeline[s];
    326             out << k->getName() << "." << getOutputBinding(k, pd.Port).getName();
    327         } else { // consumer edge
    328             const Kernel * const k = mPipeline[t];
    329             out << k->getName() << "." << getInputBinding(k, pd.Port).getName();
    330         }
    331 
    332         out << "\"];\n";
     302
     303        std::string name = getBinding(mPipeline[s < t ? s : t], pd.Port).getName();
     304        boost::replace_all(name, "\"", "\\\"");
     305        out << '\n' << name << "\"];\n";
    333306    }
    334307
     
    342315inline void PipelineCompiler::addBufferHandlesToPipelineKernel(BuilderRef b, const unsigned index) {
    343316    const Kernel * const kernel = mPipeline[index];
    344     if (!kernel->hasFamilyName()) {
    345         PointerType * kernelPtrTy = kernel->getKernelType()->getPointerTo(0);
    346         mPipelineKernel->addInternalScalar(kernelPtrTy, makeKernelName(index));
    347     }
    348317    for (const auto e : make_iterator_range(out_edges(index, mBufferGraph))) {
    349318        const auto bufferVertex = target(e, mBufferGraph);
    350319        const BufferNode & bn = mBufferGraph[bufferVertex];
    351320        if (LLVM_LIKELY(bn.Type != BufferType::Managed)) {
    352             const auto outputPort = mBufferGraph[e].Port;
     321            const auto outputPort = mBufferGraph[e].outputPort();
    353322            const Binding & output = kernel->getOutputStreamSetBinding(outputPort);
    354323            const auto prefix = makeBufferName(index, output);
     
    374343            const auto p = source(pe, mBufferGraph);
    375344            const Kernel * const producer = mPipeline[p];
    376             const Binding & output = producer->getOutputStreamSetBinding(mBufferGraph[pe].Port);
     345            const auto outputPort = mBufferGraph[pe].outputPort();
     346            const Binding & output = producer->getOutputStreamSetBinding(outputPort);
    377347            const auto name = makeBufferName(p, output);
    378348            Value * const handle = b->getScalarFieldPtr(name);
     
    393363    for (const auto pe : make_iterator_range(out_edges(mKernelIndex, mBufferGraph))) {
    394364        const auto bufferVertex = target(pe, mBufferGraph);
    395         const auto outputPort = mBufferGraph[pe].Port;
     365        const auto outputPort = mBufferGraph[pe].outputPort();
    396366        const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    397367        const BufferNode & bn = mBufferGraph[bufferVertex];
     
    475445    for (const auto e : make_iterator_range(out_edges(mKernelIndex, mBufferGraph))) {
    476446        const auto bufferVertex = target(e, mBufferGraph);
    477         const auto outputPort = mBufferGraph[e].Port;
     447        const auto outputPort = mBufferGraph[e].outputPort();
    478448        Value * fullyProduced = mFullyProducedItemCount[outputPort];
    479         BufferNode & bn = mBufferGraph[bufferVertex];
    480         assert (bn.TotalItems == nullptr);
    481         bn.TotalItems = fullyProduced;
    482         initializeConsumedItemCount(b, bufferVertex, fullyProduced);
     449        mTotalItems[getBufferIndex(bufferVertex)] = fullyProduced;
     450        initializeConsumedItemCount(bufferVertex, fullyProduced);
    483451        initializePopCountReferenceItemCount(b, bufferVertex, fullyProduced);
    484452        #ifdef PRINT_DEBUG_MESSAGES
     
    679647}
    680648
     649
     650/** ------------------------------------------------------------------------------------------------------------- *
     651 * @brief getInputBufferVertex
     652 ** ------------------------------------------------------------------------------------------------------------- */
     653inline unsigned PipelineCompiler::getInputBufferVertex(const unsigned inputPort) const {
     654    return getInputBufferVertex(mKernelIndex, inputPort);
     655}
     656
     657/** ------------------------------------------------------------------------------------------------------------- *
     658 * @brief getInputBufferVertex
     659 ** ------------------------------------------------------------------------------------------------------------- */
     660unsigned PipelineCompiler::getInputBufferVertex(const unsigned kernelVertex, const unsigned inputPort) const {
     661    for (const auto e : make_iterator_range(in_edges(kernelVertex, mBufferGraph))) {
     662        if (mBufferGraph[e].inputPort() == inputPort) {
     663            return source(e, mBufferGraph);
     664        }
     665    }
     666    llvm_unreachable("input buffer not found");
     667}
     668
     669/** ------------------------------------------------------------------------------------------------------------- *
     670 * @brief getInputBuffer
     671 ** ------------------------------------------------------------------------------------------------------------- */
     672inline StreamSetBuffer * PipelineCompiler::getInputBuffer(const unsigned inputPort) const {
     673    return mBufferGraph[getInputBufferVertex(inputPort)].Buffer;
     674}
     675
     676/** ------------------------------------------------------------------------------------------------------------- *
     677 * @brief getOutputBufferVertex
     678 ** ------------------------------------------------------------------------------------------------------------- */
     679inline unsigned PipelineCompiler::getOutputBufferVertex(const unsigned outputPort) const {
     680    return getOutputBufferVertex(mKernelIndex, outputPort);
     681}
     682
     683/** ------------------------------------------------------------------------------------------------------------- *
     684 * @brief getOutputBufferVertex
     685 ** ------------------------------------------------------------------------------------------------------------- */
     686unsigned PipelineCompiler::getOutputBufferVertex(const unsigned kernelVertex, const unsigned outputPort) const {
     687    for (const auto e : make_iterator_range(out_edges(kernelVertex, mBufferGraph))) {
     688        if (mBufferGraph[e].outputPort() == outputPort) {
     689            return target(e, mBufferGraph);
     690        }
     691    }
     692    llvm_unreachable("output buffer not found");
     693}
     694
     695/** ------------------------------------------------------------------------------------------------------------- *
     696 * @brief getOutputBuffer
     697 ** ------------------------------------------------------------------------------------------------------------- */
     698inline StreamSetBuffer * PipelineCompiler::getOutputBuffer(const unsigned outputPort) const {
     699    return mBufferGraph[getOutputBufferVertex(outputPort)].Buffer;
     700}
     701
     702/** ------------------------------------------------------------------------------------------------------------- *
     703 * @brief getBufferIndex
     704 ** ------------------------------------------------------------------------------------------------------------- */
     705inline unsigned PipelineCompiler::getBufferIndex(const unsigned bufferVertex) const {
     706    return bufferVertex - (mPipelineOutput + 1);
     707}
     708
     709/** ------------------------------------------------------------------------------------------------------------- *
     710 * @brief isPipelineInput
     711 ** ------------------------------------------------------------------------------------------------------------- */
     712bool PipelineCompiler::isPipelineInput(const unsigned kernel, const unsigned inputPort) const {
     713    return !is_parent(getInputBufferVertex(kernel, inputPort), mPipelineInput, mBufferGraph);
     714}
     715
     716/** ------------------------------------------------------------------------------------------------------------- *
     717 * @brief isPipelineOutput
     718 ** ------------------------------------------------------------------------------------------------------------- */
     719bool PipelineCompiler::isPipelineOutput(const unsigned kernel, const unsigned outputPort) const {
     720    return !has_child(getOutputBufferVertex(kernel, outputPort), mPipelineOutput, mBufferGraph);
     721}
     722
     723/** ------------------------------------------------------------------------------------------------------------- *
     724 * @brief nestedPipeline
     725 ** ------------------------------------------------------------------------------------------------------------- */
     726bool PipelineCompiler::nestedPipeline() const {
     727    return out_degree(mPipelineInput, mBufferGraph) != 0 || in_degree(mPipelineOutput, mBufferGraph) != 0;
     728}
     729
     730
    681731} // end of kernel namespace
    682732
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/consumer_logic.hpp

    r6275 r6288  
    99 * @brief initializeConsumedItemCount
    1010 ** ------------------------------------------------------------------------------------------------------------- */
    11 inline void PipelineCompiler::initializeConsumedItemCount(BuilderRef b, const unsigned bufferVertex, Value * const produced) {
    12     // If this stream has no consumers, immediately store the consumed item count.
    13     if (LLVM_UNLIKELY(in_degree(bufferVertex, mConsumerGraph) == 0)) {
    14         return;
    15     }
    16     if (LLVM_UNLIKELY(out_degree(bufferVertex, mConsumerGraph) != 0)) {
    17         ConsumerNode & cn = mConsumerGraph[bufferVertex];
    18         assert (cn.Consumed == nullptr);
    19         cn.Consumed = produced;
    20     }
     11inline void PipelineCompiler::initializeConsumedItemCount(const unsigned bufferVertex, Value * const produced) {
     12    ConsumerNode & cn = mConsumerGraph[bufferVertex];
     13    cn.Consumed = produced;
    2114}
    2215
     
    2821        if (LLVM_UNLIKELY(mConsumerGraph[e] == FAKE_CONSUMER)) continue;
    2922        const auto bufferVertex = source(e, mConsumerGraph);
    30         ConsumerNode & cn = mConsumerGraph[bufferVertex]; assert (cn.Consumed);
    31         PHINode * const consumedPhi = b->CreatePHI(b->getSizeTy(), 2);
    32         consumedPhi->addIncoming(cn.Consumed, mKernelEntry);
    33         cn.PhiNode = consumedPhi;
     23        ConsumerNode & cn = mConsumerGraph[bufferVertex];
     24        if (LLVM_LIKELY(cn.PhiNode == nullptr)) {
     25            PHINode * const consumedPhi = b->CreatePHI(b->getSizeTy(), 2, "consumed." + std::to_string(bufferVertex) + ".");
     26            consumedPhi->addIncoming(cn.Consumed, mKernelEntry);
     27            cn.PhiNode = consumedPhi;
     28        }
    3429    }
    3530}
     
    4136    for (const auto e : make_iterator_range(in_edges(mKernelIndex, mConsumerGraph))) {
    4237        if (LLVM_UNLIKELY(mConsumerGraph[e] == FAKE_CONSUMER)) continue;
    43         Value * const processed = mFullyProcessedItemCount[mConsumerGraph[e]];
     38        const auto inputPort = InputPort(mConsumerGraph[e]);
     39        Value * const processed = mFullyProcessedItemCount[inputPort];
    4440        const auto bufferVertex = source(e, mConsumerGraph);
    4541        ConsumerNode & cn = mConsumerGraph[bufferVertex]; assert (cn.Consumed);
    4642        cn.Consumed = b->CreateUMin(cn.Consumed, processed);
    4743    }
    48 }
    49 
    50 /** ------------------------------------------------------------------------------------------------------------- *
    51  * @brief writeFinalConsumedItemCounts
    52  ** ------------------------------------------------------------------------------------------------------------- */
    53 inline void PipelineCompiler::writeFinalConsumedItemCounts(BuilderRef b) {
    54     for (const auto e : make_iterator_range(in_edges(mKernelIndex, mConsumerGraph))) {
    55         const auto bufferVertex = source(e, mConsumerGraph);
    56         ConsumerNode & cn = mConsumerGraph[bufferVertex]; assert (cn.Consumed);
    57         if (LLVM_LIKELY(mConsumerGraph[e] != FAKE_CONSUMER)) {
    58             cn.PhiNode->addIncoming(cn.Consumed, mKernelLoopExitPhiCatch);
    59             cn.Consumed = cn.PhiNode;
    60         }
    61         // Is this kernel the last consumer? If so, store the consumed count
    62         if (out_degree(bufferVertex, mConsumerGraph) == 1) {
    63             setConsumedItemCount(b, bufferVertex, cn.Consumed);
    64         }
    65     }
    66     clear_in_edges(mKernelIndex, mConsumerGraph);
    6744}
    6845
     
    7350    IntegerType * const sizeTy = b->getSizeTy();
    7451    const Kernel * const kernel = mPipeline[kernelIndex];
    75     const auto numOfOutputs = kernel->getNumOfStreamOutputs();
    76     for (unsigned i = 0; i < numOfOutputs; i++) {
    77         const Binding & output = kernel->getOutputStreamSetBinding(i);
    78         const auto prefix = makeBufferName(kernelIndex, output);
    79         const auto bufferVertex = getOutputBufferVertex(kernelIndex, i);
    80         // If the out-degree for this buffer is zero, then either the stream has no consumers
    81         // or we've proven that its consumption rate is identical to its production rate.
    82         if (out_degree(bufferVertex, mConsumerGraph) != 0) {
     52    for (const auto & e : make_iterator_range(out_edges(kernelIndex, mConsumerGraph))) {
     53        const auto bufferVertex = target(e, mConsumerGraph);
     54        // If the out-degree for this buffer is zero, then we've proven that its consumption rate
     55        // is identical to its production rate.
     56        const auto comsumedItemCountMatchesProducedItemCount = (out_degree(bufferVertex, mConsumerGraph) == 0);
     57        const auto isPipelineInput = (kernelIndex == mPipelineInput);
     58        if (LLVM_UNLIKELY(comsumedItemCountMatchesProducedItemCount && !isPipelineInput)) {
     59            continue;
     60        }
     61        const Binding & binding = getBinding(kernel, mConsumerGraph[e]);
     62        const auto prefix = makeBufferName(kernelIndex, binding);
     63//        if (LLVM_UNLIKELY(isPipelineInput)) {
     64//            mPipelineKernel->addLocalScalar(sizeTy, prefix + CONSUMED_ITEM_COUNT_SUFFIX);
     65//        } else {
    8366            mPipelineKernel->addInternalScalar(sizeTy, prefix + CONSUMED_ITEM_COUNT_SUFFIX);
    84         }
     67//        }
     68
    8569    }
    86 
    87 }
    88 
    89 /** ------------------------------------------------------------------------------------------------------------- *
    90  * @brief getConsumedItemCount
    91  ** ------------------------------------------------------------------------------------------------------------- */
    92 Value * PipelineCompiler::getConsumedItemCount(BuilderRef b, const unsigned outputPort) {
    93     Value * consumed = nullptr;
    94     const auto bufferVertex = getOutputBufferVertex(outputPort);
    95     if (LLVM_UNLIKELY(out_degree(bufferVertex, mConsumerGraph) == 0)) {
    96         // This stream either has no consumers or we've proven that its consumption rate
    97         // is identical to its production rate.
    98         consumed = mInitiallyProducedItemCount[outputPort];
    99     } else {
    100         const BufferNode & bn = mBufferGraph[bufferVertex];
    101         if (LLVM_UNLIKELY(bn.Type == BufferType::External)) {
    102             consumed = b->getSize(0);
    103         } else {
    104             b->setKernel(mPipelineKernel);
    105             const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    106             const auto prefix = makeBufferName(mKernelIndex, output);
    107             consumed = b->getScalarField(prefix + CONSUMED_ITEM_COUNT_SUFFIX);
    108             b->setKernel(mKernel);
    109         }
    110     }
    111     return consumed;
    11270}
    11371
     
    11674 ** ------------------------------------------------------------------------------------------------------------- */
    11775void PipelineCompiler::readConsumedItemCounts(BuilderRef b) {
    118     const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
    119     for (unsigned i = 0; i < numOfOutputs; i++) {
    120         mConsumedItemCount[i] = getConsumedItemCount(b, i);
     76    b->setKernel(mPipelineKernel);
     77    for (const auto & e : make_iterator_range(out_edges(mKernelIndex, mConsumerGraph))) {
     78        const auto port = OutputPort(mConsumerGraph[e]);
     79        const auto bufferVertex = target(e, mConsumerGraph);
     80        Value * consumed = nullptr;
     81        if (LLVM_UNLIKELY(out_degree(bufferVertex, mConsumerGraph) == 0)) {
     82            // This stream either has no consumers or we've proven that its consumption rate
     83            // is identical to its production rate.
     84            consumed = mInitiallyProducedItemCount[port];
     85        } else {
     86            const Binding & output = mKernel->getOutputStreamSetBinding(port);
     87            const auto prefix = makeBufferName(mKernelIndex, output);
     88            consumed = b->getScalarField(prefix + CONSUMED_ITEM_COUNT_SUFFIX);
     89        }
     90        mConsumedItemCount[port] = consumed;
    12191    }
     92    b->setKernel(mKernel);
     93}
     94
     95/** ------------------------------------------------------------------------------------------------------------- *
     96 * @brief writeFinalConsumedItemCounts
     97 ** ------------------------------------------------------------------------------------------------------------- */
     98inline void PipelineCompiler::writeFinalConsumedItemCounts(BuilderRef b) {
     99
            // Distinct source buffers of the current kernel's in-edges in mConsumerGraph; the set
            // de-duplicates a buffer that is reached through more than one edge.
     100    flat_set<unsigned> buffers;
     101    buffers.reserve(in_degree(mKernelIndex, mConsumerGraph));
     102
     103    for (const auto e : make_iterator_range(in_edges(mKernelIndex, mConsumerGraph))) {
     104        const auto buffer = source(e, mConsumerGraph);
                // The buffer is recorded even for FAKE_CONSUMER edges; only the phi update is skipped.
     105        buffers.insert(buffer);
     106        if (LLVM_UNLIKELY(mConsumerGraph[e] == FAKE_CONSUMER)) continue;
     107        ConsumerNode & cn = mConsumerGraph[buffer];
                // Close the pending phi with the value coming from mKernelLoopExitPhiCatch and make the
                // phi the buffer's current consumed value. PhiNode is reset to null afterwards, so a
                // later edge from the same buffer fails this guard and adds no second incoming value.
     108        if (LLVM_LIKELY(cn.PhiNode != nullptr)) {
     109            cn.PhiNode->addIncoming(cn.Consumed, mKernelLoopExitPhiCatch);
     110            cn.Consumed = cn.PhiNode;
     111            cn.PhiNode = nullptr;
     112        }
     113    }
            // Drop this kernel's in-edges before the out_degree test below.
     114    clear_in_edges(mKernelIndex, mConsumerGraph);
     115
            // The scalar-field writes are issued with mPipelineKernel as the builder's current kernel;
            // mKernel is restored at the end.
     116    b->setKernel(mPipelineKernel);
     117    // check to see if we've fully finished processing any stream
     118    for (const auto buffer : buffers) {
                // No out-edges remain for this buffer once the current kernel's edges were cleared.
     119        if (out_degree(buffer, mConsumerGraph) == 0) {
     120            ConsumerNode & cn = mConsumerGraph[buffer];
     121            setConsumedItemCount(b, buffer, cn.Consumed);
     122        }
     123    }
     124    b->setKernel(mKernel);
    122125}
    123126
     
    125128 * @brief setConsumedItemCount
    126129 ** ------------------------------------------------------------------------------------------------------------- */
    127 void PipelineCompiler::setConsumedItemCount(BuilderRef b, const unsigned bufferVertex, Value * const consumed) const {
    128     const BufferNode & bn = mBufferGraph[bufferVertex];
    129     if (LLVM_LIKELY(bn.Type == BufferType::External)) {
    130         return;
    131     }
    132     const auto pe = in_edge(bufferVertex, mConsumerGraph);
     130inline void PipelineCompiler::setConsumedItemCount(BuilderRef b, const unsigned buffer, not_null<Value *> consumed) const {
     131    const auto pe = in_edge(buffer, mConsumerGraph);
    133132    const auto producerVertex = source(pe, mConsumerGraph);
    134133    const Kernel * const producer = mPipeline[producerVertex];
    135     const auto outputPort = mConsumerGraph[pe];
    136     const Binding & output = producer->getOutputStreamSetBinding(outputPort);
    137     const auto prefix = makeBufferName(producerVertex, output);
    138     #ifdef PRINT_DEBUG_MESSAGES
    139     b->CallPrintInt(prefix + CONSUMED_ITEM_COUNT_SUFFIX, consumed);
    140     #endif
    141     b->setKernel(mPipelineKernel);
     134    const Binding & binding = getBinding(producer, mConsumerGraph[pe]);
     135    const auto prefix = makeBufferName(producerVertex, binding);
    142136    b->setScalarField(prefix + CONSUMED_ITEM_COUNT_SUFFIX, consumed);
    143     b->setKernel(mKernel);
    144137}
    145138
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6272 r6288  
    3030    mProgressCounter->addIncoming(ZERO, entryBlock);
    3131    mPipelineProgress = b->getFalse();
     32    mHalted = b->getFalse();
    3233    #ifdef PRINT_DEBUG_MESSAGES
    3334    b->CallPrintInt("+++ pipeline start +++", mSegNo);
     
    188189    readFinalProducedItemCounts(b);
    189190    updateOptionalCycleCounter(b);
    190 
     191    mHalted = mHaltedPhi;
     192    #ifdef PRINT_DEBUG_MESSAGES
     193    b->CallPrintInt("--- " + prefix + ".halted ---", mHalted);
     194    #endif
    191195    assert (mKernel == mPipeline[mKernelIndex] && b->getKernel() == mKernel);
    192196}
    193197
    194198/** ------------------------------------------------------------------------------------------------------------- *
     199 * @brief isParamAddressable
     200 ** ------------------------------------------------------------------------------------------------------------- */
     201inline bool isParamAddressable(const Binding & binding) {
            // A deferred binding is reported as addressable regardless of its processing rate.
     202    if (binding.isDeferred()) {
     203        return true;
     204    }
            // Otherwise only a bounded or unknown rate qualifies.
     205    const ProcessingRate & rate = binding.getRate();
     206    return (rate.isBounded() || rate.isUnknown());
     207}
     208
     209/** ------------------------------------------------------------------------------------------------------------- *
    195210 * @brief end
    196211 ** ------------------------------------------------------------------------------------------------------------- */
     
    199214    // A pipeline will end for one of two reasons:
    200215
    201     // 1) No progress can be made by any kernel. This ought to only occur
    202     // if the pipeline itself has I/O streams.
     216    // 1) Process has *halted* due to insufficient pipeline I/O.
    203217
    204218    // 2) All pipeline sinks have terminated (i.e., any kernel that writes
     
    217231
    218232    Value * const terminated = pipelineTerminated(b);
    219     Value * done = terminated;
    220     if (nestedPipeline()) {
    221         done = b->CreateOr(terminated, noProgress);
    222     } else if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     233    Value * const done = b->CreateOr(mHalted, terminated);
     234
     235    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    223236        b->CreateAssertZero(noProgress,
    224237            "Dead lock detected: pipeline could not progress after two iterations");
     
    226239
    227240    #ifdef PRINT_DEBUG_MESSAGES
    228     Constant * const ONES = Constant::getAllOnesValue(mSegNo->getType());
    229     b->CallPrintInt("+++ pipeline end +++", b->CreateSelect(done, ONES, mSegNo));
     241    b->CallPrintInt("+++ pipeline end +++", mSegNo);
    230242    #endif
    231243
     
    239251    mSegNo = nullptr;
    240252    b->setKernel(mPipelineKernel);
     253
     254    writePipelineIOItemCounts(b);
     255
    241256    if (mPipelineTerminated) {
    242257        b->CreateStore(terminated, mPipelineTerminated);
    243258    }
     259}
     260
     261/** ------------------------------------------------------------------------------------------------------------- *
     262 * @brief pipelineTerminated
     263 ** ------------------------------------------------------------------------------------------------------------- */
     264inline Value * PipelineCompiler::pipelineTerminated(BuilderRef b) const {
            // Builds a value that starts as the constant true and is AND-ed with hasKernelTerminated
            // for every kernel that is the source of an in-edge of mPipelineOutput in mTerminationGraph.
            // With no such in-edges the loop body never runs and the constant true is returned.
     265    Value * terminated = b->getTrue();
     266    // check whether every sink has terminated
     267    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mTerminationGraph))) {
     268        const auto kernel = source(e, mTerminationGraph);
     269        terminated = b->CreateAnd(terminated, hasKernelTerminated(b, kernel));
     270    }
     271    return terminated;
     272}
     273
     274
     275/** ------------------------------------------------------------------------------------------------------------- *
     276 * @brief readPipelineIOItemCounts
     277 ** ------------------------------------------------------------------------------------------------------------- */
     278void PipelineCompiler::readPipelineIOItemCounts(BuilderRef b) {
     279
     280    // TODO: this needs to be considered more: if we have multiple consumers of a pipeline input and
     281    // they process the input data at differing rates, how do we ensure that we always resume processing
     282    // at the correct position? We can store the actual item counts / delta of the consumed count
     283    // internally but this would be problematic for optimization branches as we may have processed data
     284    // using the alternate path and any internally stored counts/deltas are irrelevant.
     285
     286    // Would a simple "reset" be enough?
     287
     288
            // Resize the table to (number of buffer-graph vertices - mPipelineOutput) entries; any
            // entries added by the resize are null.
     289    mTotalItems.resize(num_vertices(mBufferGraph) - mPipelineOutput, nullptr);
     290
            // Pipeline inputs: for every buffer fed by the pipeline-input vertex ...
     291    for (const auto e : make_iterator_range(out_edges(mPipelineInput, mBufferGraph))) {
     292
     293
     294
     295        const auto buffer = target(e, mBufferGraph);
     296        const auto inputPort = mBufferGraph[e].inputPort();
                // ... the available item count of the pipeline kernel's input is recorded both as the
                // buffer's total item count and as its starting consumed value ...
     297        Value * const available = mPipelineKernel->getAvailableInputItems(inputPort);
     298        mTotalItems[getBufferIndex(buffer)] = available;
     299        mConsumerGraph[buffer].Consumed = available;
     300
                // ... and the pipeline kernel's processed count for that input is loaded once ...
     301        Value * const inPtr = mPipelineKernel->getProcessedInputItemsPtr(inputPort);
     302        Value * const processed = b->CreateLoad(inPtr);
     303
                // ... then stored into the ITEM_COUNT scalar of every kernel reading from the buffer.
                // Note that `e` and `inputPort` below shadow the outer loop's variables.
     304        for (const auto e : make_iterator_range(out_edges(buffer, mBufferGraph))) {
     305            const auto inputPort = mBufferGraph[e].inputPort();
     306            const auto kernelIndex = target(e, mBufferGraph);
     307            Kernel * const kernel = mPipeline[kernelIndex];
     308            const Binding & input = kernel->getInputStreamSetBinding(inputPort);
     309            const auto prefix = makeBufferName(kernelIndex, input);
     310            Value * const ptr = b->getScalarFieldPtr(prefix + ITEM_COUNT_SUFFIX);
     311            b->CreateStore(processed, ptr);
     312        }
     313    }
     314
            // Pipeline outputs: for every buffer feeding the pipeline-output vertex, load the pipeline
            // kernel's produced count and store it into the ITEM_COUNT scalar of each kernel that
            // writes to that buffer.
     315    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mBufferGraph))) {
     316        const auto buffer = source(e, mBufferGraph);
     317        const auto outputPort = mBufferGraph[e].outputPort();
     318
     319        Value * outPtr = mPipelineKernel->getProducedOutputItemsPtr(outputPort);
     320        Value * const produced = b->CreateLoad(outPtr);
     321
                // `e` shadows the outer loop variable here as well.
     322        for (const auto e : make_iterator_range(in_edges(buffer, mBufferGraph))) {
                    // NOTE(review): despite its name, this local holds the edge's *output* port and is
                    // used to look up the producer's output binding.
     323            const auto inputPort = mBufferGraph[e].outputPort();
     324            const auto kernelIndex = source(e, mBufferGraph);
     325            Kernel * const kernel = mPipeline[kernelIndex];
     326            const Binding & output = kernel->getOutputStreamSetBinding(inputPort);
     327            const auto prefix = makeBufferName(kernelIndex, output);
     328            Value * const ptr = b->getScalarFieldPtr(prefix + ITEM_COUNT_SUFFIX);
     329            b->CreateStore(produced, ptr);
     330        }
     331    }
     332
     333}
     334
     335/** ------------------------------------------------------------------------------------------------------------- *
     336 * @brief writePipelineIOItemCounts
     337 ** ------------------------------------------------------------------------------------------------------------- */
     338void PipelineCompiler::writePipelineIOItemCounts(BuilderRef b) {
     339
            // Pipeline inputs: read the CONSUMED_ITEM_COUNT scalar whose name is built from the
            // pipeline-input vertex and the pipeline kernel's input binding, and store it through the
            // pipeline kernel's processed-input-items pointer for that port.
     340    for (const auto e : make_iterator_range(out_edges(mPipelineInput, mBufferGraph))) {
     341        const auto inputPort = mBufferGraph[e].inputPort();
     342        const Binding & input = mPipelineKernel->getInputStreamSetBinding(inputPort);
     343        Value * const ptr = mPipelineKernel->getProcessedInputItemsPtr(inputPort);
     344        const auto prefix = makeBufferName(mPipelineInput, input);
     345        Value * const consumed = b->getScalarField(prefix + CONSUMED_ITEM_COUNT_SUFFIX);
     346        b->CreateStore(consumed, ptr);
     347    }
     348
            // Pipeline outputs: externalPort is the port on the edge into the pipeline-output vertex;
            // internalPort is the port on the edge returned by in_edge(buffer, mBufferGraph), whose
            // source is taken as the producing kernel. The producer's ITEM_COUNT scalar is read and
            // stored through the pipeline kernel's produced-output-items pointer for externalPort.
     349    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mBufferGraph))) {
     350        const auto externalPort = mBufferGraph[e].outputPort();
     351        const auto buffer = source(e, mBufferGraph);
     352        const auto pe = in_edge(buffer, mBufferGraph);
     353        const auto internalPort = mBufferGraph[pe].outputPort();
     354        const auto producer = source(pe, mBufferGraph);
     355        const Kernel * const kernel = mPipeline[producer];
     356        const Binding & output = kernel->getOutputStreamSetBinding(internalPort);
     357        Value * const ptr = mPipelineKernel->getProducedOutputItemsPtr(externalPort);
     358        const auto prefix = makeBufferName(producer, output);
     359        Value * const produced = b->getScalarField(prefix + ITEM_COUNT_SUFFIX);
     360        b->CreateStore(produced, ptr);
     361    }
     362
    244363}
    245364
     
    325444    mTerminatedPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
    326445    mHasProgressedPhi = b->CreatePHI(boolTy, 2, prefix + "_anyProgress");
     446    mHaltingPhi = b->CreatePHI(boolTy, 2, prefix + "_halting");
    327447    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    328448    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    348468    b->SetInsertPoint(mKernelExit);
    349469    const auto prefix = makeKernelName(mKernelIndex);
    350     Type * const sizeTy = b->getSizeTy();
     470    IntegerType * const sizeTy = b->getSizeTy();
    351471    mTerminatedAtExitPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
    352472    mTerminatedAtExitPhi->addIncoming(mTerminatedInitially, mKernelEntry);
    353473    mTerminatedAtExitPhi->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
    354474
    355     PHINode * const pipelineProgress = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_pipelineProgress");
     475    IntegerType * const boolTy = b->getInt1Ty();
     476
     477    mHaltedPhi = b->CreatePHI(boolTy, 2, prefix + "_halted");
     478    mHaltedPhi->addIncoming(mHalted, mKernelEntry);
     479    mHaltedPhi->addIncoming(mHaltingPhi, mKernelLoopExitPhiCatch);
     480
     481    PHINode * const pipelineProgress = b->CreatePHI(boolTy, 2, prefix + "_pipelineProgress");
    356482    pipelineProgress->addIncoming(mPipelineProgress, mKernelEntry);
    357483    pipelineProgress->addIncoming(mHasProgressedPhi, mKernelLoopExitPhiCatch);
     
    405531            mUpdatedProducedPhi[i]->addIncoming(mProducedItemCount[i], entryBlock);
    406532        }
     533        mTerminatedPhi->addIncoming(mTerminatedInitially, entryBlock);
    407534        mHasProgressedPhi->addIncoming(b->getTrue(), entryBlock);
    408         mTerminatedPhi->addIncoming(mTerminatedInitially, entryBlock);
     535        mHaltingPhi->addIncoming(mHalted, entryBlock);
    409536        b->CreateBr(mKernelLoopExit);
    410537    }
     
    418545    mTerminatedPhi->addIncoming(getTerminationSignal(b, mKernelIndex), exitBlock);
    419546    mHasProgressedPhi->addIncoming(b->getTrue(), exitBlock);
     547    mHaltingPhi->addIncoming(mHalted, exitBlock);
    420548    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    421549    for (unsigned i = 0; i < numOfInputs; ++i) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6275 r6288  
    6868    #endif
    6969    mAccessibleInputItems[inputPort] = accessible;
     70
     71    Value * halting = mHalted;
     72    for (const auto & e : make_iterator_range(in_edges(mKernelIndex, mPipelineIOGraph))) {
     73        if (LLVM_LIKELY(mPipelineIOGraph[e] == inputPort)) {
     74            halting = b->getTrue();
     75            break;
     76        }
     77    }
     78
    7079    BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasInputData", mKernelLoopCall);
    71     branchToTargetOrLoopExit(b, sufficientInput, target);
     80    branchToTargetOrLoopExit(b, sufficientInput, target, halting);
     81
    7282}
    7383
     
    102112inline void PipelineCompiler::checkForSufficientOutputSpaceOrExpand(BuilderRef b, const unsigned outputPort) {
    103113    // If the buffer is managed by the kernel, ignore it
    104     if (LLVM_LIKELY(getOutputBufferType(outputPort) != BufferType::Managed)) {
    105         const StreamSetBuffer * const buffer = getOutputBuffer(outputPort);
    106         Value * const writable = getWritableOutputItems(b, outputPort, true);
    107         Value * const strideLength = getOutputStrideLength(b, outputPort);
    108         const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    109         const auto prefix = makeBufferName(mKernelIndex, output);
    110         Value * const hasEnough = b->CreateICmpULE(strideLength, writable, prefix + "_hasEnough");
    111         #ifdef PRINT_DEBUG_MESSAGES
    112         b->CallPrintInt(prefix + "_writable", writable);
    113         b->CallPrintInt(prefix + "_requiredOutput", strideLength);
    114         b->CallPrintInt(prefix + "_hasEnough", hasEnough);
    115         #endif
    116         BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasOutputSpace", mKernelLoopCall);
    117         mWritableOutputItems[outputPort] = writable;
    118         if (LLVM_UNLIKELY(isa<DynamicBuffer>(buffer))) {
    119             expandOutputBuffer(b, outputPort, hasEnough, target);
    120         } else {
    121             branchToTargetOrLoopExit(b, hasEnough, target);
    122         }
    123     }
     114    if (LLVM_UNLIKELY(getOutputBufferType(outputPort) == BufferType::Managed)) {
     115        return;
     116    }
     117    const StreamSetBuffer * const buffer = getOutputBuffer(outputPort);
     118    Value * const writable = getWritableOutputItems(b, outputPort, true);
     119    Value * const strideLength = getOutputStrideLength(b, outputPort);
     120    const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
     121    const auto prefix = makeBufferName(mKernelIndex, output);
     122    Value * const hasEnough = b->CreateICmpULE(strideLength, writable, prefix + "_hasEnough");
     123    #ifdef PRINT_DEBUG_MESSAGES
     124    b->CallPrintInt(prefix + "_writable", writable);
     125    b->CallPrintInt(prefix + "_requiredOutput", strideLength);
     126    b->CallPrintInt(prefix + "_hasEnough", hasEnough);
     127    #endif
     128    BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasOutputSpace", mKernelLoopCall);
     129    mWritableOutputItems[outputPort] = writable;
     130
     131    if (LLVM_UNLIKELY(isa<DynamicBuffer>(buffer))) {
     132        expandOutputBuffer(b, outputPort, hasEnough, target);
     133    } else {
     134        Value * halting = mHalted;
     135        for (const auto & e : make_iterator_range(out_edges(mKernelIndex, mPipelineIOGraph))) {
     136            if (LLVM_LIKELY(mPipelineIOGraph[e] == outputPort)) {
     137                halting = b->getTrue();
     138                break;
     139            }
     140        }
     141        branchToTargetOrLoopExit(b, hasEnough, target, halting);
     142    }
     143
    124144}
    125145
     
    127147 * @brief branchToTargetOrLoopExit
    128148 ** ------------------------------------------------------------------------------------------------------------- */
    129 void PipelineCompiler::branchToTargetOrLoopExit(BuilderRef b, Value * const cond, BasicBlock * const target) {
    130     b->CreateLikelyCondBr(cond, target, mKernelLoopExit);
     149void PipelineCompiler::branchToTargetOrLoopExit(BuilderRef b, Value * const cond, BasicBlock * const target, Value * const halting) {
     150
     151
     152
    131153    BasicBlock * const exitBlock = b->GetInsertBlock();
    132154    mTerminatedPhi->addIncoming(mTerminatedInitially, exitBlock);
    133155    mHasProgressedPhi->addIncoming(mAlreadyProgressedPhi, exitBlock);
     156    mHaltingPhi->addIncoming(halting, exitBlock);
     157
     158    b->CreateLikelyCondBr(cond, target, mKernelLoopExit);
     159
     160    // The lexicalOrderingOfStreamIO function will attempt to order the ports such that we test
     161    // the pipeline I/O first. If we have not tested all of them and this stream does not have
     162    // enough I/O to progress, assume some pipeline I/O has been exhausted.
     163
     164    // NOTE: we may run into problems here if this stream's rate is a PopCountRate whose
     165    // reference is an internal stream that can progress still and it is the internal stream
     166    // that is insufficient.
     167
    134168    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    135169    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    350384    }
    351385
    352 
    353 
    354386    BasicBlock * const exitBlock = b->GetInsertBlock();
    355387    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    440472    args.reserve((numOfInputs + numOfOutputs) * 4 + 2);
    441473    if (LLVM_LIKELY(mKernel->isStateful())) {
    442         args.push_back(mKernel->getHandle());
    443     }
    444     args.push_back(mNumOfLinearStrides);
     474        args.push_back(mKernel->getHandle()); assert (mKernel->getHandle());
     475    }
     476    args.push_back(mNumOfLinearStrides); assert (mNumOfLinearStrides);
    445477    for (unsigned i = 0; i < numOfInputs; ++i) {
    446478
     
    474506        mReturnedProcessedItemCountPtr[i] = addItemCountArg(b, input, deferred, processed, args);
    475507
    476         args.push_back(inputItems);
     508        args.push_back(inputItems); assert (inputItems);
    477509
    478510        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     
    504536        mReturnedProducedItemCountPtr[i] = addItemCountArg(b, output, canTerminate, produced, args);
    505537        if (LLVM_LIKELY(nonManaged)) {
    506             args.push_back(writable);
     538            args.push_back(writable); assert (writable);
    507539        } else {
    508             args.push_back(mConsumedItemCount[i]);
     540            args.push_back(mConsumedItemCount[i]); assert (mConsumedItemCount[i]);
    509541        }
    510542    }
     
    523555        mTerminatedExplicitly = b->getFalse();
    524556    }
     557
     558    #ifdef PRINT_DEBUG_MESSAGES
     559    b->CallPrintInt("* " + prefix + "_executed", mNumOfLinearStrides);
     560    #endif
    525561
    526562    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     
    696732        return mInputStrideLength[inputPort];
    697733    } else {
    698         Value * const strideLength = getInitialStrideLength(b, Port::Input, inputPort);
     734        Value * const strideLength = getInitialStrideLength(b, StreamPort{Port::Input, inputPort});
    699735        mInputStrideLength[inputPort] = strideLength;
    700736        return strideLength;
     
    710746        return mOutputStrideLength[outputPort];
    711747    } else {
    712         Value * const strideLength = getInitialStrideLength(b, Port::Output, outputPort);
     748        Value * const strideLength = getInitialStrideLength(b, StreamPort{Port::Output, outputPort});
    713749        mOutputStrideLength[outputPort] = strideLength;
    714750        return strideLength;
     
    719755 * @brief getInitialStrideLength
    720756 ** ------------------------------------------------------------------------------------------------------------- */
    721 Value * PipelineCompiler::getInitialStrideLength(BuilderRef b, const Port port, const unsigned portNum) {
    722     const Binding & binding = getBinding(mKernel, port, portNum);
     757Value * PipelineCompiler::getInitialStrideLength(BuilderRef b, const StreamPort port) {
     758    const Binding & binding = getBinding(mKernel, port);
    723759    const ProcessingRate & rate = binding.getRate();
    724760    if (LLVM_LIKELY(rate.isFixed() || rate.isBounded())) {
     
    727763        return getMinimumNumOfLinearPopCountItems(b, binding);
    728764    } else if (rate.isRelative()) {
    729         Port refPort; unsigned refPortNum;
    730         std::tie(refPort, refPortNum) = mKernel->getStreamPort(rate.getReference());
    731         Value * const baseRate = getInitialStrideLength(b, refPort, refPortNum);
     765        auto refPort = mKernel->getStreamPort(rate.getReference());
     766        Value * const baseRate = getInitialStrideLength(b, refPort);
    732767        return b->CreateMul2(baseRate, rate.getRate());
    733768    }
     
    773808 ** ------------------------------------------------------------------------------------------------------------- */
    774809inline Value * PipelineCompiler::getTotalItemCount(BuilderRef /* b */, const unsigned inputPort) const {
    775     return mBufferGraph[getInputBufferVertex(inputPort)].TotalItems;
     810    const auto bufferVertex = getInputBufferVertex(inputPort);
     811    Value * const items = mTotalItems[getBufferIndex(bufferVertex)]; assert (items);
     812    return items;
    776813}
    777814
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6272 r6288  
    4242    using ScalarDependencyMap = RelationshipMap<ScalarDependencyGraph::vertex_descriptor>;
    4343
    44     void enumerateScalarProducerBindings(const unsigned producerVertex, const Bindings & bindings,
     44    void enumerateScalarProducerBindings(const unsigned producer, const Bindings & bindings,
    4545                                         ScalarDependencyGraph & G, ScalarDependencyMap & M) {
    4646        const auto n = bindings.size();
     
    4848            const Relationship * const rel = getRelationship(bindings[i]);
    4949            assert (M.count(rel) == 0);
    50             const auto bufferVertex = add_vertex(nullptr, G);
    51             add_edge(producerVertex, bufferVertex, i, G);
    52             M.emplace(rel, bufferVertex);
     50            const auto scalar = add_vertex(nullptr, G);
     51            add_edge(producer, scalar, i, G);
     52            M.emplace(rel, scalar);
    5353        }
    5454    }
     
    6060            return f->second;
    6161        } else if (LLVM_LIKELY(isa<ScalarConstant>(rel))) {
    62             const auto bufferVertex = add_vertex(cast<ScalarConstant>(rel)->value(), G);
    63             M.emplace(rel, bufferVertex);
    64             return bufferVertex;
     62            const auto scalar = add_vertex(cast<ScalarConstant>(rel), G);
     63            add_edge(0, scalar, -1U, G);
     64            M.emplace(rel, scalar);
     65            return scalar;
    6566        } else {
    6667            report_fatal_error("unknown scalar value");
     
    6970
    7071    template <typename Array>
    71     void enumerateScalarConsumerBindings(const unsigned consumerVertex, const Array & array,
     72    void enumerateScalarConsumerBindings(const unsigned consumer, const Array & array,
    7273                                         ScalarDependencyGraph & G, ScalarDependencyMap & M) {
    7374        const auto n = array.size();
    7475        for (unsigned i = 0; i < n; ++i) {
    75             const auto bufferVertex = makeIfConstant(getRelationship(array[i]), G, M);
    76             assert (bufferVertex < num_vertices(G));
    77             add_edge(bufferVertex, consumerVertex, i, G);
     76            const auto scalar = makeIfConstant(getRelationship(array[i]), G, M);
     77            assert (scalar < num_vertices(G));
     78            add_edge(scalar, consumer, i, G);
    7879        }
    7980    }
     
    8889ScalarDependencyGraph PipelineCompiler::makeScalarDependencyGraph() const {
    8990
    90     const auto pipelineInput = 0;
    91     const auto pipelineOutput = mLastKernel;
    9291    const auto & call = mPipelineKernel->getCallBindings();
    9392    const auto numOfCalls = call.size();
    94     const auto firstCall = mLastKernel + 1;
     93    const auto firstCall = mPipelineOutput + 1;
    9594    const auto initialSize = firstCall + numOfCalls;
    9695
     
    9897    ScalarDependencyMap M;
    9998
    100     enumerateScalarProducerBindings(pipelineInput, mPipelineKernel->getInputScalarBindings(), G, M);
     99    enumerateScalarProducerBindings(mPipelineInput, mPipelineKernel->getInputScalarBindings(), G, M);
    101100    // verify each scalar input of the kernel is an input to the pipeline
    102101    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     
    112111    }
    113112    // enumerate the pipeline outputs
    114     enumerateScalarConsumerBindings(pipelineOutput, mPipelineKernel->getOutputScalarBindings(), G, M);
     113    enumerateScalarConsumerBindings(mPipelineOutput, mPipelineKernel->getOutputScalarBindings(), G, M);
    115114
    116115    return G;
     
    167166    using Graph = adjacency_list<hash_setS, vecS, bidirectionalS>;
    168167
    169     const auto pipelineInput = 0;
    170     const auto pipelineOutput = mLastKernel;
    171 
    172168    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    173169    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
     
    199195    }
    200196    // check any pipeline input first
    201     if (out_degree(pipelineInput, mBufferGraph)) {
     197    if (out_degree(mPipelineInput, mBufferGraph)) {
    202198        for (unsigned i = 0; i < numOfInputs; ++i) {
    203199            const auto buffer = getInputBufferVertex(i);
    204             if (LLVM_UNLIKELY(parent(buffer, mBufferGraph) == pipelineInput)) {
     200            if (LLVM_UNLIKELY(is_parent(buffer, mPipelineInput, mBufferGraph))) {
    205201                for (unsigned j = 0; j < i; ++j) {
    206202                    add_edge_if_no_induced_cycle(i, j, G);
     
    214210
    215211    // ... and check any pipeline output first
    216     if (out_degree(pipelineInput, mBufferGraph)) {
     212    if (in_degree(mPipelineOutput, mBufferGraph)) {
    217213        for (unsigned i = 0; i < numOfOutputs; ++i) {
    218214            const auto buffer = getOutputBufferVertex(i);
    219             if (LLVM_UNLIKELY(has_child(buffer, pipelineOutput, mBufferGraph))) {
     215            if (LLVM_UNLIKELY(has_child(buffer, mPipelineOutput, mBufferGraph))) {
    220216                const auto k = firstOutput + i;
    221217                for (unsigned j = 0; j < k; ++j) {
     
    293289ConsumerGraph PipelineCompiler::makeConsumerGraph()  const {
    294290
    295     const auto firstBuffer = mLastKernel + 1;
     291    const auto firstBuffer = mPipelineOutput + 1;
    296292    const auto lastBuffer = num_vertices(mBufferGraph);
    297293    ConsumerGraph G(lastBuffer);
     
    318314
    319315    for (auto bufferVertex = firstBuffer; bufferVertex < lastBuffer; ++bufferVertex) {
    320 
    321         const BufferNode & bn = mBufferGraph[bufferVertex];
    322 
    323         if (LLVM_UNLIKELY(bn.Type == BufferType::External)) {
    324             continue;
    325         }
    326 
    327316        // copy the producing edge
    328317        const auto pe = in_edge(bufferVertex, mBufferGraph);
     
    527516}
    528517
     518/** ------------------------------------------------------------------------------------------------------------- *
     519 * @brief makePipelineIOGraph
     520 ** ------------------------------------------------------------------------------------------------------------- */
     521PipelineIOGraph PipelineCompiler::makePipelineIOGraph() const {
     522    PipelineIOGraph G((mPipelineOutput - mPipelineInput) + 1);
     523    for (const auto e : make_iterator_range(out_edges(mPipelineInput, mBufferGraph))) {
     524        const auto buffer = target(e, mBufferGraph);
     525        for (const auto e : make_iterator_range(out_edges(buffer, mBufferGraph))) {
     526            const auto consumer = target(e, mBufferGraph);
     527            add_edge(mPipelineInput, consumer, mBufferGraph[e].inputPort(), G);
     528        }
     529    }
     530    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mBufferGraph))) {
     531        const auto buffer = source(e, mBufferGraph);
     532        for (const auto e : make_iterator_range(in_edges(buffer, mBufferGraph))) {
     533            const auto producer = source(e, mBufferGraph);
     534            add_edge(producer, mPipelineOutput, mBufferGraph[e].outputPort(), G);
     535        }
     536    }
     537    return G;
     538}
     539
    529540
    530541} // end of namespace
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_builder.cpp

    r6275 r6288  
    1616#warning the pipeline ordering should be canonicalized to ensure that when multiple kernels could be scheduled the same one will always be chosen.
    1717
     18#warning the builders should detect if there is only one kernel in a pipeline / both branches are equivalent and return the single kernel. Modify addOrDeclareMainFunction.
     19
     20#warning make a templated compile method to automatically validate and cast the main function to the correct type
     21
    1822using namespace llvm;
    1923using namespace boost;
     
    2226namespace kernel {
    2327
    24 #warning TODO: make a templated compile method to automatically validate and cast the main function to the correct type?
    25 
    2628/** ------------------------------------------------------------------------------------------------------------- *
    2729 * @brief compile()
    2830 ** ------------------------------------------------------------------------------------------------------------- */
    2931void * ProgramBuilder::compile() {
    30     // generate any nested kernels
    31     mDriver.generateUncachedKernels();
    32     // generate the actual pipeline (unless we can extract it from the cache)
    3332    PipelineKernel * const pk = cast<PipelineKernel>(makeKernel());
    34     pk->initializeBindings(mDriver);
    3533    mDriver.addKernel(pk);
    3634    mDriver.generateUncachedKernels();
    37     Function * const main = addOrDeclareMainFunction(pk);
    38     return mDriver.finalizeObject(main);
     35    return mDriver.finalizeObject(pk);
    3936}
    4037
     
    4340 ** ------------------------------------------------------------------------------------------------------------- */
    4441Kernel * PipelineBuilder::initializeKernel(Kernel * const kernel) {
    45     kernel->initializeBindings(mDriver);
    4642    mDriver.addKernel(kernel);
    4743    mKernels.emplace_back(kernel);
     
    227223Kernel * PipelineBuilder::makeKernel() {
    228224
     225    mDriver.generateUncachedKernels();
    229226    for (const auto & builder : mNestedBuilders) {
    230         mKernels.push_back(builder->makeKernel());
    231     }
     227        Kernel * const kernel = builder->makeKernel();
     228        mDriver.addKernel(kernel);
     229        mKernels.push_back(kernel);
     230    }
     231    mDriver.generateUncachedKernels();
    232232
    233233    const auto numOfKernels = mKernels.size();
     
    317317    pipeline.reserve(ordering.size());
    318318
    319     const std::unique_ptr<kernel::KernelBuilder> & b = mDriver.getBuilder();
     319    const auto & b = mDriver.getBuilder();
    320320    Type * const addrPtrTy = b->getVoidPtrTy();
    321321    for (auto i : ordering) {
     
    400400Kernel * OptimizationBranchBuilder::makeKernel() {
    401401
    402     Kernel * const trueBranch = mTrueBranch->makeKernel();
    403 
    404     Kernel * const falseBranch = mFalseBranch->makeKernel();
     402    mDriver.generateUncachedKernels();
     403
     404    Kernel * const nonZero = mNonZeroBranch->makeKernel();
     405    mDriver.addKernel(nonZero);
     406
     407    Kernel * const allZero = mAllZeroBranch->makeKernel();
     408    mDriver.addKernel(allZero);
     409
     410    mDriver.generateUncachedKernels();
    405411
    406412    std::string name;
     
    413419    out << ";Z=\"";
    414420
    415     if (trueBranch->hasFamilyName()) {
    416         out << trueBranch->getFamilyName();
     421    if (nonZero->hasFamilyName()) {
     422        out << nonZero->getFamilyName();
    417423    } else {
    418         out << trueBranch->getName();
     424        out << nonZero->getName();
    419425    }
    420426
    421427    out << "\";N=\"";
    422428
    423     if (falseBranch->hasFamilyName()) {
    424         out << falseBranch->getFamilyName();
     429    if (allZero->hasFamilyName()) {
     430        out << allZero->getFamilyName();
    425431    } else {
    426         out << falseBranch->getName();
     432        out << allZero->getName();
    427433    }
    428434
     
    433439    // we could avoid sending it through.
    434440
    435     combineBindingAttributes(trueBranch->getInputStreamSetBindings(),
    436                              falseBranch->getInputStreamSetBindings(),
     441    combineBindingAttributes(nonZero->getInputStreamSetBindings(),
     442                             allZero->getInputStreamSetBindings(),
    437443                             mInputStreamSets);
    438444
    439     combineBindingAttributes(trueBranch->getOutputStreamSetBindings(),
    440                              falseBranch->getOutputStreamSetBindings(),
     445    combineBindingAttributes(nonZero->getOutputStreamSetBindings(),
     446                             allZero->getOutputStreamSetBindings(),
    441447                             mOutputStreamSets);
     448
     449    if (isa<StreamSet>(mCondition)) {
     450        mInputStreamSets.emplace_back(OptimizationBranch::CONDITION_TAG, mCondition);
     451    } else {
     452        mInputScalars.emplace_back(OptimizationBranch::CONDITION_TAG, mCondition);
     453    }
    442454
    443455    OptimizationBranch * const br =
    444456            new OptimizationBranch(mDriver.getBuilder(), std::move(name),
    445                                    mCondition, trueBranch, falseBranch,
     457                                   mCondition, nonZero, allZero,
    446458                                   std::move(mInputStreamSets), std::move(mOutputStreamSets),
    447459                                   std::move(mInputScalars), std::move(mOutputScalars));
    448460
    449     br->setStride(addKernelProperties({trueBranch, falseBranch}, br));
     461    br->setStride(addKernelProperties({nonZero, allZero}, br));
    450462
    451463    return br;
     
    457469inline void PipelineBuilder::addInputScalar(llvm::Type * type, std::string name) {
    458470    mInputScalars.emplace_back(name, CreateConstant(Constant::getNullValue(type)), FixedRate(1), Family());
    459 }
    460 
    461 /** ------------------------------------------------------------------------------------------------------------- *
    462  * @brief makeMainFunction
    463  ** ------------------------------------------------------------------------------------------------------------- */
    464 Function * PipelineBuilder::addOrDeclareMainFunction(PipelineKernel * const k) {
    465     auto & b = mDriver.getBuilder();
    466     b->setModule(mDriver.getMainModule());
    467     k->addKernelDeclarations(b);
    468     const auto method = k->hasStaticMain() ? PipelineKernel::DeclareExternal : PipelineKernel::AddInternal;
    469     return k->addOrDeclareMainFunction(b, method);
    470471}
    471472
     
    551552}
    552553
     554PipelineBuilder::PipelineBuilder(Internal, BaseDriver & driver,
     555    Bindings stream_inputs, Bindings stream_outputs,
     556    Bindings scalar_inputs, Bindings scalar_outputs, const unsigned numOfThreads)
     557: PipelineBuilder(driver,
     558                  std::move(stream_inputs), std::move(stream_outputs),
     559                  std::move(scalar_inputs), std::move(scalar_outputs),
     560                  numOfThreads) {
     561
     562}
     563
    553564ProgramBuilder::ProgramBuilder(
    554565    BaseDriver & driver,
     
    561572      codegen::ThreadNum) {
    562573
    563 }
    564 
    565 template <typename IfType>
    566 inline void addCondition(Relationship * const condition, Bindings & bindings) {
    567     if (isa<IfType>(condition)) {
    568         bindings.emplace_back(OptimizationBranch::CONDITION_TAG, condition, FixedRate(1));
    569     }
    570574}
    571575
     
    580584      std::move(scalar_inputs), std::move(scalar_outputs))
    581585, mCondition(condition)
    582 , mTrueBranch(nullptr)
    583 , mFalseBranch(nullptr) {
    584     addCondition<StreamSet>(condition, mInputStreamSets);
    585     addCondition<Scalar>(condition, mInputScalars);
     586, mNonZeroBranch(std::unique_ptr<PipelineBuilder>(
     587                    new PipelineBuilder(
     588                    PipelineBuilder::Internal{}, mDriver,
     589                    mInputStreamSets, mOutputStreamSets,
     590                    mInputScalars, mOutputScalars, 1)))
     591, mAllZeroBranch(std::unique_ptr<PipelineBuilder>(
     592                    new PipelineBuilder(
     593                    PipelineBuilder::Internal{}, mDriver,
     594                    mInputStreamSets, mOutputStreamSets,
     595                    mInputScalars, mOutputScalars, 1))) {
     596
    586597}
    587598
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6275 r6288  
    99#include <boost/container/flat_map.hpp>
    1010#include <boost/graph/adjacency_list.hpp>
    11 #include <boost/graph/adjacency_matrix.hpp>
    1211#include <boost/range/adaptor/reversed.hpp>
    1312//#include <boost/serialization/strong_typedef.hpp>
     
    5251enum class BufferType : unsigned {
    5352    Internal = 0
    54     , External = 1
    55     , Managed = 2
     53    , Managed = 1
     54    , External = 2
    5655};
    5756
    5857struct BufferNode {
    59     Value *             TotalItems = nullptr;
    60     StreamSetBuffer *   Buffer = nullptr;
    61     RateValue           Lower{};
    62     RateValue           Upper{};
    63     unsigned            Overflow = 0;
    64     unsigned            Fasimile = 0;
    65     BufferType          Type = BufferType::Internal;
    66 };
     58    StreamSetBuffer * Buffer = nullptr;
     59    RateValue Lower{};
     60    RateValue Upper{};
     61    unsigned Overflow = 0;
     62    unsigned Fasimile = 0;
     63    BufferType Type = BufferType::Internal;
     64
     65    ~BufferNode() {
     66        if (LLVM_LIKELY(Type != BufferType::External)) {
     67            delete Buffer;
     68        }
     69    }
     70};
     71
     72inline unsigned InputPort(const StreamPort port) {
     73    assert (port.first == Kernel::Port::Input);
     74    return port.second;
     75}
     76
     77inline unsigned OutputPort(const StreamPort port) {
     78    assert (port.first == Kernel::Port::Output);
     79    return port.second;
     80}
    6781
    6882struct BufferRateData {
    6983
     84    StreamPort Port;
    7085    RateValue Minimum;
    7186    RateValue Maximum;
    72     unsigned  Port;
     87
     88    unsigned inputPort() const {
     89        return InputPort(Port);
     90    }
     91
     92    unsigned outputPort() const {
     93        return OutputPort(Port);
     94    }
    7395
    7496    BufferRateData() = default;
    7597
    76     BufferRateData(const unsigned port, RateValue min, RateValue max)
    77     : Minimum(std::move(min)), Maximum(std::move(max)), Port(port) { }
     98    BufferRateData(StreamPort port, RateValue min, RateValue max)
     99    : Port(port), Minimum(min), Maximum(max) { }
    78100};
    79101
     
    90112};
    91113
    92 enum : unsigned { FAKE_CONSUMER = (std::numeric_limits<unsigned>::max()) };
    93 
    94 using ConsumerGraph = adjacency_list<vecS, vecS, bidirectionalS, ConsumerNode, unsigned>;
     114using ConsumerGraph = adjacency_list<vecS, vecS, bidirectionalS, ConsumerNode, StreamPort>;
    95115
    96116template <typename Value>
     
    102122using TerminationGraph = adjacency_list<hash_setS, vecS, bidirectionalS, unsigned, unsigned>;
    103123
    104 using ScalarDependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, Value *, unsigned>;
     124using ScalarDependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, const ScalarConstant *, unsigned>;
     125
     126using ScalarCache = flat_map<ScalarDependencyGraph::vertex_descriptor, Value *>;
    105127
    106128struct OverflowRequirement {
     
    146168using PopCountGraph = adjacency_list<vecS, vecS, bidirectionalS, no_property, PopCountEdge>;
    147169
     170using PipelineIOGraph = adjacency_list<vecS, vecS, bidirectionalS, no_property, unsigned>;
     171
    148172const static std::string LOGICAL_SEGMENT_SUFFIX = ".LSN";
    149173const static std::string TERMINATION_PREFIX = "@TERM";
     
    160184    void addPipelineKernelProperties(BuilderRef b);
    161185    void generateInitializeMethod(BuilderRef b);
     186    void generateKernelMethod(BuilderRef b);
     187    void generateFinalizeMethod(BuilderRef b);
     188    std::vector<Value *> getFinalOutputScalars(BuilderRef b);
     189
     190protected:
     191
     192// internal pipeline state construction functions
     193
     194    void addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex);
    162195    void generateSingleThreadKernelMethod(BuilderRef b);
    163196    void generateMultiThreadKernelMethod(BuilderRef b);
    164     void generateFinalizeMethod(BuilderRef b);
    165     std::vector<Value *> getFinalOutputScalars(BuilderRef b);
    166 
    167 protected:
    168 
    169 // internal pipeline state construction functions
    170 
    171     void addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex);
    172197    void acquireCurrentSegment(BuilderRef b);
    173198    void releaseCurrentSegment(BuilderRef b);
     
    180205    void executeKernel(BuilderRef b);
    181206    void end(BuilderRef b, const unsigned step);
     207
     208    void readPipelineIOItemCounts(BuilderRef b);
     209    void writePipelineIOItemCounts(BuilderRef b);
    182210
    183211// internal pipeline functions
     
    206234
    207235    void checkForSufficientInputDataAndOutputSpace(BuilderRef b);
    208     void branchToTargetOrLoopExit(BuilderRef b, Value * const cond, BasicBlock * target);
     236    void branchToTargetOrLoopExit(BuilderRef b, Value * const cond, BasicBlock * target, Value * const halting);
    209237    void determineNumOfLinearStrides(BuilderRef b);
    210238    void calculateNonFinalItemCounts(BuilderRef b);
     
    241269    void writeUpdatedItemCounts(BuilderRef b);
    242270
     271    Value * getScalar(BuilderRef b, const ScalarDependencyGraph::vertex_descriptor scalar);
     272
    243273// intra-kernel functions
    244274
     
    247277    Value * getInputStrideLength(BuilderRef b, const unsigned inputPort);
    248278    Value * getOutputStrideLength(BuilderRef b, const unsigned outputPort);
    249     Value * getInitialStrideLength(BuilderRef b, const Port port, const unsigned portNum);
     279    Value * getInitialStrideLength(BuilderRef b, const StreamPort port);
    250280    static Value * getMaximumStrideLength(BuilderRef b, const Kernel * kernel, const Binding & binding);
    251281    Value * calculateNumOfLinearItems(BuilderRef b, const Binding & binding);
     
    326356    void addConsumerKernelProperties(BuilderRef b, const unsigned kernelIndex);
    327357    void createConsumedPhiNodes(BuilderRef b);
    328     void initializeConsumedItemCount(BuilderRef b, const unsigned bufferVertex, Value * const produced);
     358    void initializeConsumedItemCount(const unsigned bufferVertex, Value * const produced);
    329359    void readConsumedItemCounts(BuilderRef b);
    330     Value * getConsumedItemCount(BuilderRef b, const unsigned outputPort);
    331     void setConsumedItemCount(BuilderRef b, const unsigned bufferVertex, Value * const consumed) const;
     360    void setConsumedItemCount(BuilderRef b, const unsigned bufferVertex, not_null<Value *> consumed) const;
    332361
    333362// buffer analysis/management functions
    334363
    335364    BufferGraph makeBufferGraph(BuilderRef b);
     365    void enumerateBufferProducerBindings(const Port type, const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M) const;
     366    void enumerateBufferConsumerBindings(const Port type, const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M) const;
     367    BufferRateData getBufferRateData(const StreamPort port, const Kernel * const kernel, const Binding & binding) const;
     368
    336369    void addBufferHandlesToPipelineKernel(BuilderRef b, const unsigned index);
    337     void enumerateBufferProducerBindings(const unsigned producer, const Bindings & bindings, BufferGraph & G, BufferMap & M);
    338     void enumerateBufferConsumerBindings(const unsigned consumer, const Bindings & bindings, BufferGraph & G, BufferMap & M);
    339     BufferRateData getBufferRateData(const Kernel * const kernel, const Binding &binding, const unsigned port) const;
    340370
    341371    void constructBuffers(BuilderRef b);
     
    362392    TerminationGraph makeTerminationGraph();
    363393    ScalarDependencyGraph makeScalarDependencyGraph() const;
     394    PipelineIOGraph makePipelineIOGraph() const;
    364395
    365396// misc. functions
     
    370401    Value * getFinalizeFunction(BuilderRef b) const;
    371402
    372     std::string makeKernelName(const unsigned kernelIndex) const;
    373     std::string makeBufferName(const unsigned kernelIndex, const Binding & binding) const;
     403    LLVM_READNONE std::string makeKernelName(const unsigned kernelIndex) const;
     404    LLVM_READNONE std::string makeBufferName(const unsigned kernelIndex, const Binding & binding) const;
     405
     406    LLVM_READNONE unsigned getInputBufferVertex(const unsigned kernelVertex, const unsigned inputPort) const;
    374407    unsigned getInputBufferVertex(const unsigned inputPort) const;
    375     unsigned getInputBufferVertex(const unsigned kernelVertex, const unsigned inputPort) const;
    376408    StreamSetBuffer * getInputBuffer(const unsigned inputPort) const;
     409
     410    LLVM_READNONE unsigned getOutputBufferVertex(const unsigned kernelVertex, const unsigned outputPort) const;
    377411    unsigned getOutputBufferVertex(const unsigned outputPort) const;
    378     unsigned getOutputBufferVertex(const unsigned kernelVertex, const unsigned outputPort) const;
    379412    StreamSetBuffer * getOutputBuffer(const unsigned outputPort) const;
    380413
    381     LLVM_READNONE bool nestedPipeline() const {
    382         return out_degree(0, mBufferGraph) != 0 || in_degree(mLastKernel, mBufferGraph) != 0;
    383     }
    384 
    385     static LLVM_READNONE const Binding & getBinding(const Kernel * kernel, const Port port, const unsigned i) {
    386         if (port == Port::Input) {
    387             return kernel->getInputStreamSetBinding(i);
    388         } else if (port == Port::Output) {
    389             return kernel->getOutputStreamSetBinding(i);
     414    LLVM_READNONE unsigned getBufferIndex(const unsigned bufferVertex) const;
     415
     416    LLVM_READNONE bool isPipelineInput(const unsigned kernelIndex, const unsigned inputPort) const;
     417    LLVM_READNONE bool isPipelineOutput(const unsigned kernelIndex, const unsigned outputPort) const;
     418    LLVM_READNONE bool nestedPipeline() const;
     419
     420    static LLVM_READNONE const Binding & getBinding(const Kernel * kernel, const StreamPort port) {
     421        if (port.first == Port::Input) {
     422            return kernel->getInputStreamSetBinding(port.second);
     423        } else if (port.first == Port::Output) {
     424            return kernel->getOutputStreamSetBinding(port.second);
    390425        }
    391426        llvm_unreachable("unknown port binding type!");
     
    394429    void printBufferGraph(const BufferGraph & G, raw_ostream & out);
    395430
    396     LLVM_READNONE const Binding & getInputBinding(const Kernel * const producer, const unsigned index) const;
    397 
    398     LLVM_READNONE const Binding & getOutputBinding(const Kernel * const consumer, const unsigned index) const;
    399 
    400431    void writeOutputScalars(BuilderRef b, const unsigned index, std::vector<Value *> & args);
    401432
     
    408439
    409440
     441private:
     442
     443    static constexpr StreamPort FAKE_CONSUMER{Port::Input, std::numeric_limits<unsigned>::max()};
    410444
    411445protected:
    412446
    413447    PipelineKernel * const                      mPipelineKernel;
     448
    414449    const Kernels                               mPipeline;
    415     const unsigned                              mFirstKernel;
     450    static constexpr unsigned                   mPipelineInput = 0;
     451    static constexpr unsigned                   mFirstKernel = 1;
    416452    const unsigned                              mLastKernel;
    417 
    418 
    419     OwnedStreamSetBuffers                       mOwnedBuffers;
     453    const unsigned                              mPipelineOutput;
     454
    420455    unsigned                                    mKernelIndex = 0;
    421456    Kernel *                                    mKernel = nullptr;
     
    423458    // pipeline state
    424459    PHINode *                                   mSegNo = nullptr;
     460    Value *                                     mHalted = nullptr;
    425461    PHINode *                                   mProgressCounter = nullptr;
    426462    Value *                                     mPipelineProgress = nullptr;
     
    439475    // kernel state
    440476    Value *                                     mTerminatedInitially = nullptr;
     477    PHINode *                                   mHaltingPhi = nullptr;
     478    PHINode *                                   mHaltedPhi = nullptr;
    441479    PHINode *                                   mHasProgressedPhi = nullptr;
    442480    PHINode *                                   mAlreadyProgressedPhi = nullptr;
     
    446484    Value *                                     mTerminatedExplicitly = nullptr;
    447485    std::vector<unsigned>                       mPortOrdering;
     486    std::vector<Value *>                        mTotalItems;
    448487
    449488    std::vector<Value *>                        mTerminationSignals;
     
    476515    std::vector<PHINode *>                      mFullyProducedItemCount; // *after* exiting the kernel
    477516
    478 
    479     // debug + misc state
     517    // cycle counter state
    480518    Value *                                     mCycleCountStart = nullptr;
    481519
     
    484522    flat_map<unsigned, PopCountData>            mPopCountData;
    485523
    486 
    487524    // analysis state
    488     BufferGraph                                 mBufferGraph;
     525    const BufferGraph                           mBufferGraph;
    489526    ConsumerGraph                               mConsumerGraph;
    490     ScalarDependencyGraph                       mScalarDependencyGraph;
     527    const ScalarDependencyGraph                 mScalarDependencyGraph;
     528    ScalarCache                                 mScalarCache;
     529    const PipelineIOGraph                       mPipelineIOGraph;
    491530    const TerminationGraph                      mTerminationGraph;
    492531    PopCountGraph                               mPopCountGraph;
    493532
    494533};
     534
     535// NOTE: these graph functions are not safe for general use since they are intended for inspection of *edge-immutable* graphs.
     536
     537template <typename Graph>
     538LLVM_READNONE
     539inline typename graph_traits<Graph>::edge_descriptor first_in_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     540    return *in_edges(u, G).first;
     541}
     542
     543template <typename Graph>
     544LLVM_READNONE
     545inline typename graph_traits<Graph>::edge_descriptor in_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     546    assert (in_degree(u, G) == 1);
     547    return first_in_edge(u, G);
     548}
     549
     550template <typename Graph>
     551LLVM_READNONE
     552inline typename graph_traits<Graph>::vertex_descriptor parent(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     553    return source(in_edge(u, G), G);
     554}
     555
     // Returns the edge obtained by dereferencing the begin iterator of out_edges(u, G).
     // No emptiness check is made here; presumably the caller must guarantee that u has
     // at least one out-edge.
     556template <typename Graph>
     557LLVM_READNONE
     558inline typename graph_traits<Graph>::edge_descriptor first_out_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     559    return *out_edges(u, G).first;
     560}
     561
     // Returns the out-edge of u in G. Asserts that u has exactly one out-edge and then
     // forwards to first_out_edge(u, G).
     562template <typename Graph>
     563LLVM_READNONE
     564inline typename graph_traits<Graph>::edge_descriptor out_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     565    assert (out_degree(u, G) == 1);
     566    return first_out_edge(u, G);
     567}
     568
     // Returns the target vertex of the edge returned by out_edge(u, G), i.e. the
     // vertex at the other end of u's out-edge.
     569template <typename Graph>
     570LLVM_READNONE
     571inline typename graph_traits<Graph>::vertex_descriptor child(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
     572    return target(out_edge(u, G), G);
     573}
     574
     // Returns true when parent(u, G) equals v, i.e. when v is the source of u's in-edge.
     // Note the argument order: u is the vertex whose parent is looked up and v is the
     // candidate parent.
     575template <typename Graph>
     576LLVM_READNONE
     577inline bool is_parent(const typename graph_traits<Graph>::vertex_descriptor u,
     578                      const typename graph_traits<Graph>::vertex_descriptor v,
     579                      const Graph & G) {
     580    return parent(u, G) == v;
     581}
     582
     // Returns true when some out-edge of u in G has v as its target. Every out-edge of u
     // is examined in turn; the result is false when none matches, which includes the case
     // where u has no out-edges at all.
     583template <typename Graph>
     584LLVM_READNONE
     585inline bool has_child(const typename graph_traits<Graph>::vertex_descriptor u,
     586                      const typename graph_traits<Graph>::vertex_descriptor v,
     587                      const Graph & G) {
     588    for (const auto & e : make_iterator_range(out_edges(u, G))) {
     589        if (target(e, G) == v) {
     590            return true;
     591        }
     592    }
     593    return false;
     594}
    495595
    496596/** ------------------------------------------------------------------------------------------------------------- *
     
    515615: mPipelineKernel(pipelineKernel)
    516616, mPipeline(makePipelineList(pipelineKernel))
    517 , mFirstKernel(1)
    518617, mLastKernel(mPipeline.size() - 1)
     618, mPipelineOutput(mLastKernel)
    519619, mBufferGraph(makeBufferGraph(b))
    520620, mConsumerGraph(makeConsumerGraph())
    521621, mScalarDependencyGraph(makeScalarDependencyGraph())
     622, mPipelineIOGraph(makePipelineIOGraph())
    522623, mTerminationGraph(makeTerminationGraph())
    523624, mPopCountGraph(makePopCountGraph()) {
    524625    initializePopCounts();
    525 }
    526 
    527 /** ------------------------------------------------------------------------------------------------------------- *
    528  * @brief getInputBuffer
    529  ** ------------------------------------------------------------------------------------------------------------- */
    530 inline unsigned PipelineCompiler::getInputBufferVertex(const unsigned inputPort) const {
    531     return getInputBufferVertex(mKernelIndex, inputPort);
    532 }
    533 
    534 /** ------------------------------------------------------------------------------------------------------------- *
    535  * @brief getInputBuffer
    536  ** ------------------------------------------------------------------------------------------------------------- */
    537 inline unsigned PipelineCompiler::getInputBufferVertex(const unsigned kernelVertex, const unsigned inputPort) const {
    538     for (const auto e : make_iterator_range(in_edges(kernelVertex, mBufferGraph))) {
    539         if (mBufferGraph[e].Port == inputPort) {
    540             return source(e, mBufferGraph);
    541         }
    542     }
    543     assert (!"input buffer not found");
    544     llvm_unreachable("input buffer not found");
    545 }
    546 
    547 /** ------------------------------------------------------------------------------------------------------------- *
    548  * @brief getInputBuffer
    549  ** ------------------------------------------------------------------------------------------------------------- */
    550 inline StreamSetBuffer * PipelineCompiler::getInputBuffer(const unsigned inputPort) const {
    551     return mBufferGraph[getInputBufferVertex(inputPort)].Buffer;
    552 }
    553 
    554 /** ------------------------------------------------------------------------------------------------------------- *
    555  * @brief getOutputBufferVertex
    556  ** ------------------------------------------------------------------------------------------------------------- */
    557 inline unsigned PipelineCompiler::getOutputBufferVertex(const unsigned outputPort) const {
    558     return getOutputBufferVertex(mKernelIndex, outputPort);
    559 }
    560 
    561 /** ------------------------------------------------------------------------------------------------------------- *
    562  * @brief getOutputBufferVertex
    563  ** ------------------------------------------------------------------------------------------------------------- */
    564 inline unsigned PipelineCompiler::getOutputBufferVertex(const unsigned kernelVertex, const unsigned outputPort) const {
    565     for (const auto e : make_iterator_range(out_edges(kernelVertex, mBufferGraph))) {
    566         if (mBufferGraph[e].Port == outputPort) {
    567             return target(e, mBufferGraph);
    568         }
    569     }
    570     assert (!"output buffer not found");
    571     llvm_unreachable("output buffer not found");
    572 }
    573 
    574 
    575 /** ------------------------------------------------------------------------------------------------------------- *
    576  * @brief getOutputBuffer
    577  ** ------------------------------------------------------------------------------------------------------------- */
    578 inline StreamSetBuffer * PipelineCompiler::getOutputBuffer(const unsigned outputPort) const {
    579     return mBufferGraph[getOutputBufferVertex(outputPort)].Buffer;
    580626}
    581627
     
    648694}
    649695
    650 template <typename Graph>
    651 inline typename graph_traits<Graph>::edge_descriptor first_in_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    652     return *in_edges(u, G).first;
    653 }
    654 
    655 template <typename Graph>
    656 inline typename graph_traits<Graph>::edge_descriptor in_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    657     assert (in_degree(u, G) == 1);
    658     return first_in_edge(u, G);
    659 }
    660 
    661 template <typename Graph>
    662 inline typename graph_traits<Graph>::vertex_descriptor parent(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    663     return source(in_edge(u, G), G);
    664 }
    665 
    666 template <typename Graph>
    667 inline typename graph_traits<Graph>::edge_descriptor first_out_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    668     return *out_edges(u, G).first;
    669 }
    670 
    671 template <typename Graph>
    672 inline typename graph_traits<Graph>::edge_descriptor out_edge(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    673     assert (out_degree(u, G) == 1);
    674     return first_out_edge(u, G);
    675 }
    676 
    677 template <typename Graph>
    678 inline typename graph_traits<Graph>::vertex_descriptor child(const typename graph_traits<Graph>::vertex_descriptor u, const Graph & G) {
    679     return target(out_edge(u, G), G);
    680 }
    681 
    682 template <typename Graph>
    683 inline bool has_child(const typename graph_traits<Graph>::vertex_descriptor u,
    684                       const typename graph_traits<Graph>::vertex_descriptor v,
    685                       const Graph & G) {
    686     for (const auto & e : make_iterator_range(out_edges(u, G))) {
    687         if (target(e, G) == v) {
    688             return true;
    689         }
    690     }
    691     return false;
    692 }
    693696
    694697} // end of namespace
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6275 r6288  
    1010namespace kernel {
    1111
    12 #warning make sure all virtual methods are proxied for when only one kernel exists in the pipeline
    13 
    1412/** ------------------------------------------------------------------------------------------------------------- *
    1513 * @brief addInternalKernelProperties
    1614 ** ------------------------------------------------------------------------------------------------------------- */
    1715void PipelineKernel::addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & b) {
    18     if (LLVM_UNLIKELY(isProxy())) {
    19         mKernels[0]->addInternalKernelProperties(b);
    20     } else { // add handles for each of unique streams
    21         mCompiler = llvm::make_unique<PipelineCompiler>(b, this);
    22         mCompiler->addPipelineKernelProperties(b);
    23     }
     16    mCompiler = llvm::make_unique<PipelineCompiler>(b, this);
     17    mCompiler->addPipelineKernelProperties(b);
    2418}
    2519
     
    2822 ** ------------------------------------------------------------------------------------------------------------- */
    2923void PipelineKernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> & args) {
    30 
    31     if (LLVM_UNLIKELY(isProxy())) {
    32         mKernels[0]->initializeInstance(b, args);
    33     } else {
    34         assert (args[0] && "cannot initialize before creation");
    35         assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
    36         b->setKernel(this);
    37 
    38         // append the kernel pointers for any kernel belonging to a family
    39         Module * const m = b->getModule();
    40         for (auto & kernel : mKernels) {
    41             if (kernel->hasFamilyName()) {
    42                 kernel->addKernelDeclarations(b);
    43                 PointerType * const voidPtrTy = b->getVoidPtrTy();
    44                 if (LLVM_UNLIKELY(kernel->isStateful())) {
    45                     Value * const handle = kernel->createInstance(b);
    46                     args.push_back(b->CreatePointerCast(handle, voidPtrTy));
    47                 }
    48                 args.push_back(b->CreatePointerCast(kernel->getInitFunction(m), voidPtrTy));
    49                 args.push_back(b->CreatePointerCast(kernel->getDoSegmentFunction(m), voidPtrTy));
    50                 args.push_back(b->CreatePointerCast(kernel->getTerminateFunction(m), voidPtrTy));
     24    assert (args[0] && "cannot initialize before creation");
     25    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
     26    b->setKernel(this);
     27
     28    // append the kernel pointers for any kernel belonging to a family
     29    Module * const m = b->getModule();
     30    for (Kernel * kernel : mKernels) {
     31        if (LLVM_UNLIKELY(kernel->hasFamilyName())) {
     32            PointerType * const voidPtrTy = b->getVoidPtrTy();
     33            if (LLVM_LIKELY(kernel->isStateful())) {
     34                Value * const handle = kernel->createInstance(b);
     35                args.push_back(b->CreatePointerCast(handle, voidPtrTy));
    5136            }
    52         }
    53 
    54         b->CreateCall(getInitFunction(m), args);
    55     }
     37            args.push_back(b->CreatePointerCast(kernel->getInitFunction(m), voidPtrTy));
     38            args.push_back(b->CreatePointerCast(kernel->getDoSegmentFunction(m), voidPtrTy));
     39            args.push_back(b->CreatePointerCast(kernel->getTerminateFunction(m), voidPtrTy));
     40        }
     41    }
     42
     43    b->CreateCall(getInitFunction(m), args);
     44}
     45
     46/** ------------------------------------------------------------------------------------------------------------- *
     47 * @brief addKernelDeclarations
        *
        * Calls addKernelDeclarations(b) on every kernel held in mKernels and then invokes the
        * base-class Kernel::addKernelDeclarations(b) for the pipeline kernel itself.
     48 ** ------------------------------------------------------------------------------------------------------------- */
     49void PipelineKernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
     50    for (Kernel * kernel : mKernels) {
     51        kernel->addKernelDeclarations(b);
     52    }
     53    Kernel::addKernelDeclarations(b);
    5654}
    5755
     
    6058 ** ------------------------------------------------------------------------------------------------------------- */
    6159void PipelineKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
    62     if (LLVM_UNLIKELY(isProxy())) {
    63         mKernels[0]->generateInitializeMethod(b);
    64     } else {
    65         // TODO: this isn't sufficient for composable PipelineKernel objects since would want to
    66         // allocate memory once during initialization but have the buffer/kernel struct visible in
    67         // the main kernel logic. This can be solved by heap allocating all structs or somehow
    68         // passing the structs via the function call but only reentrant pipelines require this
    69         // to maintain state.
    70         mCompiler->generateInitializeMethod(b);
    71     }
     60    mCompiler->generateInitializeMethod(b);
    7261}
    7362
     
    7665 ** ------------------------------------------------------------------------------------------------------------- */
    7766void PipelineKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
    78     if (LLVM_UNLIKELY(isProxy())) {
    79         mKernels[0]->generateKernelMethod(b);
    80     } else {
    81         if (mNumOfThreads == 1) {
    82             mCompiler->generateSingleThreadKernelMethod(b);
    83         } else {
    84             mCompiler->generateMultiThreadKernelMethod(b);
    85         }
    86     }
    87 }
    88 
    89 /** ------------------------------------------------------------------------------------------------------------- *
    90  * @brief finalizeInstance
    91  ** ------------------------------------------------------------------------------------------------------------- */
    92 Value * PipelineKernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
    93     assert (mHandle && "was not set");
    94     if (LLVM_UNLIKELY(isProxy())) {
    95         return mKernels[0]->finalizeInstance(b);
    96     } else {
    97         Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
    98         mHandle = nullptr;
    99         if (LLVM_LIKELY(mOutputScalars.empty())) {
    100             assert ("pipeline termination must have output scalars or a void return type!" && result->getType()->isVoidTy());
    101             result = nullptr;
    102         }
    103         return result;
    104     }
    105 }
     67    mCompiler->generateKernelMethod(b);
     68}
     69
     70///** ------------------------------------------------------------------------------------------------------------- *
     71// * @brief finalizeInstance
     72// ** ------------------------------------------------------------------------------------------------------------- */
     73//Value * PipelineKernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
     74//    Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
     75//    mHandle = nullptr;
     76//    if (LLVM_LIKELY(mOutputScalars.empty())) {
     77//        assert ("pipeline termination must have output scalars or a void return type!" && result->getType()->isVoidTy());
     78//        result = nullptr;
     79//    }
     80//    return result;
     81//}
    10682
    10783/** ------------------------------------------------------------------------------------------------------------- *
     
    10985 ** ------------------------------------------------------------------------------------------------------------- */
    11086void PipelineKernel::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
    111     if (LLVM_UNLIKELY(isProxy())) {
    112         mKernels[0]->generateFinalizeMethod(b);
    113     } else {
    114         mCompiler->generateFinalizeMethod(b);
    115     }
     87    mCompiler->generateFinalizeMethod(b);
    11688}
    11789
     
    12092 ** ------------------------------------------------------------------------------------------------------------- */
    12193std::vector<Value *> PipelineKernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
    122     if (LLVM_UNLIKELY(isProxy())) {
    123         return mKernels[0]->getFinalOutputScalars(b);
    124     } else {
    125         return mCompiler->getFinalOutputScalars(b);
    126     }
     94    return mCompiler->getFinalOutputScalars(b);
    12795}
    12896
     
    13199 ** ------------------------------------------------------------------------------------------------------------- */
    132100void PipelineKernel::linkExternalMethods(const std::unique_ptr<KernelBuilder> & b) {
    133     if (LLVM_UNLIKELY(isProxy())) {
    134         return mKernels[0]->linkExternalMethods(b);
    135     } else {
    136         for (const auto & k : mKernels) {
    137             k->linkExternalMethods(b);
    138         }
    139         for (CallBinding & call : mCallBindings) {
    140             call.Callee = b->LinkFunction(call.Name, call.Type, call.FunctionPointer);
    141         }
     101    for (const auto & k : mKernels) {
     102        k->linkExternalMethods(b);
     103    }
     104    for (CallBinding & call : mCallBindings) {
     105        call.Callee = b->LinkFunction(call.Name, call.Type, call.FunctionPointer);
    142106    }
    143107}
     
    170134
    171135    b->setKernel(this);
     136
     137    addKernelDeclarations(b);
    172138
    173139    Module * const m = b->getModule();
     
    249215 ** ------------------------------------------------------------------------------------------------------------- */
    250216const std::string PipelineKernel::getName() const {
    251     if (LLVM_UNLIKELY(isProxy())) {
    252         return mKernels[0]->getName();
    253     }
    254217    return mKernelName;
    255218}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6275 r6288  
    3838 ** ------------------------------------------------------------------------------------------------------------- */
    3939inline void PipelineCompiler::addPipelineKernelProperties(BuilderRef b) {
     40    // TODO: look into improving cache locality/false sharing of this struct
    4041    b->setKernel(mPipelineKernel);
    4142    addTerminationProperties(b);
     43    addConsumerKernelProperties(b, mPipelineInput);
    4244    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    4345        addBufferHandlesToPipelineKernel(b, i);
     
    5557inline void PipelineCompiler::addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex) {
    5658    const Kernel * const kernel = mPipeline[kernelIndex];
     59    // If we've proven we do not need synchronization then we've already proven that
     60    // we can calculate the item count and num of strides from the input item counts
    5761    if (requiresSynchronization(kernelIndex)) {
    5862
     
    6064        const auto name = makeKernelName(kernelIndex);
    6165        mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
     66
     67        // TODO: if a kernel I/O stream is a pipeline I/O and the kernel processes it at the
     68        // rate the pipeline processes it, can use the local state instead of storing the
     69        // item count in the kernel.
    6270
    6371        // TODO: non deferred item count for fixed rates could be calculated from total # of segments.
     
    6977                mPipelineKernel->addInternalScalar(sizeTy, prefix + DEFERRED_ITEM_COUNT_SUFFIX);
    7078            }
    71             // If we've proven we do not need synchronization then we've already proven that
    72             // we can calculate the item count and num of strides from the input item counts
    73             mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     79//            if (LLVM_UNLIKELY(onlyOne && isPipelineInput(kernelIndex, i))) {
     80//                mPipelineKernel->addLocalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     81//            } else {
     82                mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     83//            }
    7484        }
    7585
     
    7888            const Binding & output = kernel->getOutputStreamSetBinding(i);
    7989            const auto prefix = makeBufferName(kernelIndex, output);
     90//            if (LLVM_UNLIKELY(isPipelineOutput(kernelIndex, i))) {
     91//                mPipelineKernel->addLocalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     92//            } else {
    8093                mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
    81         }
     94//            }
     95        }
     96    }
     97
     98    if (LLVM_LIKELY(kernel->isStateful() && !kernel->hasFamilyName())) {
     99        // if this is a family kernel, its handle will be passed into the kernel
     100        // methods rather than stored within the pipeline state
     101        PointerType * kernelPtrTy = kernel->getKernelType()->getPointerTo(0);
     102        mPipelineKernel->addInternalScalar(kernelPtrTy, makeKernelName(kernelIndex));
    82103    }
    83104}
     
    87108 ** ------------------------------------------------------------------------------------------------------------- */
    88109void PipelineCompiler::generateInitializeMethod(BuilderRef b) {
    89     for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    90         mPipeline[i]->addKernelDeclarations(b);
    91     }
     110    mScalarCache.clear();
    92111    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    93112        Kernel * const kernel = mPipeline[i];
     
    103122        const auto hasHandle = mKernel->isStateful() ? 1U : 0U;
    104123        args.resize(hasHandle + in_degree(i, mScalarDependencyGraph));
    105         if (LLVM_LIKELY(hasHandle != 0U)) {
     124        if (LLVM_LIKELY(hasHandle)) {
    106125            args[0] = mKernel->getHandle();
    107126        }
    108127        b->setKernel(mPipelineKernel);
    109128        for (const auto ce : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
    110             const auto j = hasHandle + mScalarDependencyGraph[ce];
    111             const auto pe = in_edge(source(ce, mScalarDependencyGraph), mScalarDependencyGraph);
    112             const auto k = mScalarDependencyGraph[pe];
    113             const Binding & input = mPipelineKernel->getInputScalarBinding(k);
    114             args[j] = b->getScalarField(input.getName());
     129            const auto j = mScalarDependencyGraph[ce] + hasHandle;
     130            const auto scalar = source(ce, mScalarDependencyGraph);
     131            args[j] = getScalar(b, scalar);
    115132        }
    116133        b->setKernel(mKernel);
     
    127144
    128145        b->SetInsertPoint(kernelExit);
     146    }
     147}
     148
     149/** ------------------------------------------------------------------------------------------------------------- *
     150 * @brief generateKernelMethod
         *
         * Clears mScalarCache, calls readPipelineIOItemCounts(b), and then dispatches to the
         * single-threaded generator when the pipeline kernel reports exactly one thread, or to
         * the multi-threaded generator otherwise.
     151 ** ------------------------------------------------------------------------------------------------------------- */
     152inline void PipelineCompiler::generateKernelMethod(BuilderRef b) {
     153    mScalarCache.clear();
     154    readPipelineIOItemCounts(b);
     155    if (mPipelineKernel->getNumOfThreads() == 1) {
     156        generateSingleThreadKernelMethod(b);
     157    } else {
     158        generateMultiThreadKernelMethod(b);
    129159    }
    130160}
     
    216246    Value * const segmentOffset = setThreadState(b, threadStruct);
    217247    // generate the pipeline logic for this thread
     248    mPipelineKernel->initializeLocalScalarValues(b);
    218249    start(b, segmentOffset);
    219250    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
     
    245276 ** ------------------------------------------------------------------------------------------------------------- */
    246277void PipelineCompiler::generateFinalizeMethod(BuilderRef b) {
     278    mScalarCache.clear();
    247279    printOptionalCycleCounter(b);
    248280    std::vector<Value *> params;
     
    254286            params.push_back(mKernel->getHandle());
    255287        }
    256         mScalarDependencyGraph[i] = b->CreateCall(getFinalizeFunction(b), params);
     288        Value * const result = b->CreateCall(getFinalizeFunction(b), params);
     289        mScalarCache.emplace(i, result);
    257290    }
    258291    releaseBuffers(b);
     
    263296 ** ------------------------------------------------------------------------------------------------------------- */
    264297std::vector<Value *> PipelineCompiler::getFinalOutputScalars(BuilderRef b) {
    265 
    266298    const auto & calls = mPipelineKernel->getCallBindings();
    267299    const auto numOfCalls = calls.size();
    268300    std::vector<Value *> args;
    269301    b->setKernel(mPipelineKernel);
    270     const auto pipelineOutput = mLastKernel;
    271     const auto firstCall = pipelineOutput + 1;
     302    const auto firstCall = mPipelineOutput + 1;
    272303    for (unsigned k = 0; k < numOfCalls; ++k) {
    273304        writeOutputScalars(b, firstCall + k, args);
     
    279310        }
    280311        assert (i == f->arg_end());
    281         b->CreateCall(f, args);
    282     }
    283     writeOutputScalars(b, pipelineOutput, args);
     312        Value * const result = b->CreateCall(f, args);
     313        mScalarCache.emplace(firstCall + k, result);
     314    }
     315    writeOutputScalars(b, mPipelineOutput, args);
    284316    return args;
    285317}
     
    291323    const auto n = in_degree(index, mScalarDependencyGraph);
    292324    args.resize(n);
    293     const auto pipelineInput = 0;
    294325    for (const auto e : make_iterator_range(in_edges(index, mScalarDependencyGraph))) {
    295326        const auto scalar = source(e, mScalarDependencyGraph);
    296         // If we have not already retrieved the specific scalar, construct/load/extract it.
    297         if (LLVM_LIKELY(mScalarDependencyGraph[scalar] == nullptr)) {
    298             const auto producer = in_edge(scalar, mScalarDependencyGraph);
    299             const auto i = source(producer, mScalarDependencyGraph);
    300             const auto j = mScalarDependencyGraph[producer];
    301             Value * value = nullptr;
    302             if (LLVM_UNLIKELY(i == pipelineInput)) {
    303                 const Binding & input = mPipelineKernel->getInputScalarBinding(j);
    304                 const Relationship * const rel = getRelationship(input);
    305                 if (LLVM_UNLIKELY(isa<ScalarConstant>(rel))) {
    306                     value = cast<ScalarConstant>(rel)->value();
    307                 } else {
    308                     value = b->getScalarField(input.getName());
    309                 }
    310             } else { // output scalar of some kernel
    311                 Value * const outputScalars = mScalarDependencyGraph[i]; assert (outputScalars);
    312                 if (outputScalars->getType()->isAggregateType()) {
    313                     value = b->CreateExtractValue(outputScalars, {j});
    314                 } else { assert (j == 0 && "scalar type is not an aggregate");
    315                     value = outputScalars;
    316                 }
    317             }
    318             mScalarDependencyGraph[scalar] = value;
    319         }
    320327        const auto k = mScalarDependencyGraph[e];
    321         args[k] = mScalarDependencyGraph[scalar];
     328        args[k] = getScalar(b, scalar);
    322329    }
    323330}
     
    403410}
    404411
     412/** ------------------------------------------------------------------------------------------------------------- *
     413 * @brief getScalar
         *
         * Returns the value associated with the given vertex of mScalarDependencyGraph, memoised
         * in mScalarCache.
         *
         * On a cache miss the vertex's single in-edge is followed to its source i, with j being
         * the property stored on that edge:
         *  - if i is mPipelineInput and j == -1U, the value comes from calling value() on the
         *    vertex property (presumably a scalar constant; TODO confirm);
         *  - if i is mPipelineInput otherwise, j indexes the pipeline kernel's input scalar
         *    bindings and the scalar field of that name is read through the builder;
         *  - otherwise the value of i is obtained by a recursive call, and element j is extracted
         *    when that value has an aggregate type (j must be 0 when it does not).
         *
         * A null result from the recursive call is reported as a fatal error.
         *
         * @param b       builder used to read scalar fields and emit the extract-value
         * @param scalar  vertex of mScalarDependencyGraph whose value is requested
         * @return the non-null value that was found or computed for the vertex
     414 ** ------------------------------------------------------------------------------------------------------------- */
     415Value * PipelineCompiler::getScalar(BuilderRef b, const ScalarDependencyGraph::vertex_descriptor scalar) {
     416    const auto f = mScalarCache.find(scalar);
     417    if (LLVM_UNLIKELY(f != mScalarCache.end())) {
     418        return f->second;
     419    }
            // in_edge() asserts that the vertex has exactly one in-edge
     420    const auto producer = in_edge(scalar, mScalarDependencyGraph);
     421    const auto i = source(producer, mScalarDependencyGraph);
     422    const auto j = mScalarDependencyGraph[producer];
     423    Value * value = nullptr;
     424    if (i == mPipelineInput) {
     425        if (LLVM_UNLIKELY(j == -1U)) {
     426            value = mScalarDependencyGraph[scalar]->value();
     427        } else {
     428            const Binding & input = mPipelineKernel->getInputScalarBinding(j);
     429            value = b->getScalarField(input.getName());
     430        }
     431    } else { // output scalar of some kernel
                // NOTE(review): presumably served from mScalarCache, which is filled with the
                // producing call's result elsewhere in this file; confirm for every caller
     432        Value * const outputScalars = getScalar(b, i);
     433        if (LLVM_UNLIKELY(outputScalars == nullptr)) {
     434            report_fatal_error("Internal error: pipeline is unable to locate valid output scalar");
     435        }
     436        if (outputScalars->getType()->isAggregateType()) {
     437            value = b->CreateExtractValue(outputScalars, {j});
     438        } else { assert (j == 0 && "scalar type is not an aggregate");
     439            value = outputScalars;
     440        }
     441    }
     442    assert (value);
     443    mScalarCache.emplace(scalar, value);
     444    return value;
     445}
    405446
    406447enum : unsigned {
    407     HANDLE_INDEX = 0
    408     , SEGMENT_OFFSET_INDEX = 1
    409     , LOCAL_STATE_INDEX = 2
    410     , FIRST_INPUT_STREAM_INDEX = 3
     448    SEGMENT_OFFSET_INDEX = 0
     449    , LOCAL_STATE_INDEX = 1
     450    , SHARED_STATE_INDEX = 2
    411451};
    412452
     
    415455 ** ------------------------------------------------------------------------------------------------------------- */
    416456inline StructType * PipelineCompiler::getThreadStateType(BuilderRef b) {
    417     std::vector<Type *> threadStructFields;
    418     Type * const handleType = mPipelineKernel->getHandle()->getType();
    419     threadStructFields.push_back(handleType);
    420     threadStructFields.push_back(b->getSizeTy());
    421     threadStructFields.push_back(getLocalStateType(b));
    422     const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    423     for (unsigned i = 0; i < numOfInputs; ++i) {
    424         auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    425         Value * const handle = buffer->getHandle();
    426         threadStructFields.push_back(handle->getType());
    427     }
    428     const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    429     for (unsigned i = 0; i < numOfOutputs; ++i) {
    430         auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    431         Value * const handle = buffer->getHandle();
    432         threadStructFields.push_back(handle->getType());
    433     }
    434     return StructType::get(b->getContext(), threadStructFields);
     457    std::vector<Type *> fields(3);
     458    fields[SEGMENT_OFFSET_INDEX] = b->getSizeTy(); // segment offset
     459    fields[LOCAL_STATE_INDEX] = getLocalStateType(b);
     460    LLVMContext & C = b->getContext();
     461    fields[SHARED_STATE_INDEX] = StructType::get(C, mPipelineKernel->getDoSegmentFields(b));
     462    return StructType::get(C, fields);
    435463}
    436464
     
    439467 ** ------------------------------------------------------------------------------------------------------------- */
    440468inline Value * PipelineCompiler::allocateThreadState(BuilderRef b, const unsigned segOffset) {
    441 
    442469    StructType * const threadStructType = getThreadStateType(b);
    443470    Value * const threadState = makeStateObject(b, threadStructType);
    444 
    445471    std::vector<Value *> indices(2);
    446472    indices[0] = b->getInt32(0);
    447     indices[1] = b->getInt32(HANDLE_INDEX);
    448     Value * const handle = mPipelineKernel->getHandle();
    449     b->CreateStore(handle, b->CreateGEP(threadState, indices));
    450473    indices[1] = b->getInt32(SEGMENT_OFFSET_INDEX);
    451474    b->CreateStore(b->getSize(segOffset), b->CreateGEP(threadState, indices));
    452475    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
    453476    allocateThreadLocalState(b, b->CreateGEP(threadState, indices));
    454 
    455     const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    456     for (unsigned i = 0; i < numOfInputs; ++i) {
    457         const auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    458         Value * const handle = buffer->getHandle();
    459         indices[1] = b->getInt32(FIRST_INPUT_STREAM_INDEX + i);
    460         b->CreateStore(handle, b->CreateGEP(threadState, indices));
    461     }
    462 
    463     const auto FIRST_OUTPUT_STREAM_INDEX = FIRST_INPUT_STREAM_INDEX + numOfInputs;
    464     const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    465     for (unsigned i = 0; i < numOfOutputs; ++i) {
    466         const auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    467         Value * const handle = buffer->getHandle();
    468         indices[1] = b->getInt32(FIRST_OUTPUT_STREAM_INDEX + i);
    469         b->CreateStore(handle, b->CreateGEP(threadState, indices));
    470     }
    471 
     477    const auto props = mPipelineKernel->getDoSegmentProperties(b);
     478    indices[1] = b->getInt32(SHARED_STATE_INDEX);
     479    indices.push_back(nullptr);
     480    const auto n = props.size();
     481    assert (threadStructType->getStructElementType(SHARED_STATE_INDEX)->getStructNumElements() == n);
     482    for (unsigned i = 0; i < n; ++i) {
     483        indices[2] = b->getInt32(i);
     484        b->CreateStore(props[i], b->CreateGEP(threadState, indices));
     485    }
    472486    return threadState;
    473487}
     
    477491 ** ------------------------------------------------------------------------------------------------------------- */
    478492inline Value * PipelineCompiler::setThreadState(BuilderRef b, Value * threadState) {
    479 
    480493    std::vector<Value *> indices(2);
    481494    indices[0] = b->getInt32(0);
    482     indices[1] = b->getInt32(HANDLE_INDEX);
    483 
    484     Value * handle = b->CreateLoad(b->CreateGEP(threadState, indices));
    485     mPipelineKernel->setHandle(b, handle);
    486 
    487495    indices[1] = b->getInt32(SEGMENT_OFFSET_INDEX);
    488496    Value * const segmentOffset = b->CreateLoad(b->CreateGEP(threadState, indices));
    489 
    490497    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
    491498    setThreadLocalState(b, b->CreateGEP(threadState, indices));
    492 
    493     const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    494     for (unsigned i = 0; i < numOfInputs; ++i) {
    495         indices[1] = b->getInt32(FIRST_INPUT_STREAM_INDEX + i);
    496         Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
    497         auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    498         buffer->setHandle(b, streamHandle);
    499     }
    500     const auto FIRST_OUTPUT_STREAM_INDEX = FIRST_INPUT_STREAM_INDEX + numOfInputs;
    501     const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    502     for (unsigned i = 0; i < numOfOutputs; ++i) {
    503         indices[1] = b->getInt32(FIRST_OUTPUT_STREAM_INDEX + i);
    504         Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
    505         auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    506         buffer->setHandle(b, streamHandle);
    507     }
    508 
     499    indices[1] = b->getInt32(SHARED_STATE_INDEX);
     500    indices.push_back(nullptr);
     501    Type * const kernelStructType = threadState->getType()->getPointerElementType();
     502    const auto n = kernelStructType->getStructElementType(SHARED_STATE_INDEX)->getStructNumElements();
     503    std::vector<Value *> args(n);
     504    args.reserve(n);
     505    for (unsigned i = 0; i < n; ++i) {
     506        indices[2] = b->getInt32(i);
     507        args[i] = b->CreateLoad(b->CreateGEP(threadState, indices));
     508    }
     509    mPipelineKernel->setDoSegmentProperties(b, args);
    509510    return segmentOffset;
    510511}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6273 r6288  
    2828    forEachOutputBufferThatIsAPopCountReference(mKernelIndex, [&](const unsigned bufferVertex) {
    2929
    30         const auto bufferPort = mBufferGraph[in_edge(bufferVertex, mBufferGraph)].Port;
     30        const auto producerLink = in_edge(bufferVertex, mBufferGraph);
     31        const auto bufferPort = mBufferGraph[producerLink].outputPort();
    3132        const Binding & output = mKernel->getOutputStreamSetBinding(bufferPort);
    3233
     
    353354inline Value * PipelineCompiler::getPopCountNextBaseOffset(BuilderRef b, const unsigned bufferVertex) const {
    354355    const auto e = in_edge(bufferVertex, mBufferGraph);
    355     const auto outputPort = mBufferGraph[e].Port;
     356    const auto outputPort = mBufferGraph[e].outputPort();
    356357    const PopCountData & pc = getPopCountData(bufferVertex);
    357358    if (pc.UsesConsumedCount) {
     
    443444        if (LLVM_UNLIKELY(!pc.UsesConsumedCount)) {
    444445            const auto e = in_edge(bufferVertex, mBufferGraph);
    445             const auto port = mBufferGraph[e].Port;
     446            const auto port = mBufferGraph[e].outputPort();
    446447            const Binding & output = mPipeline[index]->getOutputStreamSetBinding(port);
    447448            const auto bufferName = makeBufferName(index, output);
     
    671672    // we can still use it.
    672673    for (const auto e : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    673         const auto port = mBufferGraph[e].Port;
     674        const auto port = mBufferGraph[e].inputPort();
    674675        const auto kernelVertex = target(e, mBufferGraph);
    675676        Kernel * const consumer = mPipeline[kernelVertex];
     
    737738        const auto bufferVertex = source(e, mBufferGraph);
    738739        if (LLVM_UNLIKELY(in_degree(bufferVertex, mPopCountGraph) != 0)) {
    739             func(bufferVertex, mBufferGraph[e].Port);
    740         }
    741     }
    742 }
    743 
    744 }
     740            func(bufferVertex, mBufferGraph[e].inputPort());
     741        }
     742    }
     743}
     744
     745}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/termination_logic.hpp

    r6272 r6288  
    2323
    2424    const auto numOfCalls = mPipelineKernel->getCallBindings().size();
    25     const auto pipelineInput = 0;
    26     const auto pipelineOutput = mLastKernel;
    27     const auto firstCall = pipelineOutput + 1;
     25    const auto firstCall = mPipelineOutput + 1;
    2826    const auto lastCall = firstCall + numOfCalls;
    29     const auto n = pipelineOutput + 1;
     27    const auto n = mPipelineOutput + 1;
    3028
    3129    // TODO: if the lower bound of an input is 0 or the input is zero-extended,
     
    4644
    4745    // 2) copy and summarize any output scalars of the pipeline or any calls
    48     for (unsigned i = pipelineOutput; i < lastCall; ++i) {
     46    for (unsigned i = mPipelineOutput; i < lastCall; ++i) {
    4947        for (auto relationship : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
    5048            const auto relationshipVertex = source(relationship, mScalarDependencyGraph);
    5149            for (auto producer : make_iterator_range(in_edges(relationshipVertex, mScalarDependencyGraph))) {
    5250                const auto kernel = source(producer, mScalarDependencyGraph);
    53                 assert ("cannot occur" && kernel != pipelineOutput);
    54                 add_edge(kernel, pipelineOutput, G);
     51                assert ("cannot occur" && kernel != mPipelineOutput);
     52                add_edge(kernel, mPipelineOutput, G);
    5553            }
    5654        }
     
    6563        }
    6664        if (LLVM_UNLIKELY(kernel->hasAttribute(AttrId::SideEffecting))) {
    67             add_edge(i, pipelineOutput, G);
    68         }
    69     }
    70 
    71 
    72 
     65            add_edge(i, mPipelineOutput, G);
     66        }
     67    }
    7368
    7469    // generate a transitive closure
     
    8883    // then take the transitive reduction
    8984    dynamic_bitset<> sources(n, false);
    90     for (unsigned u = pipelineOutput; u--; ) {
     85    for (unsigned u = mPipelineOutput; u--; ) {
    9186        for (auto e : make_iterator_range(in_edges(u, G))) {
    9287            sources.set(source(e, G), true);
     
    156151        for (auto e : make_iterator_range(in_edges(i, G))) {
    157152            const auto j = source(e, G);
    158             if (LLVM_UNLIKELY(j == pipelineInput)) continue;
     153            if (LLVM_UNLIKELY(j == mPipelineInput)) continue;
    159154            add_flow_edge(firstOut + j, firstIn + i);
    160155        }
     
    195190            if (j < mLastKernel) {
    196191                G[j] = pathCount;
    197                 add_edge(j, pipelineInput, ++k, G);
     192                add_edge(j, mPipelineInput, ++k, G);
    198193            }
    199194            ++pathCount;
     
    204199    // simultaneously in the constructor.
    205200    mTerminationSignals.resize(pathCount, nullptr);
     201
     202    assert ("a pipeline with no sinks ought to produce no observable data"
     203            && in_degree(mPipelineOutput, G) > 0);
     204    assert ("termination graph construction error?"
     205            && out_degree(mPipelineOutput, G) == 0);
    206206
    207207    return G;
     
    283283
    284284/** ------------------------------------------------------------------------------------------------------------- *
    285  * @brief pipelineTerminated
    286  ** ------------------------------------------------------------------------------------------------------------- */
    287 inline Value * PipelineCompiler::pipelineTerminated(BuilderRef b) const {
    288     const auto pipelineOutput = mLastKernel;
    289     assert ("a pipeline with no sinks ought to produce no observable data"
    290             && in_degree(pipelineOutput, mTerminationGraph) > 0);
    291     assert ("termination graph construction error?"
    292             && out_degree(pipelineOutput, mTerminationGraph) == 0);
    293     Value * terminated = b->getTrue();
    294     // check whether every sink has terminated
    295     for (const auto e : make_iterator_range(in_edges(pipelineOutput, mTerminationGraph))) {
    296         const auto kernel = source(e, mTerminationGraph);
    297         terminated = b->CreateAnd(terminated, hasKernelTerminated(b, kernel));
    298     }
    299     return terminated;
    300 }
    301 
    302 /** ------------------------------------------------------------------------------------------------------------- *
    303285 * @brief setTerminated
    304286 ** ------------------------------------------------------------------------------------------------------------- */
  • icGREP/icgrep-devel/icgrep/kernels/pipeline_builder.h

    r6261 r6288  
    6767                    Bindings && scalar_inputs, Bindings && scalar_outputs,
    6868                    const unsigned numOfThreads = 1);
    69    
     69
    7070    virtual ~PipelineBuilder() {}
    7171
    7272protected:
     73
     74
     75    // Internal pipeline constructor uses a zero-length tag struct to prevent
     76    // overloading errors. This parameter will be dropped by the compiler.
     77    struct Internal {};
     78    PipelineBuilder(Internal, BaseDriver & driver,
     79                    Bindings stream_inputs, Bindings stream_outputs,
     80                    Bindings scalar_inputs, Bindings scalar_outputs,
     81                    const unsigned numOfThreads = 1);
    7382
    7483    virtual Kernel * makeKernel();
     
    7786
    7887    void addInputScalar(llvm::Type * type, std::string name);
    79 
    80     llvm::Function * addOrDeclareMainFunction(PipelineKernel * const pk);
    8188
    8289protected:
     
    124131public:
    125132
    126     const std::unique_ptr<PipelineBuilder> & getTrueBranch() const {
    127         return mTrueBranch;
     133    const std::unique_ptr<PipelineBuilder> & getNonZeroBranch() const {
     134        return mNonZeroBranch;
    128135    }
    129136
    130     const std::unique_ptr<PipelineBuilder> & getFalseBranch() const {
    131         return mFalseBranch;
     137    const std::unique_ptr<PipelineBuilder> & getAllZeroBranch() const {
     138        return mAllZeroBranch;
    132139    }
    133140
     
    144151private:
    145152    Relationship * const             mCondition;
    146     std::unique_ptr<PipelineBuilder> mTrueBranch;
    147     std::unique_ptr<PipelineBuilder> mFalseBranch;
     153    std::unique_ptr<PipelineBuilder> mNonZeroBranch;
     154    std::unique_ptr<PipelineBuilder> mAllZeroBranch;
    148155};
    149156
  • icGREP/icgrep-devel/icgrep/kernels/pipeline_kernel.h

    r6261 r6288  
    7575    virtual ~PipelineKernel();
    7676
    77     bool isProxy() const {
    78         return mKernels.size() == 1;
    79     }
     77    enum MainMethodGenerationType {
     78        AddInternal
     79        , DeclareExternal
     80        , AddExternal
     81    };
     82
     83    llvm::Function * addOrDeclareMainFunction(const std::unique_ptr<kernel::KernelBuilder> & b, const MainMethodGenerationType method);
     84
     85    LLVM_READNONE bool hasStaticMain() const;
    8086
    8187protected:
     
    9399    void linkExternalMethods(const std::unique_ptr<KernelBuilder> & b) final;
    94100
     101    void addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) final;
     102
    95103    void generateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) final;
    96104
     
    102110
    103111    void addAdditionalFunctions(const std::unique_ptr<KernelBuilder> & b) final;
    104 
    105     llvm::Value * finalizeInstance(const std::unique_ptr<KernelBuilder> & b) final;
    106112
    107113    void addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & b) final;
     
    117123    std::vector<llvm::Value *> getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) final;
    118124
    119     enum MainMethodGenerationType {
    120         AddInternal
    121         , DeclareExternal
    122         , AddExternal
    123     };
    124 
    125     llvm::Function * addOrDeclareMainFunction(const std::unique_ptr<kernel::KernelBuilder> & b, const MainMethodGenerationType method);
    126 
    127     LLVM_READNONE bool hasStaticMain() const;
    128 
    129125protected:
    130126
    131     std::unique_ptr<PipelineCompiler> mCompiler;
    132     const unsigned                     mNumOfThreads;
    133     const Kernels                      mKernels;
    134     CallBindings                       mCallBindings;
    135     const std::string                  mSignature;
     127    mutable std::unique_ptr<PipelineCompiler> mCompiler;
     128    const unsigned                            mNumOfThreads;
     129    const Kernels                             mKernels;
     130    CallBindings                              mCallBindings;
     131    const std::string                         mSignature;
    136132};
    137133
  • icGREP/icgrep-devel/icgrep/kernels/zeroextend.cpp

    r6286 r6288  
    5353
    5454    const auto inputVectorSize = (blockWidth / inputFieldWidth); assert (is_power_2(inputVectorSize));
    55     const auto outputVectorWidth = (blockWidth / outputFieldWidth); assert (is_power_2(outputVectorWidth));
     55    const auto outputVectorSize = (blockWidth / outputFieldWidth); assert (is_power_2(outputVectorSize));
    5656
    5757    IntegerType * const sizeTy = b->getSizeTy();
     
    6262    PointerType * const inputPtrTy = inputTy->getPointerTo();
    6363
     64    VectorType * const outputTy = VectorType::get(b->getIntNTy(outputFieldWidth), outputVectorSize);
     65    PointerType * const outputPtrTy = outputTy->getPointerTo();
     66
    6467    Value * const processed = b->getProcessedItemCount(input.getName());
    65     Value * const baseInputPtr = b->getRawInputPointer(input.getName(), processed);
     68    Value * const baseInputPtr = b->CreatePointerCast(b->getRawInputPointer(input.getName(), processed), inputPtrTy);
    6669
    6770    Value * const produced = b->getProducedItemCount(output.getName());
    68     Value * const baseOutputPtr = b->getRawOutputPointer(output.getName(), produced);
     71    Value * const baseOutputPtr = b->CreatePointerCast(b->getRawOutputPointer(output.getName(), produced), outputPtrTy);
    6972
    7073    BasicBlock * const entry = b->GetInsertBlock();
     
    7780
    7881    std::vector<Value *> inputBuffer(inputFieldWidth);
    79     Constant * const log2InputSize = b->getSize(std::log2<unsigned>(inputFieldWidth));
    80 
    8182    // read the values from the input stream
     83    Value * const baseInputOffset = b->CreateMul(index, b->getSize(inputFieldWidth));
    8284    for (unsigned i = 0; i < inputFieldWidth; ++i) {
    83         Value * offset = b->CreateShl(index, log2InputSize);
    84         offset = b->CreateAdd(offset, b->getSize(i * inputVectorSize));
    85         Value * ptr = b->CreateGEP(baseInputPtr, offset);
    86         ptr = b->CreatePointerCast(ptr, inputPtrTy);
     85        Value * const offset = b->CreateAdd(baseInputOffset, b->getSize(i));
     86        Value * const ptr = b->CreateGEP(baseInputPtr, offset);
    8787        inputBuffer[i] = b->CreateAlignedLoad(ptr, (inputFieldWidth / CHAR_BIT));
    8888    }
     
    136136
    137137    // write the values to the output stream
    138     VectorType * const outputTy = cast<VectorType>(outputBuffer[0]->getType());
    139     PointerType * const outputPtrTy = outputTy->getPointerTo();
    140     Constant * const log2OutputSize = b->getSize(std::log2<unsigned>(outputFieldWidth));
     138    Value * const baseOutputOffset = b->CreateMul(index, b->getSize(outputFieldWidth));
    141139    for (unsigned i = 0; i < outputFieldWidth; ++i) {
    142         Value * offset = b->CreateShl(index, log2OutputSize);
    143         offset = b->CreateAdd(offset, b->getSize(i * outputVectorWidth));
    144         Value * ptr = b->CreateGEP(baseOutputPtr, offset);
    145         ptr = b->CreatePointerCast(ptr, outputPtrTy);
     140        Value * const offset = b->CreateAdd(baseOutputOffset, b->getSize(i));
     141        Value * const ptr = b->CreateGEP(baseOutputPtr, offset);
    146142        b->CreateAlignedStore(outputBuffer[i], ptr, (outputFieldWidth / CHAR_BIT));
    147143    }
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.h

    r6184 r6288  
    99
    1010#include <toolchain/driver.h>
    11 #include <toolchain/object_cache_manager.h>
    1211
    1312class NVPTXDriver final : public BaseDriver {
     
    2221    void generateUncachedKernels() { }
    2322
    24     void * finalizeObject(llvm::Function * mainMethod) override;
     23    void * finalizeObject(kernel::PipelineKernel * pipeline) override;
    2524
    2625    bool hasExternalFunction(const llvm::StringRef /* functionName */) const override { return false; }
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r6275 r6288  
    1111#include <llvm/InitializePasses.h>                 // for initializeCodeGen
    1212#include <llvm/PassRegistry.h>                     // for PassRegistry
    13 #include <llvm/Support/raw_ostream.h>              // for errs()
    1413#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
    1514#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
     
    6766, mEngine(nullptr)
    6867#endif
    69 , mPassManager(nullptr)
    70 , mUnoptimizedIROutputStream(nullptr)
    71 , mIROutputStream(nullptr)
    72 , mASMOutputStream(nullptr) {
     68, mPassManager{}
     69, mUnoptimizedIROutputStream{}
     70, mIROutputStream{}
     71, mASMOutputStream{} {
    7372
    7473    InitializeNativeTarget();
    7574    InitializeNativeTargetAsmPrinter();
    76 //    InitializeNativeTargetAsmParser();
    7775    llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
    78 
    7976
    8077    #ifdef ORCJIT
     
    138135Function * CPUDriver::addLinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    139136    if (LLVM_UNLIKELY(mod == nullptr)) {
    140         report_fatal_error("addLinkFunction(" + name + ") cannot be called until after addKernelCall or makeKernelCall");
     137        report_fatal_error("addLinkFunction(" + name + ") cannot be called until after addKernel");
    141138    }
    142139    Function * f = mod->getFunction(name);
     
    248245}
    249246
    250 void * CPUDriver::finalizeObject(llvm::Function * mainMethod) {
     247void * CPUDriver::finalizeObject(PipelineKernel * const pipeline) {
    251248
    252249    #ifdef ORCJIT
     
    277274
    278275    iBuilder->setModule(mMainModule);
     276    // write/declare the "main" method
     277    const auto method = pipeline->hasStaticMain() ? PipelineKernel::DeclareExternal : PipelineKernel::AddInternal;
     278    Function * const main = pipeline->addOrDeclareMainFunction(iBuilder, method);
     279
    279280    #ifdef ORCJIT
    280281    std::vector<std::unique_ptr<Module>> moduleSet;
     
    285286            report_fatal_error(kernel->getName() + " was neither loaded from cache nor generated prior to finalizeObject");
    286287        }
     288        kernel->addKernelDeclarations(iBuilder);
    287289        #ifndef ORCJIT
    288290        mEngine->addModule(std::unique_ptr<Module>(kernel->getModule()));
     
    299301    mCompileLayer->addModuleSet(std::move(moduleSet), make_unique<SectionMemoryManager>(), std::move(Resolver));
    300302    #endif
    301 
    302303    // return the compiled main method
    303304    #ifndef ORCJIT
    304     return mEngine->getPointerToFunction(mainMethod);
    305     #else
    306     auto MainSym = mCompileLayer->findSymbol(getMangledName(mMainMethod->getName()), false);
     305    return mEngine->getPointerToFunction(main);
     306    #else
     307    auto MainSym = mCompileLayer->findSymbol(getMangledName(main->getName()), false);
    307308    assert (MainSym && "Main not found");
    308309    return (void *)MainSym.getAddress();
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.h

    r6275 r6288  
    4444    void generateUncachedKernels() override;
    4545
    46     void * finalizeObject(llvm::Function * mainMethod) override;
     46    void * finalizeObject(kernel::PipelineKernel * const pipeline) override;
    4747
    4848    bool hasExternalFunction(const llvm::StringRef functionName) const override;
  • icGREP/icgrep-devel/icgrep/toolchain/driver.cpp

    r6253 r6288  
    5353 ** ------------------------------------------------------------------------------------------------------------- */
    5454void BaseDriver::addKernel(Kernel * const kernel) {
     55    kernel->initializeBindings(*this);
    5556    if (ParabixObjectCache::checkForCachedKernel(iBuilder, kernel)) {
    5657        assert (kernel->getModule());
  • icGREP/icgrep-devel/icgrep/toolchain/driver.h

    r6253 r6288  
    5050    virtual void generateUncachedKernels() = 0;
    5151
    52     virtual void * finalizeObject(llvm::Function * mainMethod) = 0;
     52    virtual void * finalizeObject(kernel::PipelineKernel * pipeline) = 0;
    5353
    5454    virtual ~BaseDriver();
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r6246 r6288  
    55 */
    66
    7 #include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
     7
     8#include <IR_Gen/idisa_target.h>
     9
    810#include <cc/alphabet.h>
    9 #include <cc/cc_compiler.h>                        // for CC_Compiler
     11#include <cc/cc_compiler.h>
     12#include <cc/cc_kernel.h>
     13#include <kernels/deletion.h>
     14#include <kernels/kernel_builder.h>
     15#include <kernels/p2s_kernel.h>
    1016#include <kernels/pipeline_builder.h>
    11 #include <kernels/deletion.h>                      // for DeletionKernel
    12 #include <kernels/swizzle.h>                      // for DeletionKernel
     17#include <kernels/s2p_kernel.h>
    1318#include <kernels/source_kernel.h>
    14 #include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
    15 #include <kernels/s2p_kernel.h>                    // for S2PKernel
    16 #include <kernels/stdout_kernel.h>                 // for StdOutKernel_
    17 #include <llvm/ExecutionEngine/ExecutionEngine.h>  // for ExecutionEngine
    18 #include <llvm/IR/Function.h>                      // for Function, Function...
    19 #include <llvm/IR/Module.h>                        // for Module
    20 #include <llvm/IR/Verifier.h>                      // for verifyModule
    21 #include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
    22 #include <llvm/Support/Debug.h>                    // for dbgs
    23 #include <pablo/pablo_kernel.h>                    // for PabloKernel
    24 #include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
    25 #include <kernels/kernel_builder.h>
     19#include <kernels/stdout_kernel.h>
     20#include <kernels/swizzle.h>
     21#include <kernels/zeroextend.h>
     22#include <pablo/builder.hpp>
     23#include <pablo/pablo_kernel.h>
     24#include <pablo/pablo_toolchain.h>
    2625#include <pablo/pe_zeroes.h>
     26#include <toolchain/cpudriver.h>
    2727#include <toolchain/toolchain.h>
    28 #include <toolchain/cpudriver.h>
    29 #include <kernels/streamset.h>
    30 #include <llvm/ADT/StringRef.h>
    31 #include <llvm/IR/CallingConv.h>
    32 #include <llvm/IR/DerivedTypes.h>
    33 #include <llvm/IR/LLVMContext.h>
    34 #include <llvm/IR/Value.h>
    35 #include <llvm/Support/Compiler.h>
    36 #include <pablo/builder.hpp>
    37 #include <boost/interprocess/anonymous_shared_memory.hpp>
    38 #include <boost/interprocess/mapped_region.hpp>
     28
     29#include <sys/stat.h>
     30#include <fcntl.h>
    3931#include <iostream>
    4032
     
    4335using namespace llvm;
    4436using namespace codegen;
     37using namespace re;
    4538
    4639static cl::OptionCategory u8u16Options("u8u16 Options", "Transcoding control options.");
     
    5043static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
    5144static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
     45
     46static cl::opt<bool> BranchingMode("branch", cl::desc("Use Experimental branching pipeline mode"), cl::cat(u8u16Options));
    5247
    5348inline bool useAVX2() {
     
    9691
    9792    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
    98     PabloAST * nonASCII = ccc.compileCC(re::makeByte(0x80, 0xFF));
     93    PabloAST * nonASCII = ccc.compileCC(makeByte(0x80, 0xFF));
    9994
    10095    // Builder for the if statement handling all non-ASCII logic
     
    109104
    110105    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
    111     PabloAST * pfx34 = ccc.compileCC(re::makeByte(0xE0, 0xFF), nAb);
     106    PabloAST * pfx34 = ccc.compileCC(makeByte(0xE0, 0xFF), nAb);
    112107    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
    113108    auto p34b = nAb.createScope();
     
    128123    //
    129124    // Entry condition for 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
    130     PabloAST * pfx4 = ccc.compileCC(re::makeByte(0xF0, 0xFF), p34b);
     125    PabloAST * pfx4 = ccc.compileCC(makeByte(0xF0, 0xFF), p34b);
    131126    // Builder for the if statement handling all logic for 4-byte sequences only.
    132127    auto p4b = p34b.createScope();
    133128    // Illegal 4-byte sequences
    134     PabloAST * F0 = ccc.compileCC(re::makeByte(0xF0), p4b);
    135     PabloAST * F4 = ccc.compileCC(re::makeByte(0xF4), p4b);
    136     PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeByte(0x80, 0x8F), p4b));
    137     PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeByte(0x90, 0xBF), p4b));
    138     PabloAST * F5_FF = ccc.compileCC(re::makeByte(0xF5, 0xFF), p4b);
     129    PabloAST * F0 = ccc.compileCC(makeByte(0xF0), p4b);
     130    PabloAST * F4 = ccc.compileCC(makeByte(0xF4), p4b);
     131    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(makeByte(0x80, 0x8F), p4b));
     132    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(makeByte(0x90, 0xBF), p4b));
     133    PabloAST * F5_FF = ccc.compileCC(makeByte(0xF5, 0xFF), p4b);
    139134
    140135    Var * FX_err = p34b.createVar("FX_err", zeroes);
     
    185180    // Combined logic for 3 and 4 byte sequences
    186181    //
    187     PabloAST * pfx3 = ccc.compileCC(re::makeByte(0xE0, 0xEF), p34b);
     182    PabloAST * pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), p34b);
    188183
    189184    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
     
    191186
    192187    // Illegal 3-byte sequences
    193     PabloAST * E0 = ccc.compileCC(re::makeByte(0xE0), p34b);
    194     PabloAST * ED = ccc.compileCC(re::makeByte(0xED), p34b);
    195     PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeByte(0x80, 0x9F), p34b));
    196     PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeByte(0xA0, 0xBF), p34b));
     188    PabloAST * E0 = ccc.compileCC(makeByte(0xE0), p34b);
     189    PabloAST * ED = ccc.compileCC(makeByte(0xED), p34b);
     190    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(makeByte(0x80, 0x9F), p34b));
     191    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(makeByte(0xA0, 0xBF), p34b));
    197192    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
    198193
     
    221216    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
    222217
    223     PabloAST * pfx2 = ccc.compileCC(re::makeByte(0xC0, 0xDF), nAb);
     218    PabloAST * pfx2 = ccc.compileCC(makeByte(0xC0, 0xDF), nAb);
    224219    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
    225220    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
    226221    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
    227222
    228     PabloAST * C0_C1_err = ccc.compileCC(re::makeByte(0xC0, 0xC1), nAb);
    229     PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeByte(0x80, 0xBF), nAb));
     223    PabloAST * C0_C1_err = ccc.compileCC(makeByte(0xC0, 0xC1), nAb);
     224    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(makeByte(0x80, 0xBF), nAb));
    230225    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
    231     nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeByte(0xC0, 0xFF), nAb)));
     226    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(makeByte(0xC0, 0xFF), nAb)));
    232227
    233228    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
     
    245240    //
    246241    //
    247     PabloAST * ASCII = ccc.compileCC(re::makeByte(0x0, 0x7F));
     242    PabloAST * ASCII = ccc.compileCC(makeByte(0x0, 0x7F));
    248243    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
    249244    main.createAssign(u16_lo[6], main.createOr(main.createAnd(ASCII, u8_bits[6]), p234_lo6));
     
    267262
    268263typedef void (*u8u16FunctionType)(uint32_t fd, const char *);
     264
     265// ------------------------------------------------------
    269266
    270267u8u16FunctionType generatePipeline(CPUDriver & pxDriver) {
     
    313310}
    314311
     312// ------------------------------------------------------
     313
     314void makeNonAsciiBranch(const std::unique_ptr<PipelineBuilder> & P, const unsigned FieldWidth, StreamSet * const ByteStream, StreamSet * const u16bytes) {
            // Adds to pipeline builder P the kernel chain that reads ByteStream and
            // writes u16bytes for the non-ASCII case:
            //   S2PKernel -> U8U16Kernel -> (deletion/compression kernels) -> P2S16 variant.
            //
            // P          : builder that receives every stream set and kernel call made here.
            // FieldWidth : forwarded unchanged to FieldCompressKernel; only used when
            //              useAVX2() is false.
            // ByteStream : input stream set handed to S2PKernel.
            // u16bytes   : output stream set written by the final P2S16 kernel.
     315
     316    // Transposed bits from s2p
     317    StreamSet * BasisBits = P->CreateStreamSet(8);
     318    P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits);
     319
     320    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
            // Note: despite its name, u8bits is a 16-stream set; per the comment above it
            // carries the UTF-16 bits while still indexed by UTF-8 position.
     321    StreamSet * u8bits = P->CreateStreamSet(16);
            // selectors is created with CreateStreamSet()'s default arguments and is
            // consumed below by whichever deletion/compression path is taken.
     322    StreamSet * selectors = P->CreateStreamSet();
     323    P->CreateKernelCall<U8U16Kernel>(BasisBits, u8bits, selectors);
     324
     325    StreamSet * u16bits = P->CreateStreamSet(16);
            // Two alternative ways of reducing u8bits to u16bits under control of selectors;
            // the choice is made once, when the pipeline is constructed.
     326    if (useAVX2()) {
     327        // Allocate space for fully compressed swizzled UTF-16 bit streams
     328        std::vector<StreamSet *> u16Swizzles(4);
     329        u16Swizzles[0] = P->CreateStreamSet(4);
     330        u16Swizzles[1] = P->CreateStreamSet(4);
     331        u16Swizzles[2] = P->CreateStreamSet(4);
     332        u16Swizzles[3] = P->CreateStreamSet(4);
     333        // Apply a deletion algorithm to discard all but the final position of the UTF-8
     334        // sequences (bit streams) for each UTF-16 code unit. Also compresses and swizzles the result.
     335        P->CreateKernelCall<SwizzledDeleteByPEXTkernel>(selectors, u8bits, u16Swizzles);
     336        // Produce unswizzled UTF-16 bit streams
     337        P->CreateKernelCall<SwizzleGenerator>(u16Swizzles, std::vector<StreamSet *>{u16bits});
     338        P->CreateKernelCall<P2S16Kernel>(u16bits, u16bytes);
     339    } else {
                // Non-AVX2 path: FieldCompressKernel fills u16bits from u8bits and selectors,
                // then the P2S16 variant below also takes selectors to write u16bytes.
                // NOTE(review): presumably the final compression is completed inside
                // P2S16KernelWithCompressedOutput - confirm against that kernel.
     340        P->CreateKernelCall<FieldCompressKernel>(FieldWidth, u8bits, selectors, u16bits);
     341        P->CreateKernelCall<P2S16KernelWithCompressedOutput>(u16bits, selectors, u16bytes);
     342    }
     343}
     344
     345void makeAllAsciiBranch(const std::unique_ptr<PipelineBuilder> & P, StreamSet * const ByteStream, StreamSet * const u16bytes) {
            // Adds a single ZeroExtend kernel call to P, reading ByteStream and
            // writing u16bytes. No intermediate stream sets are created here.
            // NOTE(review): presumably ZeroExtend widens each input item to the output
            // item width by zero-filling the high bits - confirm in kernels/zeroextend.cpp.
     346    P->CreateKernelCall<ZeroExtend>(ByteStream, u16bytes);
     347}
     348
     349u8u16FunctionType generatePipeline2(CPUDriver & pxDriver) {
            // Builds the branching variant of the u8u16 pipeline and returns the compiled
            // entry point as a u8u16FunctionType.
            //
            // Structure: MMapSourceKernel -> character-class kernel marking bytes
            // 0x80-0xFF -> optimization branch conditioned on that marker stream
            // (non-zero branch: makeNonAsciiBranch, all-zero branch: makeAllAsciiBranch)
            // -> FileSink.
            //
            // pxDriver : supplies the IR builder and creates the pipeline.
     350
     351    auto & b = pxDriver.getBuilder();
            // Two input scalars and no outputs. The scalar names are runtime lookup keys:
            // the spelling "inputFileDecriptor" (sic) must match the getInputScalar call below.
     352    auto P = pxDriver.makePipeline({Binding{b->getInt32Ty(), "inputFileDecriptor"}, Binding{b->getInt8PtrTy(), "outputFileName"}}, {});
     353    Scalar * fileDescriptor = P->getInputScalar("inputFileDecriptor");
     354    // File data from mmap
     355    StreamSet * const ByteStream = P->CreateStreamSet(1, 8);
            // Created before the branch so both branch bodies and the sink share it.
     356    StreamSet * const u16bytes = P->CreateStreamSet(1, 16);
     357    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, ByteStream);
     358
            // Marker stream used as the branch condition.
     359    StreamSet * const nonAscii =  P->CreateStreamSet();
     360
            // Byte class 0x80-0xFF, i.e. every byte value outside the 7-bit ASCII range.
     361    CC * const nonAsciiCC = makeByte(0x80, 0xFF);
     362    P->CreateKernelCall<CharacterClassKernelBuilder>(
     363        "nonASCII", std::vector<CC *>{nonAsciiCC}, ByteStream, nonAscii);
     364
            // The branch is conditioned on nonAscii, takes ByteStream as its input binding
            // and declares u16bytes as its output with BoundedRate(0, 1).
            // NOTE(review): presumably the rate bound reflects at most one UTF-16 code
            // unit per input byte - confirm against the processing-rate semantics.
     365    auto B = P->CreateOptimizationBranch(nonAscii,
     366        {Binding{"ByteStream", ByteStream}}, {Binding{"u16bytes", u16bytes, BoundedRate(0, 1)}});
     367
            // The field width passed to the non-ASCII branch is the bit block width divided by 16.
     368    makeNonAsciiBranch(B->getNonZeroBranch(), b->getBitBlockWidth() / 16, ByteStream, u16bytes);
     369
     370    makeAllAsciiBranch(B->getAllZeroBranch(), ByteStream, u16bytes);
     371
     372    Scalar * outputFileName = P->getInputScalar("outputFileName");
     373    P->CreateKernelCall<FileSink>(outputFileName, u16bytes);
     374
            // compile()'s result is cast to the (uint32_t fd, const char *) function pointer type.
     375    return reinterpret_cast<u8u16FunctionType>(P->compile());
     376}
     377
    315378size_t file_size(const int fd) {
    316379    struct stat st;
     
    324387    codegen::ParseCommandLineOptions(argc, argv, {&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    325388    CPUDriver pxDriver("u8u16");
    326     auto u8u16Function = generatePipeline(pxDriver);
     389    u8u16FunctionType u8u16Function = nullptr;
     390    if (BranchingMode) {
     391        u8u16Function = generatePipeline2(pxDriver);
     392    } else {
     393        u8u16Function = generatePipeline(pxDriver);
     394    }
    327395    const int fd = open(inputFile.c_str(), O_RDONLY);
    328396    if (LLVM_UNLIKELY(fd == -1)) {
Note: See TracChangeset for help on using the changeset viewer.