Changeset 6258


Ignore:
Timestamp:
Dec 26, 2018, 5:13:20 PM (3 months ago)
Author:
nmedfort
Message:

Safer fix for LLVM bug + minor fixes/changes

Location:
icGREP/icgrep-devel/icgrep
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6253 r6258  
    366366add_custom_target (check
    367367  COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
    368   DEPENDS icgrep abc_gen)
     368  DEPENDS icgrep u8u16 u32u8 base64 editd abc_gen)
    369369
    370370add_custom_target (perf_icgrep
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6228 r6258  
    218218
    219219    Value * produced = b->getProducedItemCount("compressedOutput");
    220     Value * const pendingItemCount = b->CreateURem(produced, BlockWidth);
     220    Value * const pendingItemCount = b->CreateZExtOrTrunc(b->CreateURem(produced, BlockWidth), fwTy);
    221221
    222222    std::vector<Value *> pendingData(mStreamCount);
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r6253 r6258  
    305305
    306306/** ------------------------------------------------------------------------------------------------------------- *
    307  * @brief resolveStreamSetType
    308  ** ------------------------------------------------------------------------------------------------------------- */
    309 Type * KernelBuilder::resolveStreamSetType(Type * const streamSetType) {
    310     // TODO: Should this function be here? in StreamSetBuffer? or in Binding?
    311     unsigned numElements = 1;
    312     Type * type = streamSetType;
    313     if (LLVM_LIKELY(type->isArrayTy())) {
    314         numElements = type->getArrayNumElements();
    315         type = type->getArrayElementType();
    316     }
    317     if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
    318         type = type->getVectorElementType();
    319         if (LLVM_LIKELY(type->isIntegerTy())) {
    320             const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
    321             type = getBitBlockType();
    322             if (fieldWidth != 1) {
    323                 type = ArrayType::get(type, fieldWidth);
    324             }
    325             return ArrayType::get(type, numElements);
    326         }
    327     }
    328     std::string tmp;
    329     raw_string_ostream out(tmp);
    330     streamSetType->print(out);
    331     out << " is an unvalid stream set buffer type.";
    332     report_fatal_error(out.str());
    333 }
    334 
    335 /** ------------------------------------------------------------------------------------------------------------- *
    336307 * @brief getKernelName
    337308 ** ------------------------------------------------------------------------------------------------------------- */
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r6253 r6258  
    141141    llvm::Value * CreateCeilUMul2(llvm::Value * const number, const ProcessingRate::RateValue & factor, const llvm::Twine & Name = "");
    142142
    143     llvm::Type * resolveStreamSetType(llvm::Type * streamSetType);
    144 
    145143    unsigned getStride() const {
    146144        return mStride;
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6255 r6258  
    152152{Binding{"marker", mask, FixedRate(), Principal()},
    153153Binding{"source", source, PopcountOf("marker")}},
    154 {Binding{"output", expanded, FixedRate()}},
     154{Binding{"output", expanded, FixedRate(), BlockSize(1)}},
    155155{}, {}, {})
    156156, mFieldWidth(FieldWidth)
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6255 r6258  
    664664
    665665/** ------------------------------------------------------------------------------------------------------------- *
    666  * @brief getLogicalInputBaseAddress
    667  ** ------------------------------------------------------------------------------------------------------------- */
    668 inline Value * PipelineCompiler::getLogicalInputBaseAddress(BuilderRef b, const unsigned inputPort) {
    669     const Binding & input = mKernel->getInputStreamSetBinding(inputPort);
    670     const StreamSetBuffer * const buffer = getInputBuffer(inputPort);
    671     Value * const processed = mAlreadyProcessedPhi[inputPort];
    672     return calculateLogicalBaseAddress(b, input, buffer, processed);
    673 }
    674 
    675 /** ------------------------------------------------------------------------------------------------------------- *
    676  * @brief getLogicalOutputBaseAddress
    677  ** ------------------------------------------------------------------------------------------------------------- */
    678 inline Value * PipelineCompiler::getLogicalOutputBaseAddress(BuilderRef b, const unsigned outputPort) {
    679     const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    680     const StreamSetBuffer * const buffer = getOutputBuffer(outputPort);
    681     Value * const produced = mAlreadyProducedPhi[outputPort];
    682     return calculateLogicalBaseAddress(b, output, buffer, produced);
    683 }
    684 
    685 /** ------------------------------------------------------------------------------------------------------------- *
    686  * @brief calculateLogicalBaseAddress
     666 * @brief epoch
    687667 *
    688668 * Returns the address of the "zeroth" item of the (logically-unbounded) stream set.
    689669 ** ------------------------------------------------------------------------------------------------------------- */
    690 Value * PipelineCompiler::calculateLogicalBaseAddress(BuilderRef b, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemCount) {
     670Value * PipelineCompiler::epoch(BuilderRef b,
     671                                const Binding & binding,
     672                                const StreamSetBuffer * const buffer,
     673                                Value * const position,
     674                                Value * const available) const {
     675
    691676    Constant * const LOG_2_BLOCK_WIDTH = b->getSize(floor_log2(b->getBitBlockWidth()));
    692677    Constant * const ZERO = b->getSize(0);
    693     Value * const blockIndex = b->CreateLShr(itemCount, LOG_2_BLOCK_WIDTH);
     678    Value * const blockIndex = b->CreateLShr(position, LOG_2_BLOCK_WIDTH);
    694679    Value * address = buffer->getStreamLogicalBasePtr(b.get(), ZERO, blockIndex);
    695680    address = b->CreatePointerCast(address, buffer->getPointerType());
     
    700685        tmp.setHandle(b, handle);
    701686        tmp.setBaseAddress(b.get(), address);
     687        Value * const capacity = b->CreateAdd(position, available);
     688        tmp.setCapacity(b.get(), capacity);
    702689        Value * const A0 = buffer->getStreamBlockPtr(b.get(), ZERO, blockIndex);
    703690        Value * const B0 = tmp.getStreamBlockPtr(b.get(), ZERO, blockIndex);
    704691        Value * const C0 = b->CreatePointerCast(B0, A0->getType());
    705692        b->CreateAssert(b->CreateICmpEQ(A0, C0), prefix + ": logical start address is incorrect");
    706 //        Value * upToIndex = b->CreateAdd(blockIndex, b->CreateSub(mNumOfLinearStrides, b->getSize(1)));
    707 //        upToIndex = b->CreateSelect(b->CreateICmpEQ(mNumOfLinearStrides, ZERO), blockIndex, upToIndex);
    708 //        Value * const A1 = buffer->getStreamBlockPtr(b.get(), ZERO, upToIndex);
    709 //        Value * const B1 = tmp.getStreamBlockPtr(b.get(), ZERO, upToIndex);
    710 //        Value * const C1 = b->CreatePointerCast(B1, A1->getType());
    711 //        b->CreateAssert(b->CreateICmpEQ(A1, C1), prefix + ": logical end address is incorrect");
    712693    }
    713694    return address;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6255 r6258  
    209209
    210210    b->SetInsertPoint(mKernelLoopCall);
    211     expandOutputBuffers(b);
    212211    writeKernelCall(b);
    213212
     
    266265    writePopCountComputationLogic(b);
    267266    computeFullyProducedItemCounts(b);
     267    mKernelLoopExitPhiCatch->moveAfter(b->GetInsertBlock());
    268268    b->CreateBr(mKernelLoopExitPhiCatch);
    269269    b->SetInsertPoint(mKernelLoopExitPhiCatch);
     
    275275
    276276    b->SetInsertPoint(mKernelExit);
     277    mKernelExit->moveAfter(mKernelLoopExitPhiCatch);
    277278    writeFinalConsumedItemCounts(b);
    278279    updatePopCountReferenceCounts(b);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6255 r6258  
    173173        const auto size = getCopyBack(getOutputBufferVertex(outputPort));
    174174        if (size) {
    175             copyBack = b->getSize(size);
     175            copyBack = b->getSize(size - 1);
    176176        }
    177177    }
     
    382382
    383383/** ------------------------------------------------------------------------------------------------------------- *
    384  * @brief expandOutputBuffers
    385  ** ------------------------------------------------------------------------------------------------------------- */
    386 void PipelineCompiler::expandOutputBuffers(BuilderRef b) {
    387 
    388 }
    389 
    390 /** ------------------------------------------------------------------------------------------------------------- *
    391384 * @brief expandOutputBuffer
    392385 ** ------------------------------------------------------------------------------------------------------------- */
     
    457450    args.push_back(mNumOfLinearStrides);
    458451    for (unsigned i = 0; i < numOfInputs; ++i) {
    459         args.push_back(getLogicalInputBaseAddress(b, i));
     452
     453        // calculate the deferred processed item count
     454        PHINode * processed = nullptr;
     455        bool deferred = false;
     456
    460457        const Binding & input = mKernel->getInputStreamSetBinding(i);
    461         // calculate the deferred processed item count
    462         PHINode * itemCount = nullptr;
    463         bool deferred = false;
     458        #ifdef PRINT_DEBUG_MESSAGES
     459        const auto prefix = makeBufferName(mKernelIndex, input);
     460        b->CallPrintInt(prefix + "_processed", mAlreadyProcessedPhi[i]);
     461        b->CallPrintInt(prefix + "_accessible", mLinearInputItemsPhi[i]);
     462        #endif
     463
    464464        if (mAlreadyProcessedDeferredPhi[i]) {
    465             itemCount = mAlreadyProcessedDeferredPhi[i];
     465            #ifdef PRINT_DEBUG_MESSAGES
     466            b->CallPrintInt(prefix + "_deferred", mAlreadyProcessedDeferredPhi[i]);
     467            #endif
     468            processed = mAlreadyProcessedDeferredPhi[i];
    466469            deferred = true;
    467470        } else {
    468             itemCount = mAlreadyProcessedPhi[i];
    469         }
    470         mReturnedProcessedItemCountPtr[i] = addItemCountArg(b, input, deferred, itemCount, args);
     471            processed = mAlreadyProcessedPhi[i];
     472        }
    471473        // calculate how many linear items are from the *deferred* position
    472         Value * linearItemCount = mLinearInputItemsPhi[i];
    473         if (mAlreadyProcessedDeferredPhi[i]) {
     474        Value * inputItems = mLinearInputItemsPhi[i];
     475        if (deferred) {
    474476            Value * diff = b->CreateSub(mAlreadyProcessedPhi[i], mAlreadyProcessedDeferredPhi[i]);
    475             linearItemCount = b->CreateAdd(linearItemCount, diff);
    476         }
    477         args.push_back(linearItemCount);
     477            inputItems = b->CreateAdd(inputItems, diff);
     478        }
     479        args.push_back(epoch(b, input, getInputBuffer(i), processed, inputItems));
     480        mReturnedProcessedItemCountPtr[i] = addItemCountArg(b, input, deferred, processed, args);
     481
     482        args.push_back(inputItems);
    478483        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    479484            args.push_back(getPopCountArray(b, i));
     
    488493    for (unsigned i = 0; i < numOfOutputs; ++i) {
    489494        const auto nonManaged = getOutputBufferType(i) != BufferType::Managed;
     495        const Binding & output = mKernel->getOutputStreamSetBinding(i);
     496        PHINode * const produced = mAlreadyProducedPhi[i];
     497        Value * const writable = mLinearOutputItemsPhi[i];
     498
     499        #ifdef PRINT_DEBUG_MESSAGES
     500        const auto prefix = makeBufferName(mKernelIndex, output);
     501        b->CallPrintInt(prefix + "_produced", produced);
     502        b->CallPrintInt(prefix + "_writable", writable);
     503        #endif
     504
     505
    490506        if (LLVM_LIKELY(nonManaged)) {
    491             args.push_back(getLogicalOutputBaseAddress(b, i));
    492         }
    493         const Binding & output = mKernel->getOutputStreamSetBinding(i);
    494         PHINode * produced = mAlreadyProducedPhi[i];
     507            args.push_back(epoch(b, output, getOutputBuffer(i), produced, writable));
     508        }
    495509        mReturnedProducedItemCountPtr[i] = addItemCountArg(b, output, canTerminate, produced, args);
    496510        if (LLVM_LIKELY(nonManaged)) {
    497             args.push_back(mLinearOutputItemsPhi[i]);
     511            args.push_back(writable);
    498512        } else {
    499513            args.push_back(mConsumedItemCount[i]);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/lexographic_ordering.hpp

    r6228 r6258  
    4040        llvm::report_fatal_error(error);
    4141    }
     42
     43
     44
    4245    return L;
    4346}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6253 r6258  
    88#warning TODO: support call bindings that produce output that are inputs of other call bindings or become scalar outputs of the pipeline
    99
    10 #if 0
     10#if 1
    1111
    1212/** ------------------------------------------------------------------------------------------------------------- *
     
    109109    template <typename Graph, typename Vertex = typename graph_traits<Graph>::vertex_descriptor>
    110110    bool add_edge_if_no_induced_cycle(const Vertex s, const Vertex t, Graph & G) {
     111        // If s-t exists, skip adding this edge
     112        if (edge(s, t, G).second) {
     113            return true;
     114        }
    111115        // If G is a DAG and there is a t-s path, adding s-t will induce a cycle.
    112116        const auto d = in_degree(s, G);
     
    115119            V.reserve(num_vertices(G) - 1);
    116120            std::queue<Vertex> Q;
     121            // do a BFS to find a path from t to s
    117122            Q.push(t);
    118             // do a BFS to find one a path to s
    119123            for (;;) {
    120124                const auto u = Q.front();
     
    122126                for (auto e : make_iterator_range(out_edges(u, G))) {
    123127                    const auto v = target(e, G);
    124                     if (V.contains(v)) continue;
     128                    if (V.count(v) != 0) continue;
    125129                    if (LLVM_UNLIKELY(v == s)) return false;
    126130                    Q.push(v);
     
    176180
    177181    // check any dynamic buffer last
    178 
    179 
     182    std::vector<unsigned> D;
     183    D.reserve(numOfOutputs);
     184    for (unsigned i = 0; i < numOfOutputs; ++i) {
     185        if (LLVM_UNLIKELY(isa<DynamicBuffer>(getOutputBuffer(i)))) {
     186            D.push_back(i);
     187        }
     188    }
     189    for (const auto i : D) {
     190        for (unsigned j = 0; j < numOfInputs; ++j) {
     191            add_edge_if_no_induced_cycle(j, numOfInputs + i, G);
     192        }
     193        auto Dj = D.begin();
     194        for (unsigned j = 0; j < numOfOutputs; ++j) {
     195            if (*Dj == j) {
     196                ++Dj;
     197            } else {
     198                add_edge_if_no_induced_cycle(numOfInputs + j, numOfInputs + i, G);
     199            }
     200        }
     201        assert (Dj == D.end());
     202    }
    180203
    181204    // TODO: add additional constraints on input ports to indicate the ones
    182205    // likely to have fewest number of strides?
    183 
    184206
    185207    return lexicalOrdering(std::move(G), mKernel->getName() + " has cyclic port dependencies.");
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6255 r6258  
    196196
    197197    LLVM_READNONE StructType * getThreadStateType(BuilderRef b);
    198     AllocaInst * allocateThreadState(BuilderRef b, const unsigned segOffset);
     198    Value * allocateThreadState(BuilderRef b, const unsigned segOffset);
    199199    Value * setThreadState(BuilderRef b, Value * threadState);
    200200    void deallocateThreadState(BuilderRef b, Value * const threadState);
     
    253253// intra-kernel functions
    254254
    255     void expandOutputBuffers(BuilderRef b);
    256255    void expandOutputBuffer(BuilderRef b, const unsigned outputPort, Value * const hasEnough, BasicBlock * const target);
    257256
     
    352351    LLVM_READNONE unsigned getFacsimile(const unsigned bufferVertex) const;
    353352    BufferType getOutputBufferType(const unsigned outputPort) const;
    354 
    355     Value * getLogicalInputBaseAddress(BuilderRef b, const unsigned inputPort);
    356     Value * getLogicalOutputBaseAddress(BuilderRef b, const unsigned outputPort);
    357     Value * calculateLogicalBaseAddress(BuilderRef b, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemCount);
     353    Value * epoch(BuilderRef b, const Binding & binding, const StreamSetBuffer * const buffer, Value * const position, Value * const available) const;
    358354
    359355// cycle counter functions
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6249 r6258  
    66namespace kernel {
    77
     8// NOTE: the following is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures.
     9// When calculating the address of a local stack allocated object, the size of a pointer will
     10// be 32-bits but when performing a GEP on the same pointer as the result of a "malloc" or
     11// when passed as a function parameter, the size will be 64-bits. More investigation should be
     12// done to determine which versions of LLVM are affected by this bug.
     13
     14inline LLVM_READNONE bool useMalloc(BuilderRef b) {
     15    DataLayout DL(b->getModule());
     16    return (DL.getPointerSizeInBits() != b->getSizeTy()->getBitWidth());
     17}
     18
     19inline Value * makeStateObject(BuilderRef b, Type * type) {
     20    Value * ptr = nullptr;
     21    if (LLVM_UNLIKELY(useMalloc(b))) {
     22        ptr = b->CreateCacheAlignedMalloc(type);
     23    } else {
     24        ptr = b->CreateCacheAlignedAlloca(type);
     25    }
     26    b->CreateMemZero(ptr, ConstantExpr::getSizeOf(type), b->getCacheAlignment());
     27    return ptr;
     28}
     29
     30inline void destroyStateObject(BuilderRef b, Value * ptr) {
     31    if (LLVM_UNLIKELY(useMalloc(b))) {
     32        b->CreateFree(ptr);
     33    }
     34}
     35
    836/** ------------------------------------------------------------------------------------------------------------- *
    937 * @brief generateSingleThreadKernelMethod
     
    1240
    1341    StructType * const localStateType = getLocalStateType(b);
    14     Value * const localState = b->CreateCacheAlignedAlloca(localStateType);
     42    Value * const localState = makeStateObject(b, localStateType);
    1543    b->CreateMemZero(localState, ConstantExpr::getSizeOf(localStateType), b->getCacheAlignment());
    1644    allocateThreadLocalState(b, localState);
     
    2351    end(b, 1);
    2452    deallocateThreadLocalState(b, localState);
     53    destroyStateObject(b, localState);
    2554}
    2655
     
    171200inline StructType * PipelineCompiler::getThreadStateType(BuilderRef b) {
    172201    std::vector<Type *> threadStructFields;
    173 
    174     // NOTE: the following is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
    175     DataLayout DL(b->getModule());
    176     Type * handleType = mPipelineKernel->getHandle()->getType();
    177     Type * handleIntType = DL.getIntPtrType(handleType);
    178 
    179     threadStructFields.push_back(handleIntType);
     202    Type * const handleType = mPipelineKernel->getHandle()->getType();
     203    threadStructFields.push_back(handleType);
    180204    threadStructFields.push_back(b->getSizeTy());
    181205    threadStructFields.push_back(getLocalStateType(b));
     
    198222 * @brief allocateThreadState
    199223 ** ------------------------------------------------------------------------------------------------------------- */
    200 inline AllocaInst * PipelineCompiler::allocateThreadState(BuilderRef b, const unsigned segOffset) {
     224inline Value * PipelineCompiler::allocateThreadState(BuilderRef b, const unsigned segOffset) {
    201225
    202226    StructType * const threadStructType = getThreadStateType(b);
    203     AllocaInst * const threadState = b->CreateCacheAlignedAlloca(threadStructType);
    204     b->CreateMemZero(threadState, ConstantExpr::getSizeOf(threadStructType), b->getCacheAlignment());
     227    Value * const threadState = makeStateObject(b, threadStructType);
    205228
    206229    std::vector<Value *> indices(2);
    207230    indices[0] = b->getInt32(0);
    208231    indices[1] = b->getInt32(HANDLE_INDEX);
    209 
    210     // NOTE: this is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
    211     DataLayout DL(b->getModule());
    212     Type * handleType = mPipelineKernel->getHandle()->getType();
    213     Type * handleIntType = DL.getIntPtrType(handleType);
    214     Value * const handleInt = b->CreatePtrToInt(mPipelineKernel->getHandle(), handleIntType);
    215     b->CreateStore(handleInt, b->CreateGEP(threadState, indices));
     232    Value * const handle = mPipelineKernel->getHandle();
     233    b->CreateStore(handle, b->CreateGEP(threadState, indices));
    216234    indices[1] = b->getInt32(SEGMENT_OFFSET_INDEX);
    217235    b->CreateStore(b->getSize(segOffset), b->CreateGEP(threadState, indices));
     
    250268    indices[1] = b->getInt32(HANDLE_INDEX);
    251269
    252     // NOTE: this is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
    253     Value * handleInt = b->CreateLoad(b->CreateGEP(threadState, indices));
    254     Type * handleType = mPipelineKernel->getHandle()->getType();
    255     Value * handle = b->CreateIntToPtr(handleInt, handleType);
     270    Value * handle = b->CreateLoad(b->CreateGEP(threadState, indices));
    256271    mPipelineKernel->setHandle(b, handle);
    257272
     
    288303    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
    289304    deallocateThreadLocalState(b, b->CreateGEP(threadState, indices));
     305    destroyStateObject(b, threadState);
    290306}
    291307
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6255 r6258  
    5858        Value * const rounding = b->CreateSelect(mTerminatedPhi, BLOCK_SIZE_MINUS_1, ZERO);
    5959        Value * const endIndex = b->CreateLShr(b->CreateAdd(produced, rounding), LOG2_BLOCK_WIDTH);
    60 
    61 
    62 
    63 
    6460
    6561        // TODO: if the source items of the consumes of this pop count ref
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r6255 r6258  
    173173
    174174inline void ExternalBuffer::assertValidBlockIndex(IDISA_Builder * const b, Value * blockIndex) const {
    175     // TODO: how should lookahead be handled? we technically allow the kernel to "peek" one block past the
    176     // reported limit. Should the capacity simply be set as such?
    177175    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    178176        Value * const blockCount = b->CreateCeilUDiv(getCapacity(b), b->getSize(b->getBitBlockWidth()));
     
    184182
    185183Value * ExternalBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
    186 //    assertValidBlockIndex(b, blockIndex);
     184    //assertValidBlockIndex(b, blockIndex);
    187185    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, blockIndex);
    188186}
    189187
    190188Value * ExternalBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
    191 //    assertValidBlockIndex(b, blockIndex);
     189    //assertValidBlockIndex(b, blockIndex);
    192190    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, blockIndex, packIndex);
    193191}
     
    632630}
    633631
     632/** ------------------------------------------------------------------------------------------------------------- *
     633 * @brief resolveStreamSetType
     634 ** ------------------------------------------------------------------------------------------------------------- */
     635Type * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const streamSetType) {
     636    unsigned numElements = 1;
     637    Type * type = streamSetType;
     638    if (LLVM_LIKELY(type->isArrayTy())) {
     639        numElements = type->getArrayNumElements();
     640        type = type->getArrayElementType();
     641    }
     642    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
     643        type = type->getVectorElementType();
     644        if (LLVM_LIKELY(type->isIntegerTy())) {
     645            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
     646            type = b->getBitBlockType();
     647            if (fieldWidth != 1) {
     648                type = ArrayType::get(type, fieldWidth);
     649            }
     650            return ArrayType::get(type, numElements);
     651        }
     652    }
     653    std::string tmp;
     654    raw_string_ostream out(tmp);
     655    streamSetType->print(out);
     656    out << " is an unvalid stream set buffer type.";
     657    report_fatal_error(out.str());
     658}
     659
    634660StreamSetBuffer::StreamSetBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b,
    635661                                 Type * const baseType, const unsigned AddressSpace)
    636662: mBufferKind(k)
    637663, mHandle(nullptr)
    638 , mType(b->resolveStreamSetType(baseType))
     664, mType(resolveStreamSetType(b, baseType))
    639665, mAddressSpace(AddressSpace)
    640666, mBaseType(baseType) {
Note: See TracChangeset for help on using the changeset viewer.