source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5998

Last change on this file since 5998 was 5998, checked in by nmedfort, 18 months ago

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

File size: 40.1 KB
RevLine 
[5044]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5260]6#include "streamset.h"
[5436]7#include <llvm/IR/Module.h>
[5307]8#include <llvm/Support/raw_ostream.h>
[5408]9#include <kernels/kernel.h>
[5436]10#include <kernels/kernel_builder.h>
[5425]11#include <toolchain/toolchain.h>
[5542]12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
[5307]14
[5260]15namespace llvm { class Constant; }
16namespace llvm { class Function; }
[5191]17
[5100]18using namespace parabix;
[5260]19using namespace llvm;
20using namespace IDISA;
[5044]21
/// Returns true iff @p n is non-zero and has exactly one bit set.
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // Clearing the lowest set bit yields zero only when a single bit was set.
    return (n & (n - 1)) == 0;
}
[5506]25
26Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
27
[5436]28ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]29
[5436]30StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]31
[5755]32void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5706]33    assert (mBufferBlocks > 0);
[5985]34    assert ("allocate buffer was called twice" && !mStreamSetBufferPtr);
35    Type * const ty = getType();
36    const auto blocks = (mBufferBlocks + mOverflowBlocks);
37    if (mAddressSpace == 0) {
38        Constant * size = ConstantExpr::getSizeOf(ty);
39        size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), blocks));
40        mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5429]41    } else {
[5985]42        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(blocks));
[5429]43    }
[5044]44}
45
[5755]46void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5597]47    if (mAddressSpace == 0) {
[5755]48        b->CreateFree(mStreamSetBufferPtr);
[5597]49    }
50}
51
[5755]52inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
53    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
54}
55
56Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
57    assert (offset->getType()->isIntegerTy());
58    if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
59        return offset;
60    } else if (mBufferBlocks == 1) {
61        return ConstantInt::getNullValue(offset->getType());
62    } else if (is_power_2(mBufferBlocks)) {
63        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
64    } else {
65        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
66    }
67}
68
[5985]69Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
[5721]70    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
[5755]71        Value * const count = getStreamSetCount(b, handle);
72        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
73        Value * const cond = b->CreateICmpULT(index, count);
74        b->CreateAssert(cond, "out-of-bounds stream access");
[5486]75    }
[5985]76    return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex});
[5260]77}
78
[5985]79Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
[5721]80    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
[5755]81        Value * const count = getStreamSetCount(b, handle);
82        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
83        Value * const cond = b->CreateICmpULT(index, count);
84        b->CreateAssert(cond, "out-of-bounds stream access");
[5486]85    }
[5985]86    return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex, packIndex});
[5260]87}
88
[5998]89void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* addr */, Value * /* handle */) const {
[5398]90    report_fatal_error("setBaseAddress is not supported by this buffer type");
91}
92
[5985]93Value * StreamSetBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
94    return b->CreateGEP(getBaseAddress(b, handle), b->getSize(mBufferBlocks));
95}
96
[5755]97Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
98    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
[5398]99}
100
[5755]101void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
[5398]102    report_fatal_error("setBufferedSize is not supported by this buffer type");
103}
104
[5757]105Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
106    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
[5501]107}
108
[5755]109void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
[5501]110    report_fatal_error("setCapacity is not supported by this buffer type");
111}
112
[5755]113Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value *) const {
[5498]114    size_t count = 1;
[5329]115    if (isa<ArrayType>(mBaseType)) {
116        count = mBaseType->getArrayNumElements();
117    }
[5755]118    return b->getSize(count);
[5329]119}
120
[5856]121void StreamSetBuffer::doubleCapacity(IDISA::IDISA_Builder * const /* b */, Value */* handle */) const {
122    report_fatal_error("doubleCapacity is not supported by this buffer type");
123}
124
[5307]125/**
126 * @brief getRawItemPointer
127 *
128 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
129 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
130 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
131 */
[5755]132Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
133    Value * ptr = getBaseAddress(b, handle);
[5446]134    Value * relativePosition = absolutePosition;
[5755]135    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
136    const auto bw = elemTy->getPrimitiveSizeInBits();
137    assert (is_power_2(bw));
[5445]138    if (bw < 8) {
[5755]139        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
140        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
141            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
142        }
143        relativePosition = b->CreateUDiv(relativePosition, fw);
144        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
[5446]145    } else {
[5755]146        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
[5445]147    }
[5755]148    return b->CreateGEP(ptr, relativePosition);
[5260]149}
150
[5782]151Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
[5985]152    Type * const ty = fromPosition->getType();
153    const auto blockWidth = b->getBitBlockWidth();
154    Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
155    Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
[5639]156    if (reverse) {
[5985]157        Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
158        return b->CreateUMin(availItems, bufAvail);
[5706]159    } else {
[5985]160        Constant * capacity = bufferSize;
161        if (mOverflowBlocks) {
162            capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
163        }
164        Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
165        return b->CreateUMin(availItems, linearSpace);
[5650]166    }
[5301]167}
168
[5782]169Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
[5985]170    Type * const ty = fromPosition->getType();
171    const auto blockWidth = b->getBitBlockWidth();
172    Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
[5782]173    fromPosition = b->CreateURem(fromPosition, bufferSize);
[5618]174    if (reverse) {
[5985]175        return b->CreateSelect(b->CreateIsNull(fromPosition), bufferSize, fromPosition);
[5618]176    }
[5782]177    consumed = b->CreateURem(consumed, bufferSize);
[5793]178    Constant * capacity = bufferSize;
179    if (mOverflowBlocks) {
[5985]180        capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
[5793]181    }
182    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
[5985]183    return b->CreateSub(limit, fromPosition);
[5355]184}
[5301]185
[5755]186Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
187    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
188        b->CreateAssert(handle, "handle cannot be null");
189    }
190    return handle;
[5377]191}
192
[5755]193void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
194    Type * i8ptr = b->getInt8PtrTy();
195    unsigned alignment = b->getBitBlockWidth() / 8;
[5498]196    size_t numStreams = 1;
[5493]197    if (isa<ArrayType>(mBaseType)) {
198        numStreams = mBaseType->getArrayNumElements();
199    }
200    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
[5755]201    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
202    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
[5479]203}
204
[5398]205// Source File Buffer
[5506]206Type * SourceBuffer::getStreamSetBlockType() const {
[5985]207    return cast<PointerType>(mType->getStructElementType(BaseAddress))->getElementType();
[5506]208}
209
[5755]210Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5985]211    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
[5755]212    return b->CreateLoad(ptr);
[5398]213}
214
[5755]215void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
[5985]216    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
[5755]217    b->CreateStore(size, ptr);
[5398]218}
219
[5755]220Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5985]221    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
[5755]222    return b->CreateLoad(ptr);
[5501]223}
224
[5755]225void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
[5985]226    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
[5755]227    b->CreateStore(c, ptr);
[5501]228}
229
[5755]230void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
231    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
232        b->CreateAssert(handle, "handle cannot be null");
233    }
[5985]234    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5755]235    Type * const ptrTy = ptr->getType()->getPointerElementType();
236    if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
237        const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
238        const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
239        if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
240            report_fatal_error("SourceBuffer: base address was declared with address space "
241                                     + std::to_string(ptrSpace)
242                                     + " but given a pointer in address space "
243                                     + std::to_string(addrSpace));
244        }
245    } else {
246        report_fatal_error("SourceBuffer: base address is not a pointer type");
247    }
248    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
249        b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
250        DataLayout DL(b->getModule());
251        IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
252        Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
253        b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
254    }
255    b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
[5398]256}
257
[5755]258Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
259    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
260        b->CreateAssert(handle, "handle cannot be null");
261    }
[5985]262    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5755]263    return b->CreateLoad(ptr);
[5398]264}
265
[5755]266Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
[5618]267    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
[5836]268    Value * maxAvail = b->CreateNUWSub(getBufferedSize(b, handle), fromPosition);
[5755]269    return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
[5398]270}
271
[5782]272Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
[5650]273    report_fatal_error("SourceBuffers cannot be written");
[5503]274}
[5501]275
[5755]276void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5597]277    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
278        Type * const ty = getType();
[5755]279        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
280        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
[5597]281    } else {
282        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
283    }
284}
[5503]285
[5755]286void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5597]287
288}
289
[5429]290// External File Buffer
[5436]291void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
[5429]292    report_fatal_error("External buffers cannot be allocated.");
[5377]293}
294
[5597]295void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
296
297}
298
[5706]299Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
300    // All available items can be accessed.
301    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
[5377]302}
303
[5782]304Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
[5706]305    // Trust that the buffer is large enough to write any amount
306    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
307}
308
[5985]309Value * ExternalBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
310    return ConstantInt::getAllOnesValue(b->getSizeTy());
311}
312
[5757]313Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
314    return ConstantInt::getAllOnesValue(b->getSizeTy());
315}
316
[5260]317// Circular Buffer
[5755]318Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
319    Value * ptr = getBaseAddress(b, handle);
320    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
321    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
322    const auto bw = elemTy->getPrimitiveSizeInBits();
323    assert (is_power_2(bw));
[5446]324    if (bw < 8) {
[5755]325        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
326        relativePosition = b->CreateUDiv(relativePosition, fw);
327        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
[5446]328    } else {
[5755]329        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
[5446]330    }
[5755]331    return b->CreateGEP(ptr, relativePosition);
[5446]332}
333
[5260]334// Expandable Buffer
335
[5755]336void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
337    mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
338    Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
339    b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
[5320]340    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
[5755]341    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
342    Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
343    const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
344    Value * const ptr = b->CreateAlignedMalloc(size, alignment);
345    b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
346    Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
347    b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
[5260]348}
349
[5755]350std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5340]351
[5320]352    // ENTRY
[5755]353    Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
354    Value * const capacity = b->CreateLoad(capacityPtr);
355    Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
356    Value * const streamSet = b->CreateLoad(streamSetPtr);
357    blockIndex = modBufferSize(b, blockIndex);
[5311]358
[5353]359    assert (streamIndex->getType() == capacity->getType());
[5755]360    Value * const cond = b->CreateICmpULT(streamIndex, capacity);
[5353]361
[5320]362    // Are we guaranteed that we can access this stream?
[5353]363    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
[5755]364        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
365            b->CreateAssert(cond, "out-of-bounds stream access");
366        }
367        Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
[5353]368        return {streamSet, offset};
[5320]369    }
[5260]370
[5755]371    BasicBlock * const entry = b->GetInsertBlock();
372    BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
373    BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
[5311]374
[5755]375    b->CreateLikelyCondBr(cond, resume, expand);
[5353]376
[5320]377    // EXPAND
[5755]378    b->SetInsertPoint(expand);
[5353]379
380    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
381    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
382
[5755]383    Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
384    newCapacity = b->CreateCeilLog2(newCapacity);
385    newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
[5361]386
[5353]387    std::string tmp;
388    raw_string_ostream out(tmp);
389    out << "__expand";
390    elementType->print(out);
391    std::string name = out.str();
392
[5755]393    Module * const m = b->getModule();
[5353]394    Function * expandFunction = m->getFunction(name);
395
396    if (expandFunction == nullptr) {
397
[5755]398        const auto ip = b->saveIP();
[5353]399
[5755]400        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
[5353]401        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
402
403        auto args = expandFunction->arg_begin();
404        Value * streamSet = &*args++;
405        Value * capacity = &*args++;
406        Value * newCapacity = &*args;
407
[5755]408        BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
409        b->SetInsertPoint(entry);
[5353]410
[5755]411        Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
412        const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
[5486]413
[5755]414        Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
415        Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
[5353]416
417        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
418        for (unsigned i = 0; i < mBufferBlocks; ++i) {
[5755]419            ConstantInt * const offset = b->getSize(i);
420            Value * srcOffset = b->CreateMul(capacity, offset);
421            Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
422            Value * destOffset = b->CreateMul(newCapacity, offset);
423            Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
424            b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
425            Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
426            Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
427            b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
[5353]428        }
429
[5755]430        b->CreateFree(streamSet);
[5353]431
[5755]432        b->CreateRet(newStreamSet);
[5353]433
[5755]434        b->restoreIP(ip);
[5320]435    }
[5311]436
[5755]437    Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
438    b->CreateStore(newStreamSet, streamSetPtr);
439    b->CreateStore(newCapacity, capacityPtr);
[5353]440
[5755]441    b->CreateBr(resume);
[5353]442
[5320]443    // RESUME
[5755]444    b->SetInsertPoint(resume);
[5320]445
[5755]446    PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
[5320]447    phiStreamSet->addIncoming(streamSet, entry);
448    phiStreamSet->addIncoming(newStreamSet, expand);
449
[5755]450    PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
[5320]451    phiCapacity->addIncoming(capacity, entry);
452    phiCapacity->addIncoming(newCapacity, expand);
453
[5755]454    Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
[5320]455
456    return {phiStreamSet, offset};
[5260]457}
458
[5985]459Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5706]460    report_fatal_error("temporarily not supported");
461//    Value * ptr, * offset;
[5755]462//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
463//    return b->CreateGEP(ptr, offset);
[5320]464}
465
[5985]466Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
[5706]467    report_fatal_error("temporarily not supported");
468//    Value * ptr, * offset;
[5755]469//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
470//    return b->CreateGEP(ptr, {offset, packIndex});
[5320]471}
472
[5755]473Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
474    return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
[5329]475}
476
[5755]477Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
478    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
479        b->CreateAssert(handle, "handle cannot be null");
480    }
481    Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
482    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
483        b->CreateAssert(handle, "base address cannot be 0");
484    }
[5446]485    return baseAddr;
[5377]486}
487
[5544]488void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
489    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
[5386]490}
491
[5650]492Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
[5320]493    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
[5311]494}
495
[5260]496
[5541]497Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5755]498    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
499        b->CreateAssert(handle, "handle cannot be null");
500    }
[5985]501    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5541]502    Value * const addr = b->CreateLoad(p);
[5755]503    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
504        b->CreateAssert(addr, "base address cannot be 0");
505    }
[5541]506    return addr;
507}
508
[5755]509Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
[5985]510    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
[5755]511    assert (blockIndex->getType() == workingBlocks->getType());
512    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
[5541]513}
514
[5985]515Value * DynamicBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
516    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
517    return b->CreateGEP(getBaseAddress(b, handle), workingBlocks);
518}
519
[5755]520Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
[5706]521    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
[5755]522    Value * const absBlock = b->CreateUDiv(absolutePosition, blockSize);
[5706]523    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
524    Value * blockPtr = getBlockAddress(b, handle, absBlock);
[5755]525    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
526    const auto bw = elemTy->getPrimitiveSizeInBits();
527    assert (is_power_2(bw));
[5541]528    if (bw < 8) {
529        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
530        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
531    } else {
[5755]532        blockPtr = b->CreatePointerCast(blockPtr, elemTy->getPointerTo());
[5541]533    }
534    return b->CreateGEP(blockPtr, blockPos);
535}
536
[5755]537Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
[5985]538    Value * const bufferSize = getBufferedSize(b, handle);
539    Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
[5639]540    if (reverse) {
[5985]541        Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
542        return b->CreateUMin(availItems, bufAvail);
[5706]543    } else {
[5985]544        Value * capacity = bufferSize;
545        if (mOverflowBlocks) {
546            Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
547            capacity = b->CreateAdd(bufferSize, overflow);
548        }
549        Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
550        return b->CreateUMin(availItems, linearSpace);
[5650]551    }
[5541]552}
553
[5985]554Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
555    Value * const bufferSize = getBufferedSize(b, handle);
556    fromPosition = b->CreateURem(fromPosition, bufferSize);
[5618]557    if (reverse) {
[5985]558        return b->CreateSelect(b->CreateIsNull(fromPosition), bufferSize, fromPosition);
[5618]559    }
[5985]560    consumed = b->CreateURem(consumed, bufferSize);
561    Value * capacity = bufferSize;
562    if (mOverflowBlocks) {
563        Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
564        capacity = b->CreateAdd(bufferSize, overflow);
565    }
566    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
567    return b->CreateSub(limit, fromPosition);
[5541]568}
569
[5755]570Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5985]571    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
[5755]572    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
[5612]573}
574
[5985]575
576inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
577    IntegerType * sizeTy = b->getSizeTy();
578    PointerType * typePtr = baseType->getPointerTo(addrSpace);
579    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
580}
581
[5541]582void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5985]583    Type * const structTy = getDynamicBufferStructType(b, mType, mAddressSpace);
584    Value * const handle = b->CreateCacheAlignedAlloca(structTy);
[5541]585    size_t numStreams = 1;
586    if (isa<ArrayType>(mBaseType)) {
587        numStreams = mBaseType->getArrayNumElements();
588    }
589    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
590    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
[5543]591    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
[5985]592    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5612]593    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
594    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
[5618]595    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
596        b->CallPrintInt("allocated: ", bufPtr);
597        b->CallPrintInt("allocated capacity: ", bufSize);
598    }
[5541]599    b->CreateStore(bufPtr, bufBasePtrField);
[5985]600    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)}));
601    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)}));
602    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
603    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Length)}));
604    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ProducedPosition)}));
605    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ConsumedPosition)}));
[5541]606    mStreamSetBufferPtr = handle;
607}
608
[5544]609void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5755]610    Value * const handle = mStreamSetBufferPtr;
[5541]611    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
[5985]612    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5612]613    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
[5985]614    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
[5612]615    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
616    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
617    Value * priorBuf = b->CreateLoad(priorBasePtrField);
618    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
619    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
620    b->SetInsertPoint(freePrior);
[5618]621    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
622        b->CallPrintInt("releasing: ", priorBuf);
623    }
[5612]624    b->CreateFree(priorBuf);
625    b->CreateBr(freeCurrent);
626    b->SetInsertPoint(freeCurrent);
627    b->CreateFree(b->CreateLoad(bufBasePtrField));
[5541]628}
629
[5612]630//
631//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
632//  ensures that we have correct data.   TODO: consider optimizing based on actual
633//  consumer and producer positions.
634//
[5856]635void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5612]636    size_t numStreams = 1;
637    if (isa<ArrayType>(mBaseType)) {
638        numStreams = mBaseType->getArrayNumElements();
639    }
640    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
641    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
[5985]642    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
[5612]643    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
[5985]644    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
645    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
646    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)});
[5706]647
[5612]648    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
[5618]649    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
[5612]650    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
651    Value * const curAllocated = b->CreateLoad(capacityField);
652    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
653    if (mOverflowBlocks > 0) {
654        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
655        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
656    }
657    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
658    BasicBlock * doubleEntry = b->GetInsertBlock();
659    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
660    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
661    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
662    b->SetInsertPoint(doRealloc);
663    // If there is a non-null priorBasePtr, free it.
664    Value * priorBuf = b->CreateLoad(priorBasePtrField);
665    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
666    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
667    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
668    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
669    b->SetInsertPoint(deallocatePrior);
[5618]670    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
671        b->CallPrintInt("deallocating: ", priorBuf);
672    }
[5612]673    b->CreateFree(priorBuf);
674    b->CreateBr(allocateNew);
[5985]675
[5612]676    b->SetInsertPoint(allocateNew);
677    b->CreateStore(oldBufPtr, priorBasePtrField);
678    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
[5618]679    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
680        b->CallPrintInt("re-allocated: ", newBufPtr);
681        b->CallPrintInt("allocated capacity: ", neededCapacity);
682    }
[5612]683    b->CreateStore(newBufPtr, bufBasePtrField);
684    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
685    b->CreateStore(neededCapacity, capacityField);
686    b->CreateBr(doCopy2);
[5985]687
[5612]688    b->SetInsertPoint(doCopy2);
689    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
690    bufPtr->addIncoming(oldBufPtr, doubleEntry);
[5615]691    bufPtr->addIncoming(newBufPtr, allocateNew);
[5612]692    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
[5618]693    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
694    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
695        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
696    }
697    b->CreateStore(currentWorkingBlocks, workingBlocksField);
[5612]698}
[5541]699
[5755]700inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
701    return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
702}
703
[5706]704SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
[5755]705: StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
[5706]706    mUniqueID = "B";
707    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
708        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
709    }
710}
711
[5755]712ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
713: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
[5706]714    mUniqueID = "E";
715    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
716    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
717}
718
719CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5755]720: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
[5706]721    mUniqueID = "C" + std::to_string(bufferBlocks);
722    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
723}
724
[5755]725CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
726: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
[5706]727
728}
729
730CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
[5755]731: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
[5985]732    if (overflowBlocks < 1) {
733        report_fatal_error("CircularCopybackBuffer: overflowBlocks < 1");
734    }
[5706]735    if (bufferBlocks < 2 * overflowBlocks) {
736        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
737    }
738    mUniqueID = "CC" + std::to_string(bufferBlocks);
739    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
740    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
741}
742
743ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5755]744: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
[5706]745, mInitialCapacity(type->getArrayNumElements()) {
746    mUniqueID = "XP" + std::to_string(bufferBlocks);
747    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
748}
749
[5541]750DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
[5985]751: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace) {
[5641]752    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
753        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
754    }
[5541]755    mUniqueID = "DB";
756    if (swizzle != 1) {
757        mUniqueID += "s" + std::to_string(swizzle);
758    }
759        if (overflow != 0) {
760        mUniqueID += "o" + std::to_string(overflow);
761    }
762    if (addrSpace != 0) {
763        mUniqueID += "@" + std::to_string(addrSpace);
764    }
765}
766
767
[5755]768inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
[5320]769: mBufferKind(k)
770, mType(resolvedType)
[5446]771, mBufferBlocks(BufferBlocks)
[5755]772, mOverflowBlocks(OverflowBlocks)
[5320]773, mAddressSpace(AddressSpace)
774, mStreamSetBufferPtr(nullptr)
[5408]775, mBaseType(baseType)
776, mProducer(nullptr) {
[5755]777    assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
778    assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
[5320]779}
780
[5377]781StreamSetBuffer::~StreamSetBuffer() { }
782
[5320]783// Helper routines
[5436]784ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]785    unsigned numElements = 1;
786    if (LLVM_LIKELY(type->isArrayTy())) {
787        numElements = type->getArrayNumElements();
788        type = type->getArrayElementType();
789    }
790    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
791        type = type->getVectorElementType();
792        if (LLVM_LIKELY(type->isIntegerTy())) {
793            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
794            type = b->getBitBlockType();
795            if (fieldWidth != 1) {
796                type = ArrayType::get(type, fieldWidth);
[5307]797            }
[5320]798            return ArrayType::get(type, numElements);
[5307]799        }
800    }
801    std::string tmp;
802    raw_string_ostream out(tmp);
803    type->print(out);
804    out << " is an unvalid stream set buffer type.";
805    report_fatal_error(out.str());
806}
[5301]807
[5436]808StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]809    if (LLVM_LIKELY(type->isArrayTy())) {
810        type = type->getArrayElementType();
811    }
812    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
813        type = type->getVectorElementType();
814        if (LLVM_LIKELY(type->isIntegerTy())) {
815            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
816            type = b->getBitBlockType();
817            if (fieldWidth != 1) {
818                type = ArrayType::get(type, fieldWidth);
819            }
[5733]820            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
[5320]821        }
822    }
823    std::string tmp;
824    raw_string_ostream out(tmp);
825    type->print(out);
826    out << " is an unvalid stream set buffer type.";
827    report_fatal_error(out.str());
[5260]828}
Note: See TracBrowser for help on using the repository browser.