source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5985

Last change on this file since 5985 was 5985, checked in by nmedfort, 12 months ago

Restructured MultiBlock kernel. Removal of Swizzled buffers. Inclusion of PopCount rates / non-linear access. Modifications to several kernels to better align them with the kernel and pipeline changes.

File size: 41.7 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
/// Returns true iff @p n is a non-zero power of two.
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // A power of two has exactly one bit set, so clearing its lowest set bit leaves zero.
    return (n & (n - 1)) == 0;
}
25
26Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
27
28ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
29
30StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
31
32void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
33    assert (mBufferBlocks > 0);
34    assert ("allocate buffer was called twice" && !mStreamSetBufferPtr);
35    Type * const ty = getType();
36    const auto blocks = (mBufferBlocks + mOverflowBlocks);
37    if (mAddressSpace == 0) {
38        Constant * size = ConstantExpr::getSizeOf(ty);
39        size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), blocks));
40        mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
41    } else {
42        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(blocks));
43    }
44}
45
46void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
47    if (mAddressSpace == 0) {
48        b->CreateFree(mStreamSetBufferPtr);
49    }
50}
51
52inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
53    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
54}
55
56Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
57    assert (offset->getType()->isIntegerTy());
58    if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
59        return offset;
60    } else if (mBufferBlocks == 1) {
61        return ConstantInt::getNullValue(offset->getType());
62    } else if (is_power_2(mBufferBlocks)) {
63        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
64    } else {
65        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
66    }
67}
68
69Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
70    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
71        Value * const count = getStreamSetCount(b, handle);
72        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
73        Value * const cond = b->CreateICmpULT(index, count);
74        b->CreateAssert(cond, "out-of-bounds stream access");
75    }
76    return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex});
77}
78
79Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
80    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
81        Value * const count = getStreamSetCount(b, handle);
82        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
83        Value * const cond = b->CreateICmpULT(index, count);
84        b->CreateAssert(cond, "out-of-bounds stream access");
85    }
86    return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex, packIndex});
87}
88
89void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
90    report_fatal_error("setBaseAddress is not supported by this buffer type");
91}
92
93Value * StreamSetBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
94    return b->CreateGEP(getBaseAddress(b, handle), b->getSize(mBufferBlocks));
95}
96
97Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
98    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
99}
100
101void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
102    report_fatal_error("setBufferedSize is not supported by this buffer type");
103}
104
105Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
106    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
107}
108
109void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
110    report_fatal_error("setCapacity is not supported by this buffer type");
111}
112
113Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value *) const {
114    size_t count = 1;
115    if (isa<ArrayType>(mBaseType)) {
116        count = mBaseType->getArrayNumElements();
117    }
118    return b->getSize(count);
119}
120
121void StreamSetBuffer::doubleCapacity(IDISA::IDISA_Builder * const /* b */, Value */* handle */) const {
122    report_fatal_error("doubleCapacity is not supported by this buffer type");
123}
124
125/**
126 * @brief getRawItemPointer
127 *
128 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
129 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
130 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
131 */
132Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
133    Value * ptr = getBaseAddress(b, handle);
134    Value * relativePosition = absolutePosition;
135    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
136    const auto bw = elemTy->getPrimitiveSizeInBits();
137    assert (is_power_2(bw));
138    if (bw < 8) {
139        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
140        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
141            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
142        }
143        relativePosition = b->CreateUDiv(relativePosition, fw);
144        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
145    } else {
146        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
147    }
148    return b->CreateGEP(ptr, relativePosition);
149}
150
151Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
152    Type * const ty = fromPosition->getType();
153    const auto blockWidth = b->getBitBlockWidth();
154    Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
155    Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
156    if (reverse) {
157        Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
158        return b->CreateUMin(availItems, bufAvail);
159    } else {
160        Constant * capacity = bufferSize;
161        if (mOverflowBlocks) {
162            capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
163        }
164        Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
165        return b->CreateUMin(availItems, linearSpace);
166    }
167}
168
169Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
170    Type * const ty = fromPosition->getType();
171    const auto blockWidth = b->getBitBlockWidth();
172    Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
173    fromPosition = b->CreateURem(fromPosition, bufferSize);
174    if (reverse) {
175        return b->CreateSelect(b->CreateIsNull(fromPosition), bufferSize, fromPosition);
176    }
177    consumed = b->CreateURem(consumed, bufferSize);
178    Constant * capacity = bufferSize;
179    if (mOverflowBlocks) {
180        capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
181    }
182    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
183    return b->CreateSub(limit, fromPosition);
184}
185
186Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
187    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
188        b->CreateAssert(handle, "handle cannot be null");
189    }
190    return handle;
191}
192
193void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
194    Type * i8ptr = b->getInt8PtrTy();
195    unsigned alignment = b->getBitBlockWidth() / 8;
196    size_t numStreams = 1;
197    if (isa<ArrayType>(mBaseType)) {
198        numStreams = mBaseType->getArrayNumElements();
199    }
200    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
201    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
202    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
203}
204
205// Source File Buffer
206Type * SourceBuffer::getStreamSetBlockType() const {
207    return cast<PointerType>(mType->getStructElementType(BaseAddress))->getElementType();
208}
209
210Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
211    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
212    return b->CreateLoad(ptr);
213}
214
215void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
216    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
217    b->CreateStore(size, ptr);
218}
219
220Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
221    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
222    return b->CreateLoad(ptr);
223}
224
225void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
226    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
227    b->CreateStore(c, ptr);
228}
229
230void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
231    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
232        b->CreateAssert(handle, "handle cannot be null");
233    }
234    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
235    Type * const ptrTy = ptr->getType()->getPointerElementType();
236    if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
237        const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
238        const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
239        if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
240            report_fatal_error("SourceBuffer: base address was declared with address space "
241                                     + std::to_string(ptrSpace)
242                                     + " but given a pointer in address space "
243                                     + std::to_string(addrSpace));
244        }
245    } else {
246        report_fatal_error("SourceBuffer: base address is not a pointer type");
247    }
248    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
249        b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
250        DataLayout DL(b->getModule());
251        IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
252        Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
253        b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
254    }
255    b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
256}
257
258Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
259    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
260        b->CreateAssert(handle, "handle cannot be null");
261    }
262    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
263    return b->CreateLoad(ptr);
264}
265
266Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
267    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
268    Value * maxAvail = b->CreateNUWSub(getBufferedSize(b, handle), fromPosition);
269    return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
270}
271
272Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
273    report_fatal_error("SourceBuffers cannot be written");
274}
275
276void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
277    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
278        Type * const ty = getType();
279        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
280        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
281    } else {
282        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
283    }
284}
285
286void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
287
288}
289
290// External File Buffer
291void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
292    report_fatal_error("External buffers cannot be allocated.");
293}
294
295void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
296
297}
298
299Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
300    // All available items can be accessed.
301    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
302}
303
304Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
305    // Trust that the buffer is large enough to write any amount
306    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
307}
308
309Value * ExternalBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
310    return ConstantInt::getAllOnesValue(b->getSizeTy());
311}
312
313Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
314    return ConstantInt::getAllOnesValue(b->getSizeTy());
315}
316
317// Circular Buffer
318Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
319    Value * ptr = getBaseAddress(b, handle);
320    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
321    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
322    const auto bw = elemTy->getPrimitiveSizeInBits();
323    assert (is_power_2(bw));
324    if (bw < 8) {
325        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
326        relativePosition = b->CreateUDiv(relativePosition, fw);
327        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
328    } else {
329        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
330    }
331    return b->CreateGEP(ptr, relativePosition);
332}
333
334// Expandable Buffer
335
336void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
337    mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
338    Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
339    b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
340    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
341    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
342    Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
343    const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
344    Value * const ptr = b->CreateAlignedMalloc(size, alignment);
345    b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
346    Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
347    b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
348}
349
350std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
351
352    // ENTRY
353    Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
354    Value * const capacity = b->CreateLoad(capacityPtr);
355    Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
356    Value * const streamSet = b->CreateLoad(streamSetPtr);
357    blockIndex = modBufferSize(b, blockIndex);
358
359    assert (streamIndex->getType() == capacity->getType());
360    Value * const cond = b->CreateICmpULT(streamIndex, capacity);
361
362    // Are we guaranteed that we can access this stream?
363    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
364        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
365            b->CreateAssert(cond, "out-of-bounds stream access");
366        }
367        Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
368        return {streamSet, offset};
369    }
370
371    BasicBlock * const entry = b->GetInsertBlock();
372    BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
373    BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
374
375    b->CreateLikelyCondBr(cond, resume, expand);
376
377    // EXPAND
378    b->SetInsertPoint(expand);
379
380    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
381    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
382
383    Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
384    newCapacity = b->CreateCeilLog2(newCapacity);
385    newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
386
387    std::string tmp;
388    raw_string_ostream out(tmp);
389    out << "__expand";
390    elementType->print(out);
391    std::string name = out.str();
392
393    Module * const m = b->getModule();
394    Function * expandFunction = m->getFunction(name);
395
396    if (expandFunction == nullptr) {
397
398        const auto ip = b->saveIP();
399
400        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
401        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
402
403        auto args = expandFunction->arg_begin();
404        Value * streamSet = &*args++;
405        Value * capacity = &*args++;
406        Value * newCapacity = &*args;
407
408        BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
409        b->SetInsertPoint(entry);
410
411        Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
412        const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
413
414        Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
415        Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
416
417        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
418        for (unsigned i = 0; i < mBufferBlocks; ++i) {
419            ConstantInt * const offset = b->getSize(i);
420            Value * srcOffset = b->CreateMul(capacity, offset);
421            Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
422            Value * destOffset = b->CreateMul(newCapacity, offset);
423            Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
424            b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
425            Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
426            Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
427            b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
428        }
429
430        b->CreateFree(streamSet);
431
432        b->CreateRet(newStreamSet);
433
434        b->restoreIP(ip);
435    }
436
437    Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
438    b->CreateStore(newStreamSet, streamSetPtr);
439    b->CreateStore(newCapacity, capacityPtr);
440
441    b->CreateBr(resume);
442
443    // RESUME
444    b->SetInsertPoint(resume);
445
446    PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
447    phiStreamSet->addIncoming(streamSet, entry);
448    phiStreamSet->addIncoming(newStreamSet, expand);
449
450    PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
451    phiCapacity->addIncoming(capacity, entry);
452    phiCapacity->addIncoming(newCapacity, expand);
453
454    Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
455
456    return {phiStreamSet, offset};
457}
458
459Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
460    report_fatal_error("temporarily not supported");
461//    Value * ptr, * offset;
462//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
463//    return b->CreateGEP(ptr, offset);
464}
465
466Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
467    report_fatal_error("temporarily not supported");
468//    Value * ptr, * offset;
469//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
470//    return b->CreateGEP(ptr, {offset, packIndex});
471}
472
473Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
474    return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
475}
476
477Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
478    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
479        b->CreateAssert(handle, "handle cannot be null");
480    }
481    Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
482    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
483        b->CreateAssert(handle, "base address cannot be 0");
484    }
485    return baseAddr;
486}
487
488void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
489    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
490}
491
492Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
493    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
494}
495
496
497Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
498    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
499        b->CreateAssert(handle, "handle cannot be null");
500    }
501    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
502    Value * const addr = b->CreateLoad(p);
503    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
504        b->CreateAssert(addr, "base address cannot be 0");
505    }
506    return addr;
507}
508
509Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
510    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
511    assert (blockIndex->getType() == workingBlocks->getType());
512    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
513}
514
515Value * DynamicBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
516    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
517    return b->CreateGEP(getBaseAddress(b, handle), workingBlocks);
518}
519
520Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
521    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
522    Value * const absBlock = b->CreateUDiv(absolutePosition, blockSize);
523    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
524    Value * blockPtr = getBlockAddress(b, handle, absBlock);
525    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
526    const auto bw = elemTy->getPrimitiveSizeInBits();
527    assert (is_power_2(bw));
528    if (bw < 8) {
529        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
530        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
531    } else {
532        blockPtr = b->CreatePointerCast(blockPtr, elemTy->getPointerTo());
533    }
534    return b->CreateGEP(blockPtr, blockPos);
535}
536
537//Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
538//    Value * const bufferSize = getBufferedSize(b, handle);
539//    assert (bufferSize->getType() == fromPosition->getType());
540//    Value * itemsFromBase = b->CreateURem(fromPosition, bufferSize);
541//    if (reverse) {
542//        Value * bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
543//        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
544//    } else {
545//        Constant * const overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks * b->getBitBlockWidth() - 1);
546//        Value * const linearSpace = b->CreateAdd(bufferSize, overflow);
547//        Value * remaining = b->CreateSub(linearSpace, itemsFromBase);
548//        return b->CreateSelect(b->CreateICmpULT(availItems, remaining), availItems, remaining);
549//    }
550//}
551
552//Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
553//    Value * const bufferSize = getBufferedSize(b, handle);
554//    assert (bufferSize->getType() == fromPosition->getType());
555//    Value * bufRem = b->CreateURem(fromPosition, bufferSize);
556//    if (reverse) {
557//        return b->CreateSelect(b->CreateIsNull(bufRem), bufferSize, bufRem);
558//    }
559//    Constant * const overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks * b->getBitBlockWidth() - 1);
560//    Value * const linearSpace = b->CreateAdd(bufferSize, overflow);
561//    return b->CreateSub(linearSpace, bufRem);
562//}
563
564Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
565    Value * const bufferSize = getBufferedSize(b, handle);
566    Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
567    if (reverse) {
568        Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
569        return b->CreateUMin(availItems, bufAvail);
570    } else {
571        Value * capacity = bufferSize;
572        if (mOverflowBlocks) {
573            Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
574            capacity = b->CreateAdd(bufferSize, overflow);
575        }
576        Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
577        return b->CreateUMin(availItems, linearSpace);
578    }
579}
580
581Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
582    Value * const bufferSize = getBufferedSize(b, handle);
583    fromPosition = b->CreateURem(fromPosition, bufferSize);
584    if (reverse) {
585        return b->CreateSelect(b->CreateIsNull(fromPosition), bufferSize, fromPosition);
586    }
587    consumed = b->CreateURem(consumed, bufferSize);
588    Value * capacity = bufferSize;
589    if (mOverflowBlocks) {
590        Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
591        capacity = b->CreateAdd(bufferSize, overflow);
592    }
593    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
594    return b->CreateSub(limit, fromPosition);
595}
596
597
598
599Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
600    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
601    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
602}
603
604
605inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
606    IntegerType * sizeTy = b->getSizeTy();
607    PointerType * typePtr = baseType->getPointerTo(addrSpace);
608    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
609}
610
611void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
612    Type * const structTy = getDynamicBufferStructType(b, mType, mAddressSpace);
613    Value * const handle = b->CreateCacheAlignedAlloca(structTy);
614    size_t numStreams = 1;
615    if (isa<ArrayType>(mBaseType)) {
616        numStreams = mBaseType->getArrayNumElements();
617    }
618    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
619    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
620    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
621    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
622    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
623    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
624    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
625        b->CallPrintInt("allocated: ", bufPtr);
626        b->CallPrintInt("allocated capacity: ", bufSize);
627    }
628    b->CreateStore(bufPtr, bufBasePtrField);
629    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)}));
630    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)}));
631    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
632    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Length)}));
633    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ProducedPosition)}));
634    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ConsumedPosition)}));
635    mStreamSetBufferPtr = handle;
636}
637
638void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
639    Value * const handle = mStreamSetBufferPtr;
640    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
641    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
642    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
643    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
644    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
645    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
646    Value * priorBuf = b->CreateLoad(priorBasePtrField);
647    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
648    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
649    b->SetInsertPoint(freePrior);
650    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
651        b->CallPrintInt("releasing: ", priorBuf);
652    }
653    b->CreateFree(priorBuf);
654    b->CreateBr(freeCurrent);
655    b->SetInsertPoint(freeCurrent);
656    b->CreateFree(b->CreateLoad(bufBasePtrField));
657}
658
659//
660//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
661//  ensures that we have correct data.   TODO: consider optimizing based on actual
662//  consumer and producer positions.
663//
664void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
665    size_t numStreams = 1;
666    if (isa<ArrayType>(mBaseType)) {
667        numStreams = mBaseType->getArrayNumElements();
668    }
669    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
670    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
671    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
672    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
673    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
674    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
675    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)});
676
677    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
678    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
679    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
680    Value * const curAllocated = b->CreateLoad(capacityField);
681    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
682    if (mOverflowBlocks > 0) {
683        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
684        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
685    }
686    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
687    BasicBlock * doubleEntry = b->GetInsertBlock();
688    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
689    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
690    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
691    b->SetInsertPoint(doRealloc);
692    // If there is a non-null priorBasePtr, free it.
693    Value * priorBuf = b->CreateLoad(priorBasePtrField);
694    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
695    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
696    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
697    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
698    b->SetInsertPoint(deallocatePrior);
699    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
700        b->CallPrintInt("deallocating: ", priorBuf);
701    }
702    b->CreateFree(priorBuf);
703    b->CreateBr(allocateNew);
704
705    b->SetInsertPoint(allocateNew);
706    b->CreateStore(oldBufPtr, priorBasePtrField);
707    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
708    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
709        b->CallPrintInt("re-allocated: ", newBufPtr);
710        b->CallPrintInt("allocated capacity: ", neededCapacity);
711    }
712    b->CreateStore(newBufPtr, bufBasePtrField);
713    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
714    b->CreateStore(neededCapacity, capacityField);
715    b->CreateBr(doCopy2);
716
717    b->SetInsertPoint(doCopy2);
718    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
719    bufPtr->addIncoming(oldBufPtr, doubleEntry);
720    bufPtr->addIncoming(newBufPtr, allocateNew);
721    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
722    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
723    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
724        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
725    }
726    b->CreateStore(currentWorkingBlocks, workingBlocksField);
727}
728
729inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
730    return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
731}
732
733SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
734: StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
735    mUniqueID = "B";
736    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
737        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
738    }
739}
740
741ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
742: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
743    mUniqueID = "E";
744    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
745    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
746}
747
748CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
749: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
750    mUniqueID = "C" + std::to_string(bufferBlocks);
751    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
752}
753
754CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
755: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
756
757}
758
759CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
760: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
761    if (overflowBlocks < 1) {
762        report_fatal_error("CircularCopybackBuffer: overflowBlocks < 1");
763    }
764    if (bufferBlocks < 2 * overflowBlocks) {
765        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
766    }
767    mUniqueID = "CC" + std::to_string(bufferBlocks);
768    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
769    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
770}
771
772ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
773: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
774, mInitialCapacity(type->getArrayNumElements()) {
775    mUniqueID = "XP" + std::to_string(bufferBlocks);
776    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
777}
778
779DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
780: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace) {
781    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
782        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
783    }
784    mUniqueID = "DB";
785    if (swizzle != 1) {
786        mUniqueID += "s" + std::to_string(swizzle);
787    }
788        if (overflow != 0) {
789        mUniqueID += "o" + std::to_string(overflow);
790    }
791    if (addrSpace != 0) {
792        mUniqueID += "@" + std::to_string(addrSpace);
793    }
794}
795
796
797inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
798: mBufferKind(k)
799, mType(resolvedType)
800, mBufferBlocks(BufferBlocks)
801, mOverflowBlocks(OverflowBlocks)
802, mAddressSpace(AddressSpace)
803, mStreamSetBufferPtr(nullptr)
804, mBaseType(baseType)
805, mProducer(nullptr) {
806    assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
807    assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
808}
809
810StreamSetBuffer::~StreamSetBuffer() { }
811
812// Helper routines
813ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
814    unsigned numElements = 1;
815    if (LLVM_LIKELY(type->isArrayTy())) {
816        numElements = type->getArrayNumElements();
817        type = type->getArrayElementType();
818    }
819    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
820        type = type->getVectorElementType();
821        if (LLVM_LIKELY(type->isIntegerTy())) {
822            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
823            type = b->getBitBlockType();
824            if (fieldWidth != 1) {
825                type = ArrayType::get(type, fieldWidth);
826            }
827            return ArrayType::get(type, numElements);
828        }
829    }
830    std::string tmp;
831    raw_string_ostream out(tmp);
832    type->print(out);
833    out << " is an unvalid stream set buffer type.";
834    report_fatal_error(out.str());
835}
836
837StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
838    if (LLVM_LIKELY(type->isArrayTy())) {
839        type = type->getArrayElementType();
840    }
841    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
842        type = type->getVectorElementType();
843        if (LLVM_LIKELY(type->isIntegerTy())) {
844            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
845            type = b->getBitBlockType();
846            if (fieldWidth != 1) {
847                type = ArrayType::get(type, fieldWidth);
848            }
849            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
850        }
851    }
852    std::string tmp;
853    raw_string_ostream out(tmp);
854    type->print(out);
855    out << " is an unvalid stream set buffer type.";
856    report_fatal_error(out.str());
857}
Note: See TracBrowser for help on using the repository browser.