source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5941

Last change on this file since 5941 was 5856, checked in by nmedfort, 18 months ago

Revised pipeline structure to better control I/O rates

File size: 46.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
// Returns true iff n has exactly one bit set; zero is rejected.
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set.
    return (n & (n - 1)) == 0;
}
25
26Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
27
28ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
29
30StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
31
32void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
33    assert (mBufferBlocks > 0);
34    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
35        Type * const ty = getType();
36        if (mAddressSpace == 0) {
37            Constant * size = ConstantExpr::getSizeOf(ty);
38            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
39            mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
40        } else {
41            mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
42        }
43        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
44    } else {
45        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
46    }
47}
48
49void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
50    if (mAddressSpace == 0) {
51        b->CreateFree(mStreamSetBufferPtr);
52    }
53}
54
55inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
56    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
57}
58
59Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
60    assert (offset->getType()->isIntegerTy());
61    if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
62        return offset;
63    } else if (mBufferBlocks == 1) {
64        return ConstantInt::getNullValue(offset->getType());
65    } else if (is_power_2(mBufferBlocks)) {
66        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
67    } else {
68        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
69    }
70}
71
72Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
73    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
74        Value * const count = getStreamSetCount(b, handle);
75        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
76        Value * const cond = b->CreateICmpULT(index, count);
77        b->CreateAssert(cond, "out-of-bounds stream access");
78    }
79    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex});
80}
81
82Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
83    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
84        Value * const count = getStreamSetCount(b, handle);
85        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
86        Value * const cond = b->CreateICmpULT(index, count);
87        b->CreateAssert(cond, "out-of-bounds stream access");
88    }
89    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex, packIndex});
90}
91
92void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
93    report_fatal_error("setBaseAddress is not supported by this buffer type");
94}
95
96Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
97    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
98}
99
100void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
101    report_fatal_error("setBufferedSize is not supported by this buffer type");
102}
103
104Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
105    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
106}
107
108void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
109    report_fatal_error("setCapacity is not supported by this buffer type");
110}
111
112Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value *) const {
113    size_t count = 1;
114    if (isa<ArrayType>(mBaseType)) {
115        count = mBaseType->getArrayNumElements();
116    }
117    return b->getSize(count);
118}
119
120void StreamSetBuffer::doubleCapacity(IDISA::IDISA_Builder * const /* b */, Value */* handle */) const {
121    report_fatal_error("doubleCapacity is not supported by this buffer type");
122}
123
124/**
125 * @brief getRawItemPointer
126 *
127 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
128 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
129 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
130 */
131Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
132    Value * ptr = getBaseAddress(b, handle);
133    Value * relativePosition = absolutePosition;
134    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
135    const auto bw = elemTy->getPrimitiveSizeInBits();
136    assert (is_power_2(bw));
137    if (bw < 8) {
138        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
139        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
140            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
141        }
142        relativePosition = b->CreateUDiv(relativePosition, fw);
143        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
144    } else {
145        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
146    }
147    return b->CreateGEP(ptr, relativePosition);
148}
149
150Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
151    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
152    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
153    if (reverse) {
154        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
155        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
156    } else {
157        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
158        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
159    }
160}
161
162Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
163    Constant * const bufferSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
164    fromPosition = b->CreateURem(fromPosition, bufferSize);
165    if (reverse) {
166        return b->CreateSelect(b->CreateICmpEQ(fromPosition, b->getSize(0)), bufferSize, fromPosition);
167    }
168    consumed = b->CreateURem(consumed, bufferSize);
169    Constant * capacity = bufferSize;
170    if (mOverflowBlocks) {
171        capacity = ConstantInt::get(fromPosition->getType(), (mBufferBlocks + mOverflowBlocks) * b->getStride());
172    }
173    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
174    return b->CreateNUWSub(limit, fromPosition);
175}
176
177Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
178    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
179        b->CreateAssert(handle, "handle cannot be null");
180    }
181    return handle;
182}
183
184
185Value * StreamSetBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
186    return b->CreateGEP(getBaseAddress(b, handle), blockIndex);
187}
188
189void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
190    Type * i8ptr = b->getInt8PtrTy();
191    unsigned alignment = b->getBitBlockWidth() / 8;
192    size_t numStreams = 1;
193    if (isa<ArrayType>(mBaseType)) {
194        numStreams = mBaseType->getArrayNumElements();
195    }
196    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
197    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
198    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
199}
200
201void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
202    Constant * const blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
203    size_t numStreams = 1;
204    if (isa<ArrayType>(mBaseType)) {
205        numStreams = mBaseType->getArrayNumElements();
206    }
207    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
208    if (numStreams == 1) {
209        Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth));
210        Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
211        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, copyBytes, alignment);
212    } else {
213        Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
214        Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
215        Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
216        Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
217        Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
218        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, blockCopyBytes, alignment);
219        Value * partialCopyBitsPerStream = b->CreateMul(partialItems, b->getSize(fieldWidth));
220        Value * partialCopyBytesPerStream = b->CreateLShr(b->CreateAdd(partialCopyBitsPerStream, b->getSize(7)), b->getSize(3));
221        for (unsigned i = 0; i < numStreams; i++) {
222            Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(i)});
223            Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(i)});
224            b->CreateMemCpy(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
225        }
226    }
227}
228
229// Source File Buffer
230
231Type * SourceBuffer::getStreamSetBlockType() const {
232    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
233}
234
235Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
236    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
237    return b->CreateLoad(ptr);
238}
239
240void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
241    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
242    b->CreateStore(size, ptr);
243}
244
245Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
246    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
247    return b->CreateLoad(ptr);
248}
249
250void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
251    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
252    b->CreateStore(c, ptr);
253}
254
255void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
256    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
257        b->CreateAssert(handle, "handle cannot be null");
258    }
259    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
260    Type * const ptrTy = ptr->getType()->getPointerElementType();
261    if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
262        const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
263        const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
264        if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
265            report_fatal_error("SourceBuffer: base address was declared with address space "
266                                     + std::to_string(ptrSpace)
267                                     + " but given a pointer in address space "
268                                     + std::to_string(addrSpace));
269        }
270    } else {
271        report_fatal_error("SourceBuffer: base address is not a pointer type");
272    }
273    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
274        b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
275        DataLayout DL(b->getModule());
276        IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
277        Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
278        b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
279    }
280    b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
281}
282
283Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
284    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
285        b->CreateAssert(handle, "handle cannot be null");
286    }
287    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
288    return b->CreateLoad(ptr);
289}
290
291Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
292    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
293    Value * maxAvail = b->CreateNUWSub(getBufferedSize(b, handle), fromPosition);
294    return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
295}
296
297Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
298    report_fatal_error("SourceBuffers cannot be written");
299}
300
301void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
302    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
303        Type * const ty = getType();
304        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
305        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
306    } else {
307        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
308    }
309}
310
311void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
312
313}
314
315// External File Buffer
316void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
317    report_fatal_error("External buffers cannot be allocated.");
318}
319
320void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
321
322}
323
324Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
325    // All available items can be accessed.
326    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
327}
328
329Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
330    // Trust that the buffer is large enough to write any amount
331    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
332}
333
334Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
335    return ConstantInt::getAllOnesValue(b->getSizeTy());
336}
337
338
339// Circular Buffer
340Value * CircularBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * const blockIndex) const {
341    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
342}
343
344Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
345    Value * ptr = getBaseAddress(b, handle);
346    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
347    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
348    const auto bw = elemTy->getPrimitiveSizeInBits();
349    assert (is_power_2(bw));
350    if (bw < 8) {
351        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
352        relativePosition = b->CreateUDiv(relativePosition, fw);
353        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
354    } else {
355        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
356    }
357    return b->CreateGEP(ptr, relativePosition);
358}
359
360// CircularCopybackBuffer Buffer
361void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
362    Type * const ty = getType();
363    Constant * size = ConstantExpr::getSizeOf(ty);
364    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
365    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
366}
367
368
369// SwizzledCopybackBuffer Buffer
370void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
371    Type * const ty = getType();
372    Constant * size = ConstantExpr::getSizeOf(ty);
373    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
374    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
375}
376
377void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
378    Type * int8PtrTy = b->getInt8PtrTy();
379    DataLayout DL(b->getModule());
380    IntegerType * const intAddrTy = b->getIntPtrTy(DL);
381
382    Constant * blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
383    Function * f = b->GetInsertBlock()->getParent();
384    BasicBlock * wholeBlockCopy = BasicBlock::Create(b->getContext(), "wholeBlockCopy", f, 0);
385    BasicBlock * partialBlockCopy = BasicBlock::Create(b->getContext(), "partialBlockCopy", f, 0);
386    BasicBlock * copyDone = BasicBlock::Create(b->getContext(), "copyDone", f, 0);
387    const unsigned numStreams = getType()->getArrayNumElements();
388    const unsigned swizzleFactor = b->getBitBlockWidth()/mFieldWidth;
389    const auto elemTy = getType()->getArrayElementType();
390    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
391    Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
392    Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
393    Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
394    Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
395    b->CreateCondBr(b->CreateICmpUGT(blocksToCopy, b->getSize(0)), wholeBlockCopy, partialBlockCopy);
396
397    b->SetInsertPoint(wholeBlockCopy);
398    Value * copyLength = b->CreateSub(b->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), b->CreatePtrToInt(targetBlockPtr, intAddrTy));
399    b->CreateMemCpy(b->CreatePointerCast(targetBlockPtr, int8PtrTy), b->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
400    b->CreateCondBr(b->CreateICmpUGT(partialItems, b->getSize(0)), partialBlockCopy, copyDone);
401
402    b->SetInsertPoint(partialBlockCopy);
403    Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth * swizzleFactor));
404    Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
405    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
406        Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(strm)});
407        Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(strm)});
408        b->CreateMemCpy(b->CreatePointerCast(strmTargetPtr, int8PtrTy), b->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
409    }
410    b->CreateBr(copyDone);
411
412    b->SetInsertPoint(copyDone);
413}
414
415Value * SwizzledCopybackBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
416    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
417}
418
419// Expandable Buffer
420
421void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
422    mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
423    Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
424    b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
425    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
426    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
427    Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
428    const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
429    Value * const ptr = b->CreateAlignedMalloc(size, alignment);
430    b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
431    Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
432    b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
433}
434
435std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
436
437    // ENTRY
438    Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
439    Value * const capacity = b->CreateLoad(capacityPtr);
440    Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
441    Value * const streamSet = b->CreateLoad(streamSetPtr);
442    blockIndex = modBufferSize(b, blockIndex);
443
444    assert (streamIndex->getType() == capacity->getType());
445    Value * const cond = b->CreateICmpULT(streamIndex, capacity);
446
447    // Are we guaranteed that we can access this stream?
448    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
449        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
450            b->CreateAssert(cond, "out-of-bounds stream access");
451        }
452        Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
453        return {streamSet, offset};
454    }
455
456    BasicBlock * const entry = b->GetInsertBlock();
457    BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
458    BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
459
460    b->CreateLikelyCondBr(cond, resume, expand);
461
462    // EXPAND
463    b->SetInsertPoint(expand);
464
465    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
466    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
467
468    Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
469    newCapacity = b->CreateCeilLog2(newCapacity);
470    newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
471
472    std::string tmp;
473    raw_string_ostream out(tmp);
474    out << "__expand";
475    elementType->print(out);
476    std::string name = out.str();
477
478    Module * const m = b->getModule();
479    Function * expandFunction = m->getFunction(name);
480
481    if (expandFunction == nullptr) {
482
483        const auto ip = b->saveIP();
484
485        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
486        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
487
488        auto args = expandFunction->arg_begin();
489        Value * streamSet = &*args++;
490        Value * capacity = &*args++;
491        Value * newCapacity = &*args;
492
493        BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
494        b->SetInsertPoint(entry);
495
496        Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
497        const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
498
499        Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
500        Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
501
502        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
503        for (unsigned i = 0; i < mBufferBlocks; ++i) {
504            ConstantInt * const offset = b->getSize(i);
505            Value * srcOffset = b->CreateMul(capacity, offset);
506            Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
507            Value * destOffset = b->CreateMul(newCapacity, offset);
508            Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
509            b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
510            Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
511            Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
512            b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
513        }
514
515        b->CreateFree(streamSet);
516
517        b->CreateRet(newStreamSet);
518
519        b->restoreIP(ip);
520    }
521
522    Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
523    b->CreateStore(newStreamSet, streamSetPtr);
524    b->CreateStore(newCapacity, capacityPtr);
525
526    b->CreateBr(resume);
527
528    // RESUME
529    b->SetInsertPoint(resume);
530
531    PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
532    phiStreamSet->addIncoming(streamSet, entry);
533    phiStreamSet->addIncoming(newStreamSet, expand);
534
535    PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
536    phiCapacity->addIncoming(capacity, entry);
537    phiCapacity->addIncoming(newCapacity, expand);
538
539    Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
540
541    return {phiStreamSet, offset};
542}
543
544Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
545    report_fatal_error("temporarily not supported");
546//    Value * ptr, * offset;
547//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
548//    return b->CreateGEP(ptr, offset);
549}
550
551Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
552    report_fatal_error("temporarily not supported");
553//    Value * ptr, * offset;
554//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
555//    return b->CreateGEP(ptr, {offset, packIndex});
556}
557
558Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
559    return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
560}
561
562Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
563    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
564        b->CreateAssert(handle, "handle cannot be null");
565    }
566    Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
567    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
568        b->CreateAssert(handle, "base address cannot be 0");
569    }
570    return baseAddr;
571}
572
573void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
574    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
575}
576
577Value * ExpandableBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value *, Value *) const {
578    report_fatal_error("Expandable buffers: getBlockAddress is not supported.");
579}
580
581Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
582    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
583}
584
585
586Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
587    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
588        b->CreateAssert(handle, "handle cannot be null");
589    }
590    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
591    Value * const addr = b->CreateLoad(p);
592    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
593        b->CreateAssert(addr, "base address cannot be 0");
594    }
595    return addr;
596}
597
598Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
599    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
600    assert (blockIndex->getType() == workingBlocks->getType());
601    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
602}
603
604Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
605    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
606    Value * const absBlock = b->CreateUDiv(absolutePosition, blockSize);
607    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
608    Value * blockPtr = getBlockAddress(b, handle, absBlock);
609    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
610    const auto bw = elemTy->getPrimitiveSizeInBits();
611    assert (is_power_2(bw));
612    if (bw < 8) {
613        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
614        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
615    } else {
616        blockPtr = b->CreatePointerCast(blockPtr, elemTy->getPointerTo());
617    }
618    return b->CreateGEP(blockPtr, blockPos);
619}
620
621
622Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
623    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
624    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
625    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
626    assert (bufSize->getType() == fromPosition->getType());
627    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
628    if (reverse) {
629        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
630        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
631    } else {
632        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
633        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
634    }
635}
636
637Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
638    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
639    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
640    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
641    assert (bufSize->getType() == fromPosition->getType());
642    Value * bufRem = b->CreateURem(fromPosition, bufSize);
643    if (reverse) {
644        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
645    }
646    Constant * overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks);
647    bufSize = b->CreateMul(b->CreateAdd(bufBlocks, overflow), blockSize);
648    return b->CreateSub(bufSize, bufRem, "linearWritable");
649}
650
651Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
652    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))});
653    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
654}
655
656void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
657    Value * const handle = b->CreateCacheAlignedAlloca(mBufferStructType);
658    size_t numStreams = 1;
659    if (isa<ArrayType>(mBaseType)) {
660        numStreams = mBaseType->getArrayNumElements();
661    }
662    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
663    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
664    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
665    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
666    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
667    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
668    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
669        b->CallPrintInt("allocated: ", bufPtr);
670        b->CallPrintInt("allocated capacity: ", bufSize);
671    }
672    b->CreateStore(bufPtr, bufBasePtrField);
673    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
674    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
675    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
676    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
677    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
678    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
679    mStreamSetBufferPtr = handle;
680}
681
682void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
683    Value * const handle = mStreamSetBufferPtr;
684    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
685    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
686    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
687    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
688    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
689    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
690    Value * priorBuf = b->CreateLoad(priorBasePtrField);
691    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
692    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
693    b->SetInsertPoint(freePrior);
694    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
695        b->CallPrintInt("releasing: ", priorBuf);
696    }
697    b->CreateFree(priorBuf);
698    b->CreateBr(freeCurrent);
699    b->SetInsertPoint(freeCurrent);
700    b->CreateFree(b->CreateLoad(bufBasePtrField));
701}
702
703//
704//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
705//  ensures that we have correct data.   TODO: consider optimizing based on actual
706//  consumer and producer positions.
707//
708void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
709    size_t numStreams = 1;
710    if (isa<ArrayType>(mBaseType)) {
711        numStreams = mBaseType->getArrayNumElements();
712    }
713    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
714    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
715    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
716    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
717    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
718    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
719    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
720
721    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
722    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
723    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
724    Value * const curAllocated = b->CreateLoad(capacityField);
725    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
726    if (mOverflowBlocks > 0) {
727        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
728        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
729    }
730    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
731    BasicBlock * doubleEntry = b->GetInsertBlock();
732    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
733    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
734    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
735    b->SetInsertPoint(doRealloc);
736    // If there is a non-null priorBasePtr, free it.
737    Value * priorBuf = b->CreateLoad(priorBasePtrField);
738    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
739    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
740    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
741    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
742    b->SetInsertPoint(deallocatePrior);
743    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
744        b->CallPrintInt("deallocating: ", priorBuf);
745    }
746    b->CreateFree(priorBuf);
747    b->CreateBr(allocateNew);
748    b->SetInsertPoint(allocateNew);
749    b->CreateStore(oldBufPtr, priorBasePtrField);
750    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
751    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
752        b->CallPrintInt("re-allocated: ", newBufPtr);
753        b->CallPrintInt("allocated capacity: ", neededCapacity);
754    }
755    b->CreateStore(newBufPtr, bufBasePtrField);
756    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
757    b->CreateStore(neededCapacity, capacityField);
758    b->CreateBr(doCopy2);
759    b->SetInsertPoint(doCopy2);
760    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
761    bufPtr->addIncoming(oldBufPtr, doubleEntry);
762    bufPtr->addIncoming(newBufPtr, allocateNew);
763    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
764    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
765    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
766        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
767    }
768    b->CreateStore(currentWorkingBlocks, workingBlocksField);
769}
770
771inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
772    return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
773}
774
775SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
776: StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
777    mUniqueID = "B";
778    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
779        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
780    }
781}
782
783ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
784: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
785    mUniqueID = "E";
786    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
787    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
788}
789
790CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
791: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
792    mUniqueID = "C" + std::to_string(bufferBlocks);
793    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
794}
795
796CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
797: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
798
799}
800
801CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
802: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
803    if (bufferBlocks < 2 * overflowBlocks) {
804        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
805    }
806    mUniqueID = "CC" + std::to_string(bufferBlocks);
807    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
808    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
809}
810
811ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
812: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
813, mInitialCapacity(type->getArrayNumElements()) {
814    mUniqueID = "XP" + std::to_string(bufferBlocks);
815    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
816}
817
818SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
819: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace), mFieldWidth(fieldwidth) {
820    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
821    if (bufferBlocks < 2 * overflowBlocks) {
822        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
823    }
824    if (overflowBlocks != 1) {
825        mUniqueID += "_" + std::to_string(mOverflowBlocks);
826    }
827    if (AddressSpace > 0) {
828        mUniqueID += "@" + std::to_string(AddressSpace);
829    }
830}
831
832inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
833    IntegerType * sizeTy = b->getSizeTy();
834    PointerType * typePtr = baseType->getPointerTo(addrSpace);
835    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
836}
837
838DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
839: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace)
840, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
841, mSwizzleFactor(swizzle) {
842    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
843        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
844    }
845    mUniqueID = "DB";
846    if (swizzle != 1) {
847        mUniqueID += "s" + std::to_string(swizzle);
848    }
849        if (overflow != 0) {
850        mUniqueID += "o" + std::to_string(overflow);
851    }
852    if (addrSpace != 0) {
853        mUniqueID += "@" + std::to_string(addrSpace);
854    }
855}
856
857
858inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
859: mBufferKind(k)
860, mType(resolvedType)
861, mBufferBlocks(BufferBlocks)
862, mOverflowBlocks(OverflowBlocks)
863, mAddressSpace(AddressSpace)
864, mStreamSetBufferPtr(nullptr)
865, mBaseType(baseType)
866, mProducer(nullptr) {
867    assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
868    assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
869}
870
871StreamSetBuffer::~StreamSetBuffer() { }
872
873// Helper routines
874ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
875    unsigned numElements = 1;
876    if (LLVM_LIKELY(type->isArrayTy())) {
877        numElements = type->getArrayNumElements();
878        type = type->getArrayElementType();
879    }
880    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
881        type = type->getVectorElementType();
882        if (LLVM_LIKELY(type->isIntegerTy())) {
883            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
884            type = b->getBitBlockType();
885            if (fieldWidth != 1) {
886                type = ArrayType::get(type, fieldWidth);
887            }
888            return ArrayType::get(type, numElements);
889        }
890    }
891    std::string tmp;
892    raw_string_ostream out(tmp);
893    type->print(out);
894    out << " is an unvalid stream set buffer type.";
895    report_fatal_error(out.str());
896}
897
898StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
899    if (LLVM_LIKELY(type->isArrayTy())) {
900        type = type->getArrayElementType();
901    }
902    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
903        type = type->getVectorElementType();
904        if (LLVM_LIKELY(type->isIntegerTy())) {
905            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
906            type = b->getBitBlockType();
907            if (fieldWidth != 1) {
908                type = ArrayType::get(type, fieldWidth);
909            }
910            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
911        }
912    }
913    std::string tmp;
914    raw_string_ostream out(tmp);
915    type->print(out);
916    out << " is an unvalid stream set buffer type.";
917    report_fatal_error(out.str());
918}
Note: See TracBrowser for help on using the repository browser.