source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5755

Last change on this file since 5755 was 5755, checked in by nmedfort, 16 months ago

Bug fixes and simplified MultiBlockKernel logic

File size: 50.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
/**
 * @brief Tests whether a value is a (non-zero) power of two.
 * @param n the value to test
 * @return true iff exactly one bit of n is set
 */
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set.
    return (n & (n - 1)) == 0;
}
25
26Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
27
28ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
29
30StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
31
32void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
33    assert (mBufferBlocks > 0);
34    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
35        Type * const ty = getType();
36        if (mAddressSpace == 0) {
37            Constant * size = ConstantExpr::getSizeOf(ty);
38            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
39            mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
40        } else {
41            mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
42        }
43        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
44    } else {
45        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
46    }
47}
48
49void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
50    if (mAddressSpace == 0) {
51        b->CreateFree(mStreamSetBufferPtr);
52    }
53}
54
55inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
56    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
57}
58
59Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
60    assert (offset->getType()->isIntegerTy());
61    if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
62        return offset;
63    } else if (mBufferBlocks == 1) {
64        return ConstantInt::getNullValue(offset->getType());
65    } else if (is_power_2(mBufferBlocks)) {
66        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
67    } else {
68        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
69    }
70}
71
72Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
73    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
74        Value * const count = getStreamSetCount(b, handle);
75        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
76        Value * const cond = b->CreateICmpULT(index, count);
77        b->CreateAssert(cond, "out-of-bounds stream access");
78    }
79    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex});
80}
81
82Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
83    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
84        Value * const count = getStreamSetCount(b, handle);
85        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
86        Value * const cond = b->CreateICmpULT(index, count);
87        b->CreateAssert(cond, "out-of-bounds stream access");
88    }
89    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex, packIndex});
90}
91
92void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
93    report_fatal_error("setBaseAddress is not supported by this buffer type");
94}
95
96Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
97    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
98}
99
100void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
101    report_fatal_error("setBufferedSize is not supported by this buffer type");
102}
103
104Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
105    return getBufferedSize(b, handle);
106}
107
108void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
109    report_fatal_error("setCapacity is not supported by this buffer type");
110}
111
112Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value *) const {
113    size_t count = 1;
114    if (isa<ArrayType>(mBaseType)) {
115        count = mBaseType->getArrayNumElements();
116    }
117    return b->getSize(count);
118}
119
120/**
121 * @brief getRawItemPointer
122 *
123 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
124 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
125 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
126 */
127Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
128    Value * ptr = getBaseAddress(b, handle);
129    Value * relativePosition = absolutePosition;
130    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
131    const auto bw = elemTy->getPrimitiveSizeInBits();
132    assert (is_power_2(bw));
133    if (bw < 8) {
134        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
135        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
136            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
137        }
138        relativePosition = b->CreateUDiv(relativePosition, fw);
139        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
140    } else {
141        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
142    }
143    return b->CreateGEP(ptr, relativePosition);
144}
145
146Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
147    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
148    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
149    if (reverse) {
150        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
151        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
152    } else {
153        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
154        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
155    }
156}
157
158Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
159    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
160    Value * bufRem = b->CreateURem(fromPosition, bufSize);
161    if (reverse) {
162        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
163    }
164    return b->CreateSub(bufSize, bufRem, "linearSpace");
165}
166
167Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
168    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
169        b->CreateAssert(handle, "handle cannot be null");
170    }
171    return handle;
172}
173
174
175Value * StreamSetBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
176    return b->CreateGEP(getBaseAddress(b, handle), blockIndex);
177}
178
179void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
180    Type * i8ptr = b->getInt8PtrTy();
181    unsigned alignment = b->getBitBlockWidth() / 8;
182    size_t numStreams = 1;
183    if (isa<ArrayType>(mBaseType)) {
184        numStreams = mBaseType->getArrayNumElements();
185    }
186    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
187    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
188    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
189}
190
191inline bool isConstantZero(Value * const v) {
192    return isa<Constant>(v) && cast<Constant>(v)->isNullValue();
193}
194
195void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
196    Constant * const blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
197    size_t numStreams = 1;
198    if (isa<ArrayType>(mBaseType)) {
199        numStreams = mBaseType->getArrayNumElements();
200    }
201    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
202    if (numStreams == 1) {
203        Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth));
204        Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
205        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, copyBytes, alignment);
206    } else {
207        Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
208        Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
209        Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
210        Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
211        Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
212        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, blockCopyBytes, alignment);
213        Value * partialCopyBitsPerStream = b->CreateMul(partialItems, b->getSize(fieldWidth));
214        Value * partialCopyBytesPerStream = b->CreateLShr(b->CreateAdd(partialCopyBitsPerStream, b->getSize(7)), b->getSize(3));
215        for (unsigned i = 0; i < numStreams; i++) {
216            Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(i)});
217            Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(i)});
218            b->CreateMemCpy(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
219        }
220    }
221}
222
223void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
224    report_fatal_error("Copy back not supported for this buffer type:" + Name);
225}
226
227// Source File Buffer
228
229Type * SourceBuffer::getStreamSetBlockType() const {
230    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
231}
232
233Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
234    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
235    return b->CreateLoad(ptr);
236}
237
238void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
239    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
240    b->CreateStore(size, ptr);
241}
242
243Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
244    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
245    return b->CreateLoad(ptr);
246}
247
248void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
249    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
250    b->CreateStore(c, ptr);
251}
252
253void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
254    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
255        b->CreateAssert(handle, "handle cannot be null");
256    }
257    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
258    Type * const ptrTy = ptr->getType()->getPointerElementType();
259    if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
260        const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
261        const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
262        if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
263            report_fatal_error("SourceBuffer: base address was declared with address space "
264                                     + std::to_string(ptrSpace)
265                                     + " but given a pointer in address space "
266                                     + std::to_string(addrSpace));
267        }
268    } else {
269        report_fatal_error("SourceBuffer: base address is not a pointer type");
270    }
271    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
272        b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
273        DataLayout DL(b->getModule());
274        IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
275        Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
276        b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
277    }
278    b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
279}
280
281Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
282    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
283        b->CreateAssert(handle, "handle cannot be null");
284    }
285    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
286    return b->CreateLoad(ptr);
287}
288
289Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
290    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
291    Value * maxAvail = b->CreateSub(getBufferedSize(b, handle), fromPosition);
292    return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
293}
294
295Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
296    report_fatal_error("SourceBuffers cannot be written");
297}
298
299void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
300    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
301        Type * const ty = getType();
302        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
303        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
304    } else {
305        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
306    }
307}
308
309void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
310
311}
312
313// External File Buffer
314void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
315    report_fatal_error("External buffers cannot be allocated.");
316}
317
318void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
319
320}
321
322Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
323    // All available items can be accessed.
324    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
325}
326
327Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, const bool reverse) const {
328    // Trust that the buffer is large enough to write any amount
329    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
330}
331
332// Circular Buffer
333Value * CircularBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * const blockIndex) const {
334    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
335}
336
337Value * CircularBuffer::getLinearlyCopyableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
338//    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
339//    Value * from = b->CreateURem(fromPosition, bufSize);
340//    Value * avail = b->CreateURem(availItems, bufSize);
341//    Value * wraparound = b->CreateICmpUGT(from, avail);
342
343
344    return nullptr;
345}
346
347Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
348    Value * ptr = getBaseAddress(b, handle);
349    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
350    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
351    const auto bw = elemTy->getPrimitiveSizeInBits();
352    assert (is_power_2(bw));
353    if (bw < 8) {
354        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
355        relativePosition = b->CreateUDiv(relativePosition, fw);
356        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
357    } else {
358        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
359    }
360    return b->CreateGEP(ptr, relativePosition);
361}
362
363// CircularCopybackBuffer Buffer
364void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
365    Type * const ty = getType();
366    Constant * size = ConstantExpr::getSizeOf(ty);
367    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
368    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
369}
370
371Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
372    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
373    if (reverse) return writableProper;
374    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
375}
376
377void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
378    assert (priorProduced->getType() == newProduced->getType());
379    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
380    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
381    Value * newBufPos = b->CreateURem(newProduced, bufSize);
382    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_circularCopyBack");
383    BasicBlock * done = b->CreateBasicBlock(Name + "_circularCopyBackDone");
384    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
385    b->CreateCondBr(wraparound, copyBack, done);
386
387    b->SetInsertPoint(copyBack);
388    Value * const baseAddress = getBaseAddress(b, handle);
389    Value * overflowAddress = b->CreateGEP(baseAddress, b->getInt32(mBufferBlocks));
390    // copyStream(b, baseAddress, b->getSize(0), overflowAddress, b->getSize(0), newBufPos);
391    createBlockAlignedCopy(b, baseAddress, overflowAddress, newBufPos);
392    b->CreateBr(done);
393
394    b->SetInsertPoint(done);
395}
396
397
398// SwizzledCopybackBuffer Buffer
399
400void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
401    Type * const ty = getType();
402    Constant * size = ConstantExpr::getSizeOf(ty);
403    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
404    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
405}
406
407void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
408    Type * int8PtrTy = b->getInt8PtrTy();
409    DataLayout DL(b->getModule());
410    IntegerType * const intAddrTy = b->getIntPtrTy(DL);
411
412    Constant * blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
413    Function * f = b->GetInsertBlock()->getParent();
414    BasicBlock * wholeBlockCopy = BasicBlock::Create(b->getContext(), "wholeBlockCopy", f, 0);
415    BasicBlock * partialBlockCopy = BasicBlock::Create(b->getContext(), "partialBlockCopy", f, 0);
416    BasicBlock * copyDone = BasicBlock::Create(b->getContext(), "copyDone", f, 0);
417    const unsigned numStreams = getType()->getArrayNumElements();
418    const unsigned swizzleFactor = b->getBitBlockWidth()/mFieldWidth;
419    const auto elemTy = getType()->getArrayElementType();
420    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
421    Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
422    Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
423    Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
424    Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
425    b->CreateCondBr(b->CreateICmpUGT(blocksToCopy, b->getSize(0)), wholeBlockCopy, partialBlockCopy);
426
427    b->SetInsertPoint(wholeBlockCopy);
428    Value * copyLength = b->CreateSub(b->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), b->CreatePtrToInt(targetBlockPtr, intAddrTy));
429    b->CreateMemCpy(b->CreatePointerCast(targetBlockPtr, int8PtrTy), b->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
430    b->CreateCondBr(b->CreateICmpUGT(partialItems, b->getSize(0)), partialBlockCopy, copyDone);
431
432    b->SetInsertPoint(partialBlockCopy);
433    Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth * swizzleFactor));
434    Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
435    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
436        Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(strm)});
437        Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(strm)});
438        b->CreateMemCpy(b->CreatePointerCast(strmTargetPtr, int8PtrTy), b->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
439    }
440    b->CreateBr(copyDone);
441
442    b->SetInsertPoint(copyDone);
443}
444
445Value * SwizzledCopybackBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
446    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
447}
448
449Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
450    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
451    if (reverse) return writableProper;
452    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
453}
454
455void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
456    assert (priorProduced->getType() == newProduced->getType());
457    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
458    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
459    Value * newBufPos = b->CreateURem(newProduced, bufSize);
460    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_swizzledCopyBack");
461    BasicBlock * done = b->CreateBasicBlock(Name + "_swizzledCopyBackDone");
462    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
463    b->CreateCondBr(wraparound, copyBack, done);
464    b->SetInsertPoint(copyBack);
465    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
466    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
467    b->CreateBr(done);
468    b->SetInsertPoint(done);
469}
470
471// Expandable Buffer
472
473void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
474    mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
475    Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
476    b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
477    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
478    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
479    Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
480    const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
481    Value * const ptr = b->CreateAlignedMalloc(size, alignment);
482    b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
483    Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
484    b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
485}
486
487std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
488
489    // ENTRY
490    Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
491    Value * const capacity = b->CreateLoad(capacityPtr);
492    Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
493    Value * const streamSet = b->CreateLoad(streamSetPtr);
494    blockIndex = modBufferSize(b, blockIndex);
495
496    assert (streamIndex->getType() == capacity->getType());
497    Value * const cond = b->CreateICmpULT(streamIndex, capacity);
498
499    // Are we guaranteed that we can access this stream?
500    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
501        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
502            b->CreateAssert(cond, "out-of-bounds stream access");
503        }
504        Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
505        return {streamSet, offset};
506    }
507
508    BasicBlock * const entry = b->GetInsertBlock();
509    BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
510    BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
511
512    b->CreateLikelyCondBr(cond, resume, expand);
513
514    // EXPAND
515    b->SetInsertPoint(expand);
516
517    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
518    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
519
520    Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
521    newCapacity = b->CreateCeilLog2(newCapacity);
522    newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
523
524    std::string tmp;
525    raw_string_ostream out(tmp);
526    out << "__expand";
527    elementType->print(out);
528    std::string name = out.str();
529
530    Module * const m = b->getModule();
531    Function * expandFunction = m->getFunction(name);
532
533    if (expandFunction == nullptr) {
534
535        const auto ip = b->saveIP();
536
537        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
538        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
539
540        auto args = expandFunction->arg_begin();
541        Value * streamSet = &*args++;
542        Value * capacity = &*args++;
543        Value * newCapacity = &*args;
544
545        BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
546        b->SetInsertPoint(entry);
547
548        Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
549        const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
550
551        Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
552        Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
553
554        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
555        for (unsigned i = 0; i < mBufferBlocks; ++i) {
556            ConstantInt * const offset = b->getSize(i);
557            Value * srcOffset = b->CreateMul(capacity, offset);
558            Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
559            Value * destOffset = b->CreateMul(newCapacity, offset);
560            Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
561            b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
562            Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
563            Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
564            b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
565        }
566
567        b->CreateFree(streamSet);
568
569        b->CreateRet(newStreamSet);
570
571        b->restoreIP(ip);
572    }
573
574    Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
575    b->CreateStore(newStreamSet, streamSetPtr);
576    b->CreateStore(newCapacity, capacityPtr);
577
578    b->CreateBr(resume);
579
580    // RESUME
581    b->SetInsertPoint(resume);
582
583    PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
584    phiStreamSet->addIncoming(streamSet, entry);
585    phiStreamSet->addIncoming(newStreamSet, expand);
586
587    PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
588    phiCapacity->addIncoming(capacity, entry);
589    phiCapacity->addIncoming(newCapacity, expand);
590
591    Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
592
593    return {phiStreamSet, offset};
594}
595
596Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
597    report_fatal_error("temporarily not supported");
598//    Value * ptr, * offset;
599//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
600//    return b->CreateGEP(ptr, offset);
601}
602
603Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
604    report_fatal_error("temporarily not supported");
605//    Value * ptr, * offset;
606//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
607//    return b->CreateGEP(ptr, {offset, packIndex});
608}
609
610Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
611    return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
612}
613
614Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
615    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
616        b->CreateAssert(handle, "handle cannot be null");
617    }
618    Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
619    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
620        b->CreateAssert(handle, "base address cannot be 0");
621    }
622    return baseAddr;
623}
624
625void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
626    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
627}
628
629Value * ExpandableBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value *, Value *) const {
630    report_fatal_error("Expandable buffers: getBlockAddress is not supported.");
631}
632
633Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
634    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
635}
636
637
638Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
639    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
640        b->CreateAssert(handle, "handle cannot be null");
641    }
642    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
643    Value * const addr = b->CreateLoad(p);
644    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
645        b->CreateAssert(addr, "base address cannot be 0");
646    }
647    return addr;
648}
649
650Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
651    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
652    assert (blockIndex->getType() == workingBlocks->getType());
653    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
654}
655
656Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
657    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
658    Value * const absBlock = b->CreateUDiv(absolutePosition, blockSize);
659    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
660    Value * blockPtr = getBlockAddress(b, handle, absBlock);
661    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
662    const auto bw = elemTy->getPrimitiveSizeInBits();
663    assert (is_power_2(bw));
664    if (bw < 8) {
665        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
666        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
667    } else {
668        blockPtr = b->CreatePointerCast(blockPtr, elemTy->getPointerTo());
669    }
670    return b->CreateGEP(blockPtr, blockPos);
671}
672
673
674Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
675    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
676    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
677    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
678    assert (bufSize->getType() == fromPosition->getType());
679    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
680    if (reverse) {
681        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
682        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
683    } else {
684        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
685        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
686    }
687}
688
689Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
690    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
691    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
692    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
693    assert (bufSize->getType() == fromPosition->getType());
694    Value * bufRem = b->CreateURem(fromPosition, bufSize);
695    if (reverse) {
696        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
697    }
698    Constant * overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks);
699    bufSize = b->CreateMul(b->CreateAdd(bufBlocks, overflow), blockSize);
700    return b->CreateSub(bufSize, bufRem, "linearWritable");
701}
702
703Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
704    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))});
705    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
706}
707
708void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) const {
709    assert (priorProducedCount->getType() == newProducedCount->getType());   
710    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
711    assert (workingBlocks->getType() == newProducedCount->getType());
712    Value * bufSize = b->CreateMul(workingBlocks, ConstantInt::get(workingBlocks->getType(), b->getBitBlockWidth()));
713    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
714    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
715    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_dynamicCopyBack");
716    BasicBlock * done = b->CreateBasicBlock(Name + "_dynamicCopyBackDone");
717
718    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
719    b->CreateCondBr(wraparound, copyBack, done);
720
721    b->SetInsertPoint(copyBack);
722    Value * bufBasePtr = getBaseAddress(b, handle);
723    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
724    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
725    b->CreateBr(done);
726
727    b->SetInsertPoint(done);
728}
729
730void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
731    Value * const handle = b->CreateCacheAlignedAlloca(mBufferStructType);
732    size_t numStreams = 1;
733    if (isa<ArrayType>(mBaseType)) {
734        numStreams = mBaseType->getArrayNumElements();
735    }
736    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
737    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
738    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
739    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
740    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
741    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
742    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
743        b->CallPrintInt("allocated: ", bufPtr);
744        b->CallPrintInt("allocated capacity: ", bufSize);
745    }
746    b->CreateStore(bufPtr, bufBasePtrField);
747    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
748    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
749    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
750    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
751    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
752    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
753    mStreamSetBufferPtr = handle;
754}
755
756void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
757    Value * const handle = mStreamSetBufferPtr;
758    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
759    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
760    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
761    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
762    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
763    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
764    Value * priorBuf = b->CreateLoad(priorBasePtrField);
765    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
766    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
767    b->SetInsertPoint(freePrior);
768    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
769        b->CallPrintInt("releasing: ", priorBuf);
770    }
771    b->CreateFree(priorBuf);
772    b->CreateBr(freeCurrent);
773    b->SetInsertPoint(freeCurrent);
774    b->CreateFree(b->CreateLoad(bufBasePtrField));
775}
776
//
//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
//  ensures that we have correct data.   TODO: consider optimizing based on actual
//  consumer and producer positions.
//
//  Emitted logic, in outline:
//    1. neededCapacity = 2 * (WorkingBlocks * bytes-per-block) + overflow bytes,
//       rounded up to the cache alignment.
//    2. If the allocated capacity is smaller than that, a new buffer is malloc'ed:
//       any previously retained prior buffer is freed, the current buffer becomes
//       the prior buffer (it is not freed here), and the working blocks are copied
//       into the new buffer.
//    3. In either case the first WorkingBlocks blocks are duplicated into the
//       following WorkingBlocks blocks, and WorkingBlocks is doubled.
//
void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) {
    size_t numStreams = 1;
    if (isa<ArrayType>(mBaseType)) {
        numStreams = mBaseType->getArrayNumElements();
    }
    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
    // Bytes occupied by one block across all streams.
    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
    // Addresses of the buffer struct fields that are read or updated below.
    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});

    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
    Value * const curAllocated = b->CreateLoad(capacityField);
    // Twice the current working size...
    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
    if (mOverflowBlocks > 0) {
        // ...plus room for the overflow blocks.
        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
    }
    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
    // Remember the block we branch from; it is one of the PHI predecessors in doCopy2.
    BasicBlock * doubleEntry = b->GetInsertBlock();
    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
    // Reallocate only when the existing allocation cannot hold the doubled buffer.
    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
    b->SetInsertPoint(doRealloc);
    // If there is a non-null priorBasePtr, free it.
    Value * priorBuf = b->CreateLoad(priorBasePtrField);
    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
    b->SetInsertPoint(deallocatePrior);
    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
        b->CallPrintInt("deallocating: ", priorBuf);
    }
    b->CreateFree(priorBuf);
    b->CreateBr(allocateNew);
    b->SetInsertPoint(allocateNew);
    // The current buffer is retained as the prior buffer rather than freed here.
    b->CreateStore(oldBufPtr, priorBasePtrField);
    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
        b->CallPrintInt("re-allocated: ", newBufPtr);
        b->CallPrintInt("allocated capacity: ", neededCapacity);
    }
    b->CreateStore(newBufPtr, bufBasePtrField);
    // Carry the existing working blocks over to the new allocation
    // (presumably (dest, src, count) argument order; confirm against createBlockCopy).
    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
    b->CreateStore(neededCapacity, capacityField);
    b->CreateBr(doCopy2);
    b->SetInsertPoint(doCopy2);
    // Buffer in use from here on: the old one if no reallocation happened, else the new one.
    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
    bufPtr->addIncoming(oldBufPtr, doubleEntry);
    bufPtr->addIncoming(newBufPtr, allocateNew);
    // Duplicate the working blocks into the region immediately following them.
    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
    // Publish the doubled working block count.
    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
    }
    b->CreateStore(currentWorkingBlocks, workingBlocksField);
}
844
845inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
846    return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
847}
848
849SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
850: StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
851    mUniqueID = "B";
852    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
853        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
854    }
855}
856
857ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
858: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
859    mUniqueID = "E";
860    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
861    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
862}
863
864CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
865: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
866    mUniqueID = "C" + std::to_string(bufferBlocks);
867    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
868}
869
870CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
871: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
872
873}
874
875CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
876: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
877    if (bufferBlocks < 2 * overflowBlocks) {
878        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
879    }
880    mUniqueID = "CC" + std::to_string(bufferBlocks);
881    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
882    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
883}
884
885ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
886: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
887, mInitialCapacity(type->getArrayNumElements()) {
888    mUniqueID = "XP" + std::to_string(bufferBlocks);
889    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
890}
891
892SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
893: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace), mFieldWidth(fieldwidth) {
894    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
895    if (bufferBlocks < 2 * overflowBlocks) {
896        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
897    }
898    if (overflowBlocks != 1) {
899        mUniqueID += "_" + std::to_string(mOverflowBlocks);
900    }
901    if (AddressSpace > 0) {
902        mUniqueID += "@" + std::to_string(AddressSpace);
903    }
904}
905
906inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
907    IntegerType * sizeTy = b->getSizeTy();
908    PointerType * typePtr = baseType->getPointerTo(addrSpace);
909    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
910}
911
912DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
913: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace)
914, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
915, mSwizzleFactor(swizzle) {
916    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
917        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
918    }
919    mUniqueID = "DB";
920    if (swizzle != 1) {
921        mUniqueID += "s" + std::to_string(swizzle);
922    }
923        if (overflow != 0) {
924        mUniqueID += "o" + std::to_string(overflow);
925    }
926    if (addrSpace != 0) {
927        mUniqueID += "@" + std::to_string(addrSpace);
928    }
929}
930
931
932inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
933: mBufferKind(k)
934, mType(resolvedType)
935, mBufferBlocks(BufferBlocks)
936, mOverflowBlocks(OverflowBlocks)
937, mAddressSpace(AddressSpace)
938, mStreamSetBufferPtr(nullptr)
939, mBaseType(baseType)
940, mProducer(nullptr) {
941    assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
942    assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
943}
944
945StreamSetBuffer::~StreamSetBuffer() { }
946
947// Helper routines
948ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
949    unsigned numElements = 1;
950    if (LLVM_LIKELY(type->isArrayTy())) {
951        numElements = type->getArrayNumElements();
952        type = type->getArrayElementType();
953    }
954    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
955        type = type->getVectorElementType();
956        if (LLVM_LIKELY(type->isIntegerTy())) {
957            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
958            type = b->getBitBlockType();
959            if (fieldWidth != 1) {
960                type = ArrayType::get(type, fieldWidth);
961            }
962            return ArrayType::get(type, numElements);
963        }
964    }
965    std::string tmp;
966    raw_string_ostream out(tmp);
967    type->print(out);
968    out << " is an unvalid stream set buffer type.";
969    report_fatal_error(out.str());
970}
971
972StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
973    if (LLVM_LIKELY(type->isArrayTy())) {
974        type = type->getArrayElementType();
975    }
976    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
977        type = type->getVectorElementType();
978        if (LLVM_LIKELY(type->isIntegerTy())) {
979            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
980            type = b->getBitBlockType();
981            if (fieldWidth != 1) {
982                type = ArrayType::get(type, fieldWidth);
983            }
984            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
985        }
986    }
987    std::string tmp;
988    raw_string_ostream out(tmp);
989    type->print(out);
990    out << " is an unvalid stream set buffer type.";
991    report_fatal_error(out.str());
992}
Note: See TracBrowser for help on using the repository browser.