source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5733

Last change on this file since 5733 was 5733, checked in by cameron, 18 months ago

Changes for compatibility with LLVM 5.0.0

File size: 51.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
28
29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
30    assert (mBufferBlocks > 0);
31    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
32        Type * const ty = getType();
33        if (mAddressSpace == 0) {
34            Constant * size = ConstantExpr::getSizeOf(ty);
35            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
36            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
37        } else {
38            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
39        }
40        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
41    } else {
42        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
43    }
44}
45
46void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
47    if (mAddressSpace == 0) {
48        iBuilder->CreateFree(mStreamSetBufferPtr);
49    }
50}
51
52Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr, Value * streamIndex, const bool /* readOnly */) const {
53    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
54        Value * const count = getStreamSetCount(iBuilder, self);
55        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
56        Value * const cond = iBuilder->CreateICmpULT(index, count);
57        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
58    }
59    return iBuilder->CreateGEP(addr, {iBuilder->getInt32(0), streamIndex});
60}
61
62Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr, Value * streamIndex, Value * packIndex, const bool /* readOnly */) const {
63    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
64        Value * const count = getStreamSetCount(iBuilder, self);
65        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
66        Value * const cond = iBuilder->CreateICmpULT(index, count);
67        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
68    }
69    return iBuilder->CreateGEP(addr, {iBuilder->getInt32(0), streamIndex, packIndex});
70}
71
72void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */, Value * /* addr */) const {
73    report_fatal_error("setBaseAddress is not supported by this buffer type");
74}
75
76Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
77    report_fatal_error("getBufferedSize is not supported by this buffer type");
78}
79
80void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */, llvm::Value * /* size */) const {
81    report_fatal_error("setBufferedSize is not supported by this buffer type");
82}
83
84Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
85    return getBufferedSize(iBuilder, self);
86}
87
88void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */, llvm::Value * /* c */) const {
89    report_fatal_error("setCapacity is not supported by this buffer type");
90}
91
92inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
93    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
94        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
95            return true;
96        }
97    }
98    return false;
99}
100
101Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
102    size_t count = 1;
103    if (isa<ArrayType>(mBaseType)) {
104        count = mBaseType->getArrayNumElements();
105    }
106    return iBuilder->getSize(count);
107}
108
109inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
110    assert (offset->getType()->isIntegerTy());
111    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
112        return offset;
113    } else if (mBufferBlocks == 1) {
114        return ConstantInt::getNullValue(iBuilder->getSizeTy());
115    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
116        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
117    } else {
118        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
119    }
120}
121
122/**
123 * @brief getRawItemPointer
124 *
125 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
126 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
127 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
128 */
129Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * absolutePosition) const {
130    Value * ptr = getBaseAddress(iBuilder, self);
131    Value * relativePosition = absolutePosition;
132    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
133    if (bw < 8) {
134        assert (bw  == 1 || bw == 2 || bw == 4);
135        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
136        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
137    } else {
138        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
139    }
140    return iBuilder->CreateGEP(ptr, relativePosition);
141}
142
143Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * self, Value * fromPosition, Value * availItems, bool reverse) const {
144    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
145    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
146    if (reverse) {
147        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
148        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
149    } else {
150        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
151        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
152    }
153}
154
155Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
156    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * iBuilder->getStride());
157    Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
158    if (reverse) {
159        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
160    }
161    return iBuilder->CreateSub(bufSize, bufRem, "linearSpace");
162}
163
164Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
165    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
166    return self;
167}
168
169void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
170    Type * i8ptr = iBuilder->getInt8PtrTy();
171    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
172    size_t numStreams = 1;
173    if (isa<ArrayType>(mBaseType)) {
174        numStreams = mBaseType->getArrayNumElements();
175    }
176    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
177    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
178    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
179}
180
181Value * StreamSetBuffer::copy(IDISA::IDISA_Builder * const b, Value * self, Value * const target, Value * const source, Value * itemsToCopy, const unsigned alignment) const {
182    Type * ty = getBaseType();
183    if (LLVM_LIKELY(isa<ArrayType>(ty))) {
184        ty = ty->getArrayElementType();
185    }
186    if (LLVM_LIKELY(isa<VectorType>(ty))) {
187        ty = ty->getVectorElementType();
188    }
189    const auto itemWidth = ty->getScalarSizeInBits();
190    assert (itemWidth > 0);
191    Value * const m = b->CreateMul(getStreamSetCount(b, self), b->getSize(itemWidth / 8));
192    Value * const bytesToCopy = b->CreateMul(itemsToCopy, m);
193
194    // TODO: lz4d s2p reads misaligned data into the source stream. The stream binding should indicate alignment.
195    // alignment ? alignment : b->getBitBlockWidth() / 8
196    b->CreateMemCpy(target, source, bytesToCopy, 1);
197    return bytesToCopy;
198}
199
200void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
201    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
202    Constant * const blockSize = ConstantInt::get(itemsToCopy->getType(), iBuilder->getBitBlockWidth());
203    size_t numStreams = 1;
204    if (isa<ArrayType>(mBaseType)) {
205        numStreams = mBaseType->getArrayNumElements();
206    }
207    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
208    if (numStreams == 1) {
209        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
210        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
211        iBuilder->CreateMemMove(targetBlockPtr, sourceBlockPtr, copyBytes, alignment);
212    } else {
213        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
214        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
215        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
216        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
217        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
218        iBuilder->CreateMemMove(targetBlockPtr, sourceBlockPtr, blockCopyBytes, alignment);
219        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
220        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
221        for (unsigned i = 0; i < numStreams; i++) {
222            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
223            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
224            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
225        }
226    }
227}
228
229void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
230    report_fatal_error("Copy back not supported for this buffer type:" + Name);
231}
232
233// Source File Buffer
234
235Type * SourceBuffer::getStreamSetBlockType() const {
236    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
237}
238
239
240Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
241    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
242    return iBuilder->CreateLoad(ptr);
243}
244
245void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
246    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
247    iBuilder->CreateStore(size, ptr);
248}
249
250Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
251    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
252    return iBuilder->CreateLoad(ptr);
253}
254
255void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
256    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
257    iBuilder->CreateStore(c, ptr);
258}
259
260void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
261    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
262    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
263}
264
265Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
266    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
267    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
268    Value * const addr = iBuilder->CreateLoad(ptr);
269    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
270    return addr;
271}
272
273Value * SourceBuffer::getBlockAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
274    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex );
275}
276
277Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, Value * availItems, bool reverse) const {
278    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
279    Value * maxAvail = iBuilder->CreateSub(getBufferedSize(iBuilder, self), fromPosition);
280    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
281}
282
283Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
284    report_fatal_error("SourceBuffers cannot be written");
285}
286
287void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
288    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
289        Type * const ty = getType();
290        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
291        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
292    } else {
293        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
294    }
295}
296
297void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
298
299}
300
301// External File Buffer
302void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
303    report_fatal_error("External buffers cannot be allocated.");
304}
305
306void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
307
308}
309
310Value * ExternalBuffer::getBlockAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
311    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
312}
313
314Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
315    // All available items can be accessed.
316    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
317}
318
319Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, const bool reverse) const {
320    // Trust that the buffer is large enough to write any amount
321    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
322}
323
324// Circular Buffer
325Value * CircularBuffer::getBlockAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
326    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
327}
328
329Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * absolutePosition) const {
330    Value * ptr = getBaseAddress(iBuilder, self);
331    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
332    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
333    if (bw < 8) {
334        assert (bw  == 1 || bw == 2 || bw == 4);
335        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
336        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
337    } else {
338        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
339    }
340    return iBuilder->CreateGEP(ptr, relativePosition);
341}
342
343// CircularCopybackBuffer Buffer
344void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
345    Type * const ty = getType();
346    Constant * size = ConstantExpr::getSizeOf(ty);
347    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
348    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
349}
350
351Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
352    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(iBuilder, self, fromPosition, reverse);
353    if (reverse) return writableProper;
354    return iBuilder->CreateAdd(writableProper, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
355}
356
357void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
358    assert (priorProduced->getType() == newProduced->getType());
359    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
360    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
361    Value * newBufPos = b->CreateURem(newProduced, bufSize);
362    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_circularCopyBack");
363    BasicBlock * done = b->CreateBasicBlock(Name + "_circularCopyBackDone");
364    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
365    b->CreateCondBr(wraparound, copyBack, done);
366
367    b->SetInsertPoint(copyBack);
368    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getInt32(mBufferBlocks));
369    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
370    b->CreateBr(done);
371
372    b->SetInsertPoint(done);
373}
374
375
376// SwizzledCopybackBuffer Buffer
377
378void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
379    Type * const ty = getType();
380    Constant * size = ConstantExpr::getSizeOf(ty);
381    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
382    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
383}
384
385void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
386    Type * int8PtrTy = iBuilder->getInt8PtrTy();
387    DataLayout DL(iBuilder->getModule());
388    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
389
390    Constant * blockSize = ConstantInt::get(itemsToCopy->getType(), iBuilder->getBitBlockWidth());
391    Function * f = iBuilder->GetInsertBlock()->getParent();
392    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
393    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
394    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
395    const unsigned numStreams = getType()->getArrayNumElements();
396    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
397    const auto elemTy = getType()->getArrayElementType();
398    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
399    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
400    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
401    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
402    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
403    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
404
405    iBuilder->SetInsertPoint(wholeBlockCopy);
406    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
407    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
408    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
409    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
410    iBuilder->SetInsertPoint(partialBlockCopy);
411    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
412    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
413    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
414        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
415        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
416        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
417    }
418    iBuilder->CreateBr(copyDone);
419
420    iBuilder->SetInsertPoint(copyDone);
421}
422
423Value * SwizzledCopybackBuffer::getBlockAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
424    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
425}
426
427Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
428    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(iBuilder, self, fromPosition, reverse);
429    if (reverse) return writableProper;
430    return iBuilder->CreateAdd(writableProper, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
431}
432
433void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
434    assert (priorProduced->getType() == newProduced->getType());
435    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
436    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
437    Value * newBufPos = b->CreateURem(newProduced, bufSize);
438    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_swizzledCopyBack");
439    BasicBlock * done = b->CreateBasicBlock(Name + "_swizzledCopyBackDone");
440    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
441    b->CreateCondBr(wraparound, copyBack, done);
442    b->SetInsertPoint(copyBack);
443    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
444    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
445    b->CreateBr(done);
446    b->SetInsertPoint(done);
447}
448
449// Expandable Buffer
450
451void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
452    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
453    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
454    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
455    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
456    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
457    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
458    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
459    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
460    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
461    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
462    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
463}
464
465std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
466
467    // ENTRY
468    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
469    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
470    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
471    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
472    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
473
474    assert (streamIndex->getType() == capacity->getType());
475    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
476
477    // Are we guaranteed that we can access this stream?
478    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
479        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
480        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
481        return {streamSet, offset};
482    }
483
484    BasicBlock * const entry = iBuilder->GetInsertBlock();
485    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
486    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
487
488    iBuilder->CreateLikelyCondBr(cond, resume, expand);
489
490    // EXPAND
491    iBuilder->SetInsertPoint(expand);
492
493    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
494    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
495
496    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
497    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
498    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
499
500    std::string tmp;
501    raw_string_ostream out(tmp);
502    out << "__expand";
503    elementType->print(out);
504    std::string name = out.str();
505
506    Module * const m = iBuilder->getModule();
507    Function * expandFunction = m->getFunction(name);
508
509    if (expandFunction == nullptr) {
510
511        const auto ip = iBuilder->saveIP();
512
513        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
514        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
515
516        auto args = expandFunction->arg_begin();
517        Value * streamSet = &*args++;
518        Value * capacity = &*args++;
519        Value * newCapacity = &*args;
520
521        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
522        iBuilder->SetInsertPoint(entry);
523
524        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
525        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
526
527        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
528        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
529
530        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
531        for (unsigned i = 0; i < mBufferBlocks; ++i) {
532            ConstantInt * const offset = iBuilder->getSize(i);
533            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
534            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
535            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
536            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
537            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
538            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
539            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
540            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
541        }
542
543        iBuilder->CreateFree(streamSet);
544
545        iBuilder->CreateRet(newStreamSet);
546
547        iBuilder->restoreIP(ip);
548    }
549
550    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
551    iBuilder->CreateStore(newStreamSet, streamSetPtr);
552    iBuilder->CreateStore(newCapacity, capacityPtr);
553
554    iBuilder->CreateBr(resume);
555
556    // RESUME
557    iBuilder->SetInsertPoint(resume);
558
559    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
560    phiStreamSet->addIncoming(streamSet, entry);
561    phiStreamSet->addIncoming(newStreamSet, expand);
562
563    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
564    phiCapacity->addIncoming(capacity, entry);
565    phiCapacity->addIncoming(newCapacity, expand);
566
567    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
568
569    return {phiStreamSet, offset};
570}
571
572Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
573    report_fatal_error("temporarily not supported");
574//    Value * ptr, * offset;
575//    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
576//    return iBuilder->CreateGEP(ptr, offset);
577}
578
579Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
580    report_fatal_error("temporarily not supported");
581//    Value * ptr, * offset;
582//    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
583//    return iBuilder->CreateGEP(ptr, {offset, packIndex});
584}
585
586Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
587    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
588}
589
590Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
591    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
592    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
593    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
594    return baseAddr;
595}
596
597void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
598    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
599}
600
601Value * ExpandableBuffer::getBlockAddress(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
602    report_fatal_error("Expandable buffers: getBlockAddress is not supported.");
603}
604
605Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
606    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
607}
608
609
610Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
611    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
612    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
613    Value * const addr = b->CreateLoad(p);
614    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
615    return addr;
616}
617
618Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
619    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
620    assert (blockIndex->getType() == wkgBlocks->getType());
621    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
622}
623
624Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * absolutePosition) const {
625    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
626    Value * absBlock = b->CreateUDiv(absolutePosition, blockSize);
627    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
628    Value * blockPtr = getBlockAddress(b, handle, absBlock);
629    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
630    if (bw < 8) {
631        assert (bw  == 1 || bw == 2 || bw == 4);
632        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
633        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
634    } else {
635        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
636    }
637    return b->CreateGEP(blockPtr, blockPos);
638}
639
640
641Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, Value * availItems, bool reverse) const {
642    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
643    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
644    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
645    assert (bufSize->getType() == fromPosition->getType());
646    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
647    if (reverse) {
648        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
649        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
650    } else {
651        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
652        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
653    }
654}
655
656Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {   
657    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
658    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
659    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
660    assert (bufSize->getType() == fromPosition->getType());
661    Value * bufRem = b->CreateURem(fromPosition, bufSize);
662    if (reverse) {
663        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
664    }
665    Constant * overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks);
666    bufSize = b->CreateMul(b->CreateAdd(bufBlocks, overflow), blockSize);
667    return b->CreateSub(bufSize, bufRem, "linearWritable");
668}
669
670Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
671    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
672    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
673}
674
675void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) const {
676    assert (priorProducedCount->getType() == newProducedCount->getType());   
677    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
678    assert (workingBlocks->getType() == newProducedCount->getType());
679    Value * bufSize = b->CreateMul(workingBlocks, ConstantInt::get(workingBlocks->getType(), b->getBitBlockWidth()));
680    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
681    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
682    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_dynamicCopyBack");
683    BasicBlock * done = b->CreateBasicBlock(Name + "_dynamicCopyBackDone");
684
685    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
686    b->CreateCondBr(wraparound, copyBack, done);
687
688    b->SetInsertPoint(copyBack);
689    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
690    Value * bufBasePtr = b->CreateLoad(bufBasePtrField);
691    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
692    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
693    b->CreateBr(done);
694
695    b->SetInsertPoint(done);
696}
697
698void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
699    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
700    size_t numStreams = 1;
701    if (isa<ArrayType>(mBaseType)) {
702        numStreams = mBaseType->getArrayNumElements();
703    }
704    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
705    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
706    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
707    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
708    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
709    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
710    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
711        b->CallPrintInt("allocated: ", bufPtr);
712        b->CallPrintInt("allocated capacity: ", bufSize);
713    }
714    b->CreateStore(bufPtr, bufBasePtrField);
715    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
716    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
717    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
718    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
719    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
720    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
721    mStreamSetBufferPtr = handle;
722}
723
724void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
725    Value * handle = mStreamSetBufferPtr;
726    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
727    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
728    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
729    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
730    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
731    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
732    Value * priorBuf = b->CreateLoad(priorBasePtrField);
733    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
734    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
735    b->SetInsertPoint(freePrior);
736    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
737        b->CallPrintInt("releasing: ", priorBuf);
738    }
739    b->CreateFree(priorBuf);
740    b->CreateBr(freeCurrent);
741    b->SetInsertPoint(freeCurrent);
742    b->CreateFree(b->CreateLoad(bufBasePtrField));
743}
744
745//
746//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
747//  ensures that we have correct data.   TODO: consider optimizing based on actual
748//  consumer and producer positions.
749//
750void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
751    size_t numStreams = 1;
752    if (isa<ArrayType>(mBaseType)) {
753        numStreams = mBaseType->getArrayNumElements();
754    }
755    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
756    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
757    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
758    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
759    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
760    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
761    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
762
763    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
764    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
765    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
766    Value * const curAllocated = b->CreateLoad(capacityField);
767    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
768    if (mOverflowBlocks > 0) {
769        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
770        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
771    }
772    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
773    BasicBlock * doubleEntry = b->GetInsertBlock();
774    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
775    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
776    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
777    b->SetInsertPoint(doRealloc);
778    // If there is a non-null priorBasePtr, free it.
779    Value * priorBuf = b->CreateLoad(priorBasePtrField);
780    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
781    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
782    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
783    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
784    b->SetInsertPoint(deallocatePrior);
785    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
786        b->CallPrintInt("deallocating: ", priorBuf);
787    }
788    b->CreateFree(priorBuf);
789    b->CreateBr(allocateNew);
790    b->SetInsertPoint(allocateNew);
791    b->CreateStore(oldBufPtr, priorBasePtrField);
792    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
793    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
794        b->CallPrintInt("re-allocated: ", newBufPtr);
795        b->CallPrintInt("allocated capacity: ", neededCapacity);
796    }
797    b->CreateStore(newBufPtr, bufBasePtrField);
798    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
799    b->CreateStore(neededCapacity, capacityField);
800    b->CreateBr(doCopy2);
801    b->SetInsertPoint(doCopy2);
802    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
803    bufPtr->addIncoming(oldBufPtr, doubleEntry);
804    bufPtr->addIncoming(newBufPtr, allocateNew);
805    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
806    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
807    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
808        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
809    }
810    b->CreateStore(currentWorkingBlocks, workingBlocksField);
811}
812
813SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
814: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()}), 0, StructAddressSpace) {
815    mUniqueID = "B";
816    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
817        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
818    }
819}
820
821ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
822: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
823    mUniqueID = "E";
824    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
825    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
826}
827
828CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
829: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
830    mUniqueID = "C" + std::to_string(bufferBlocks);
831    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
832}
833
834CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
835: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
836
837}
838
839CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
840: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
841, mOverflowBlocks(overflowBlocks) {
842    if (bufferBlocks < 2 * overflowBlocks) {
843        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
844    }
845    mUniqueID = "CC" + std::to_string(bufferBlocks);
846    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
847    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
848}
849
850ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
851: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
852, mInitialCapacity(type->getArrayNumElements()) {
853    mUniqueID = "XP" + std::to_string(bufferBlocks);
854    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
855}
856
857SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
858: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
859    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
860    if (bufferBlocks < 2 * overflowBlocks) {
861        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
862    }
863    if (mOverflowBlocks != 1) {
864        mUniqueID += "_" + std::to_string(mOverflowBlocks);
865    }
866    if (AddressSpace > 0) {
867        mUniqueID += "@" + std::to_string(AddressSpace);
868    }
869}
870
871inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
872    IntegerType * sizeTy = b->getSizeTy();
873    PointerType * typePtr = baseType->getPointerTo(addrSpace);
874    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
875}
876
877DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
878: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
879, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
880, mSwizzleFactor(swizzle)
881, mOverflowBlocks(overflow)
882{
883    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
884        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
885    }
886    mUniqueID = "DB";
887    if (swizzle != 1) {
888        mUniqueID += "s" + std::to_string(swizzle);
889    }
890        if (overflow != 0) {
891        mUniqueID += "o" + std::to_string(overflow);
892    }
893    if (addrSpace != 0) {
894        mUniqueID += "@" + std::to_string(addrSpace);
895    }
896}
897
898
899inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
900: mBufferKind(k)
901, mType(resolvedType)
902, mBufferBlocks(BufferBlocks)
903, mAddressSpace(AddressSpace)
904, mStreamSetBufferPtr(nullptr)
905, mBaseType(baseType)
906, mProducer(nullptr) {
907    assert(k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer || BufferBlocks);
908}
909
910StreamSetBuffer::~StreamSetBuffer() { }
911
912// Helper routines
913ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
914    unsigned numElements = 1;
915    if (LLVM_LIKELY(type->isArrayTy())) {
916        numElements = type->getArrayNumElements();
917        type = type->getArrayElementType();
918    }
919    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
920        type = type->getVectorElementType();
921        if (LLVM_LIKELY(type->isIntegerTy())) {
922            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
923            type = b->getBitBlockType();
924            if (fieldWidth != 1) {
925                type = ArrayType::get(type, fieldWidth);
926            }
927            return ArrayType::get(type, numElements);
928        }
929    }
930    std::string tmp;
931    raw_string_ostream out(tmp);
932    type->print(out);
933    out << " is an unvalid stream set buffer type.";
934    report_fatal_error(out.str());
935}
936
937StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
938    if (LLVM_LIKELY(type->isArrayTy())) {
939        type = type->getArrayElementType();
940    }
941    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
942        type = type->getVectorElementType();
943        if (LLVM_LIKELY(type->isIntegerTy())) {
944            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
945            type = b->getBitBlockType();
946            if (fieldWidth != 1) {
947                type = ArrayType::get(type, fieldWidth);
948            }
949            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
950        }
951    }
952    std::string tmp;
953    raw_string_ostream out(tmp);
954    type->print(out);
955    out << " is an unvalid stream set buffer type.";
956    report_fatal_error(out.str());
957}
Note: See TracBrowser for help on using the repository browser.