source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5646

Last change on this file since 5646 was 5646, checked in by nmedfort, 22 months ago

Minor clean up. Bug fix for object cache when the same cached kernel is used twice in a single run. Improvement to RE Minimizer.

File size: 50.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
28
29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
30    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
31        Type * const ty = getType();
32        if (mAddressSpace == 0) {
33            Constant * size = ConstantExpr::getSizeOf(ty);
34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
36        } else {
37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
38        }
39        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
40    } else {
41        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
42    }
43}
44
45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
46    if (mAddressSpace == 0) {
47        iBuilder->CreateFree(mStreamSetBufferPtr);
48    }
49}
50
51Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
52    if (codegen::EnableAsserts) {
53        Value * const count = getStreamSetCount(iBuilder, self);
54        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
55        Value * const cond = iBuilder->CreateICmpULT(index, count);
56        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
57    }
58    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
59}
60
61Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
62    if (codegen::EnableAsserts) {
63        Value * const count = getStreamSetCount(iBuilder, self);
64        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
65        Value * const cond = iBuilder->CreateICmpULT(index, count);
66        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
67    }
68    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
69}
70
71void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
72    report_fatal_error("setBaseAddress is not supported by this buffer type");
73}
74
75Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
76    report_fatal_error("getBufferedSize is not supported by this buffer type");
77}
78
79void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
80    report_fatal_error("setBufferedSize is not supported by this buffer type");
81}
82
83Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
84    report_fatal_error("getCapacity is not supported by this buffer type");
85}
86
87void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
88    report_fatal_error("setCapacity is not supported by this buffer type");
89}
90
91inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
92    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
93        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
94            return true;
95        }
96    }
97    return false;
98}
99
100Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
101    size_t count = 1;
102    if (isa<ArrayType>(mBaseType)) {
103        count = mBaseType->getArrayNumElements();
104    }
105    return iBuilder->getSize(count);
106}
107
108inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
109    assert (offset->getType()->isIntegerTy());
110    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
111        return offset;
112    } else if (mBufferBlocks == 1) {
113        return ConstantInt::getNullValue(iBuilder->getSizeTy());
114    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
115        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
116    } else {
117        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
118    }
119}
120
121/**
122 * @brief getRawItemPointer
123 *
124 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
125 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
126 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
127 */
128Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
129    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
130    Value * relativePosition = absolutePosition;
131    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
132    if (bw < 8) {
133        assert (bw  == 1 || bw == 2 || bw == 4);
134        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
135        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
136    } else {
137        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
138    }
139    return iBuilder->CreateGEP(ptr, relativePosition);
140}
141
142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse, const unsigned lookAhead) const {
143    Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
144    Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
145    if (reverse) {
146        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
147    }
148    else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
149}
150
151Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse, const unsigned lookAhead) const {
152    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
153    Value * bufRem = iBuilder->CreateURem(fromBlock, bufBlocks);
154    if (reverse) {
155        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufBlocks, bufRem);
156    }
157    else return iBuilder->CreateSub(bufBlocks, bufRem, "linearBlocks");
158}
159
160Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
161    return getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
162}
163
164Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
165    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock, reverse);
166}
167
168Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
169    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
170    return self;
171}
172
173void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
174    Type * i8ptr = iBuilder->getInt8PtrTy();
175    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
176    size_t numStreams = 1;
177    if (isa<ArrayType>(mBaseType)) {
178        numStreams = mBaseType->getArrayNumElements();
179    }
180    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
181    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
182    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
183}
184
185void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
186    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
187    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
188    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
189    size_t numStreams = 1;
190    if (isa<ArrayType>(mBaseType)) {
191        numStreams = mBaseType->getArrayNumElements();
192    }
193    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
194    if (numStreams == 1) {
195        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
196        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
197        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
198    } else {
199        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
200        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
201        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
202        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
203        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
204        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
205        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
206        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
207        for (unsigned strm = 0; strm < numStreams; strm++) {
208            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
209            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
210            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
211            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
212            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
213        }
214    }
215}
216
217void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
218    report_fatal_error("Copy back not supported for this buffer type:" + Name);
219}
220
221// Source File Buffer
222
223Type * SourceBuffer::getStreamSetBlockType() const {
224    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
225}
226
227
228Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
229    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
230    return iBuilder->CreateLoad(ptr);
231}
232
233void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
234    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
235    iBuilder->CreateStore(size, ptr);
236}
237
238Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
239    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
240    return iBuilder->CreateLoad(ptr);
241}
242
243void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
244    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
245    iBuilder->CreateStore(c, ptr);
246}
247
248void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
249    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
250
251    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
252}
253
254Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
255    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
256    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
257    Value * const addr = iBuilder->CreateLoad(ptr);
258    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
259    return addr;
260}
261
262Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
263    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
264}
265
266Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse, const unsigned lookAhead) const {
267    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
268    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
269}
270
271Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse, const unsigned lookAhead) const {
272    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
273    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
274}
275
276void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
277    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
278        Type * const ty = getType();
279        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
280        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
281    } else {
282        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
283    }
284}
285
286void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
287
288}
289
290// External File Buffer
291void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
292    report_fatal_error("External buffers cannot be allocated.");
293}
294
295void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
296
297}
298
299Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
300    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
301}
302
303Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool, const unsigned) const {
304    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
305}
306
307// Circular Buffer
308Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
309    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
310}
311
312Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
313    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
314    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
315    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
316    if (bw < 8) {
317        assert (bw  == 1 || bw == 2 || bw == 4);
318        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
319        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
320    } else {
321        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
322    }
323    return iBuilder->CreateGEP(ptr, relativePosition);
324}
325
326// CircularCopybackBuffer Buffer
327void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
328    Type * const ty = getType();
329    Constant * size = ConstantExpr::getSizeOf(ty);
330    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
331    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
332}
333
334Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
335    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
336    if (reverse) return accessibleItems;
337    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
338}
339
340Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
341    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
342    if (reverse) return accessibleBlocks;
343    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
344}
345
346void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
347    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
348    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
349    Value * newBufPos = b->CreateURem(newProduced, bufSize);
350    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
351    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
352    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
353    b->CreateCondBr(wraparound, copyBack, done);
354    b->SetInsertPoint(copyBack);
355    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
356    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
357    b->CreateBr(done);
358    b->SetInsertPoint(done);
359}
360
361
362// SwizzledCopybackBuffer Buffer
363
364void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
365    Type * const ty = getType();
366    Constant * size = ConstantExpr::getSizeOf(ty);
367    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
368    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
369}
370
371void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
372    Type * int8PtrTy = iBuilder->getInt8PtrTy();
373    DataLayout DL(iBuilder->getModule());
374    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
375
376    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
377    Function * f = iBuilder->GetInsertBlock()->getParent();
378    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
379    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
380    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
381    const unsigned numStreams = getType()->getArrayNumElements();
382    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
383    const auto elemTy = getType()->getArrayElementType();
384    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
385    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
386    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
387    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
388    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
389    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
390
391    iBuilder->SetInsertPoint(wholeBlockCopy);
392    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
393    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
394    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
395    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
396    iBuilder->SetInsertPoint(partialBlockCopy);
397    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
398    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
399    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
400        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
401        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
402        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
403    }
404    iBuilder->CreateBr(copyDone);
405
406    iBuilder->SetInsertPoint(copyDone);
407}
408
409Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
410    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
411}
412
413Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
414    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
415    if (reverse) return accessibleItems;
416    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
417}
418
419Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
420    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
421    if (reverse) return accessibleBlocks;
422    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
423}
424void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
425    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
426    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
427    Value * newBufPos = b->CreateURem(newProduced, bufSize);
428    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
429    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
430    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
431    b->CreateCondBr(wraparound, copyBack, done);
432    b->SetInsertPoint(copyBack);
433    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
434    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
435    b->CreateBr(done);
436    b->SetInsertPoint(done);
437}
438
439// Expandable Buffer
440
441void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
442    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
443    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
444    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
445    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
446    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
447    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
448    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
449    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
450    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
451    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
452    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
453}
454
455std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
456
457    // ENTRY
458    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
459    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
460    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
461    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
462    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
463
464    assert (streamIndex->getType() == capacity->getType());
465    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
466
467    // Are we guaranteed that we can access this stream?
468    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
469        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
470        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
471        return {streamSet, offset};
472    }
473
474    BasicBlock * const entry = iBuilder->GetInsertBlock();
475    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
476    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
477
478    iBuilder->CreateLikelyCondBr(cond, resume, expand);
479
480    // EXPAND
481    iBuilder->SetInsertPoint(expand);
482
483    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
484    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
485
486    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
487    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
488    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
489
490    std::string tmp;
491    raw_string_ostream out(tmp);
492    out << "__expand";
493    elementType->print(out);
494    std::string name = out.str();
495
496    Module * const m = iBuilder->getModule();
497    Function * expandFunction = m->getFunction(name);
498
499    if (expandFunction == nullptr) {
500
501        const auto ip = iBuilder->saveIP();
502
503        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
504        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
505
506        auto args = expandFunction->arg_begin();
507        Value * streamSet = &*args++;
508        Value * capacity = &*args++;
509        Value * newCapacity = &*args;
510
511        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
512        iBuilder->SetInsertPoint(entry);
513
514        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
515        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
516
517        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
518        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
519
520        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
521        for (unsigned i = 0; i < mBufferBlocks; ++i) {
522            ConstantInt * const offset = iBuilder->getSize(i);
523            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
524            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
525            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
526            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
527            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
528            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
529            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
530            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
531        }
532
533        iBuilder->CreateFree(streamSet);
534
535        iBuilder->CreateRet(newStreamSet);
536
537        iBuilder->restoreIP(ip);
538    }
539
540    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
541    iBuilder->CreateStore(newStreamSet, streamSetPtr);
542    iBuilder->CreateStore(newCapacity, capacityPtr);
543
544    iBuilder->CreateBr(resume);
545
546    // RESUME
547    iBuilder->SetInsertPoint(resume);
548
549    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
550    phiStreamSet->addIncoming(streamSet, entry);
551    phiStreamSet->addIncoming(newStreamSet, expand);
552
553    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
554    phiCapacity->addIncoming(capacity, entry);
555    phiCapacity->addIncoming(newCapacity, expand);
556
557    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
558
559    return {phiStreamSet, offset};
560}
561
562Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
563    Value * ptr, * offset;
564    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
565    return iBuilder->CreateGEP(ptr, offset);
566}
567
568Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
569    Value * ptr, * offset;
570    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
571    return iBuilder->CreateGEP(ptr, {offset, packIndex});
572}
573
574Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
575    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
576}
577
578Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
579    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
580    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
581    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
582    return baseAddr;
583}
584
585void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
586    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
587}
588
589Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
590    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
591}
592
593Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool, const unsigned) const {
594    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
595}
596
597SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
598: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
599    mUniqueID = "B";
600    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
601        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
602    }
603}
604
605ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
606: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
607    mUniqueID = "E";
608    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
609    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
610}
611
612CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
613: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
614    mUniqueID = "C" + std::to_string(bufferBlocks);
615    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
616}
617
618CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
619: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
620
621}
622
623CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
624: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
625, mOverflowBlocks(overflowBlocks) {
626    if (bufferBlocks < 2 * overflowBlocks) {
627        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
628    }
629    mUniqueID = "CC" + std::to_string(bufferBlocks);
630    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
631    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
632}
633
634ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
635: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
636, mInitialCapacity(type->getArrayNumElements()) {
637    mUniqueID = "XP" + std::to_string(bufferBlocks);
638    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
639}
640
641SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
642: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
643    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
644    if (bufferBlocks < 2 * overflowBlocks) {
645        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
646    }
647    if (mOverflowBlocks != 1) {
648        mUniqueID += "_" + std::to_string(mOverflowBlocks);
649    }
650    if (AddressSpace > 0) {
651        mUniqueID += "@" + std::to_string(AddressSpace);
652    }
653}
654
655Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
656    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
657    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
658    Value * const addr = b->CreateLoad(p);
659    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
660    return addr;
661}
662
663Value * DynamicBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
664    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
665    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
666}
667
668Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * streamIndex, Value * absolutePosition) const {
669    Value * absBlock = b->CreateUDiv(absolutePosition, b->getSize(b->getBitBlockWidth()));
670    Value * blockPos = b->CreateURem(absolutePosition, b->getSize(b->getBitBlockWidth()));
671    Value * blockPtr = b->CreateGEP(getStreamSetBlockPtr(b, handle, absBlock), {b->getInt32(0), streamIndex});
672    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
673    if (bw < 8) {
674        assert (bw  == 1 || bw == 2 || bw == 4);
675        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
676        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
677    } else {
678        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
679    }
680    return b->CreateGEP(blockPtr, blockPos);
681}
682
683
684Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse, const unsigned lookAhead) const {
685    Constant * blockSize = b->getSize(b->getBitBlockWidth());
686    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
687    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
688    Value * bufRem = b->CreateURem(fromPosition, bufSize);
689    if (reverse) {
690        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
691    }
692    return b->CreateSub(bufSize, bufRem, "linearItems");
693}
694
695Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
696    Value * accessibleItems = getLinearlyAccessibleItems(b, handle, fromPosition, reverse);
697    if (reverse || (mOverflowBlocks == 0))  return accessibleItems;
698    return b->CreateAdd(accessibleItems, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
699}
700
701Value * DynamicBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const b, Value * handle, Value * fromBlock, bool reverse, const unsigned lookAhead) const {
702    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
703    Value * bufRem = b->CreateURem(fromBlock, bufBlocks);
704    if (reverse) {
705        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufBlocks, bufRem);
706    }
707    return b->CreateSub(bufBlocks, bufRem, "linearBlocks");
708}
709
710Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
711    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
712    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
713}
714
715void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) {
716    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
717    Value * bufSize = b->CreateMul(workingBlocks, b->getSize(b->getBitBlockWidth()));
718    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
719    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
720    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
721    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
722    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
723    b->CreateCondBr(wraparound, copyBack, done);
724    b->SetInsertPoint(copyBack);
725    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
726    Value * bufBasePtr = b->CreateLoad(bufBasePtrField);
727    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
728    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
729    b->CreateBr(done);
730    b->SetInsertPoint(done);
731}
732
733void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
734    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
735    size_t numStreams = 1;
736    if (isa<ArrayType>(mBaseType)) {
737        numStreams = mBaseType->getArrayNumElements();
738    }
739    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
740    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
741    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
742    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
743    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
744    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
745    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
746        b->CallPrintInt("allocated: ", bufPtr);
747        b->CallPrintInt("allocated capacity: ", bufSize);
748    }
749    b->CreateStore(bufPtr, bufBasePtrField);
750    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
751    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
752    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
753    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
754    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
755    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
756    mStreamSetBufferPtr = handle;
757}
758
759void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
760    Value * handle = mStreamSetBufferPtr;
761    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
762    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
763    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
764    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
765    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
766    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
767    Value * priorBuf = b->CreateLoad(priorBasePtrField);
768    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
769    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
770    b->SetInsertPoint(freePrior);
771    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
772        b->CallPrintInt("releasing: ", priorBuf);
773    }
774    b->CreateFree(priorBuf);
775    b->CreateBr(freeCurrent);
776    b->SetInsertPoint(freeCurrent);
777    b->CreateFree(b->CreateLoad(bufBasePtrField));
778}
779
780//
781//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
782//  ensures that we have correct data.   TODO: consider optimizing based on actual
783//  consumer and producer positions.
784//
785void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
786    size_t numStreams = 1;
787    if (isa<ArrayType>(mBaseType)) {
788        numStreams = mBaseType->getArrayNumElements();
789    }
790    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
791    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
792    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
793    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
794    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
795    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
796    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
797   
798    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
799    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
800    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
801    Value * const curAllocated = b->CreateLoad(capacityField);
802    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
803    if (mOverflowBlocks > 0) {
804        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
805        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
806    }
807    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
808    BasicBlock * doubleEntry = b->GetInsertBlock();
809    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
810    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
811    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
812    b->SetInsertPoint(doRealloc);
813    // If there is a non-null priorBasePtr, free it.
814    Value * priorBuf = b->CreateLoad(priorBasePtrField);
815    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
816    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
817    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
818    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
819    b->SetInsertPoint(deallocatePrior);
820    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
821        b->CallPrintInt("deallocating: ", priorBuf);
822    }
823    b->CreateFree(priorBuf);
824    b->CreateBr(allocateNew);
825    b->SetInsertPoint(allocateNew);
826    b->CreateStore(oldBufPtr, priorBasePtrField);
827    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
828    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
829        b->CallPrintInt("re-allocated: ", newBufPtr);
830        b->CallPrintInt("allocated capacity: ", neededCapacity);
831    }
832    b->CreateStore(newBufPtr, bufBasePtrField);
833    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
834    b->CreateStore(neededCapacity, capacityField);
835    b->CreateBr(doCopy2);
836    b->SetInsertPoint(doCopy2);
837    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
838    bufPtr->addIncoming(oldBufPtr, doubleEntry);
839    bufPtr->addIncoming(newBufPtr, allocateNew);
840    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
841    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
842    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
843        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
844    }
845    b->CreateStore(currentWorkingBlocks, workingBlocksField);
846}
847
848inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
849    IntegerType * sizeTy = b->getSizeTy();
850    PointerType * typePtr = baseType->getPointerTo(addrSpace);
851    return StructType::get(typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy, nullptr);
852}
853
854DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
855: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
856, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
857, mSwizzleFactor(swizzle)
858, mOverflowBlocks(overflow)
859{
860    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
861        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
862    }
863    mUniqueID = "DB";
864    if (swizzle != 1) {
865        mUniqueID += "s" + std::to_string(swizzle);
866    }
867        if (overflow != 0) {
868        mUniqueID += "o" + std::to_string(overflow);
869    }
870    if (addrSpace != 0) {
871        mUniqueID += "@" + std::to_string(addrSpace);
872    }
873}
874
875
876inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
877: mBufferKind(k)
878, mType(resolvedType)
879, mBufferBlocks(BufferBlocks)
880, mAddressSpace(AddressSpace)
881, mStreamSetBufferPtr(nullptr)
882, mBaseType(baseType)
883, mProducer(nullptr) {
884
885}
886
887StreamSetBuffer::~StreamSetBuffer() { }
888
889// Helper routines
890ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
891    unsigned numElements = 1;
892    if (LLVM_LIKELY(type->isArrayTy())) {
893        numElements = type->getArrayNumElements();
894        type = type->getArrayElementType();
895    }
896    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
897        type = type->getVectorElementType();
898        if (LLVM_LIKELY(type->isIntegerTy())) {
899            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
900            type = b->getBitBlockType();
901            if (fieldWidth != 1) {
902                type = ArrayType::get(type, fieldWidth);
903            }
904            return ArrayType::get(type, numElements);
905        }
906    }
907    std::string tmp;
908    raw_string_ostream out(tmp);
909    type->print(out);
910    out << " is an unvalid stream set buffer type.";
911    report_fatal_error(out.str());
912}
913
914StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
915    if (LLVM_LIKELY(type->isArrayTy())) {
916        type = type->getArrayElementType();
917    }
918    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
919        type = type->getVectorElementType();
920        if (LLVM_LIKELY(type->isIntegerTy())) {
921            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
922            type = b->getBitBlockType();
923            if (fieldWidth != 1) {
924                type = ArrayType::get(type, fieldWidth);
925            }
926            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
927        }
928    }
929    std::string tmp;
930    raw_string_ostream out(tmp);
931    type->print(out);
932    out << " is an unvalid stream set buffer type.";
933    report_fatal_error(out.str());
934}
Note: See TracBrowser for help on using the repository browser.