source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5699

Last change on this file since 5699 was 5650, checked in by cameron, 22 months ago

Multiblock kernel builder support for ExternalBuffer inputs; clean-up

File size: 49.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
28
29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
30    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
31        Type * const ty = getType();
32        if (mAddressSpace == 0) {
33            Constant * size = ConstantExpr::getSizeOf(ty);
34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
36        } else {
37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
38        }
39        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
40    } else {
41        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
42    }
43}
44
45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
46    if (mAddressSpace == 0) {
47        iBuilder->CreateFree(mStreamSetBufferPtr);
48    }
49}
50
51Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
52    if (codegen::EnableAsserts) {
53        Value * const count = getStreamSetCount(iBuilder, self);
54        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
55        Value * const cond = iBuilder->CreateICmpULT(index, count);
56        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
57    }
58    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
59}
60
61Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
62    if (codegen::EnableAsserts) {
63        Value * const count = getStreamSetCount(iBuilder, self);
64        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
65        Value * const cond = iBuilder->CreateICmpULT(index, count);
66        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
67    }
68    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
69}
70
71void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
72    report_fatal_error("setBaseAddress is not supported by this buffer type");
73}
74
75Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
76    report_fatal_error("getBufferedSize is not supported by this buffer type");
77}
78
79void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
80    report_fatal_error("setBufferedSize is not supported by this buffer type");
81}
82
83Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
84    report_fatal_error("getCapacity is not supported by this buffer type");
85}
86
87void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
88    report_fatal_error("setCapacity is not supported by this buffer type");
89}
90
91inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
92    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
93        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
94            return true;
95        }
96    }
97    return false;
98}
99
100Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
101    size_t count = 1;
102    if (isa<ArrayType>(mBaseType)) {
103        count = mBaseType->getArrayNumElements();
104    }
105    return iBuilder->getSize(count);
106}
107
108inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
109    assert (offset->getType()->isIntegerTy());
110    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
111        return offset;
112    } else if (mBufferBlocks == 1) {
113        return ConstantInt::getNullValue(iBuilder->getSizeTy());
114    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
115        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
116    } else {
117        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
118    }
119}
120
121/**
122 * @brief getRawItemPointer
123 *
124 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
125 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
126 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
127 */
128Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
129    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
130    Value * relativePosition = absolutePosition;
131    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
132    if (bw < 8) {
133        assert (bw  == 1 || bw == 2 || bw == 4);
134        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
135        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
136    } else {
137        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
138    }
139    return iBuilder->CreateGEP(ptr, relativePosition);
140}
141
142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * self, Value * fromPosition, Value * availItems, bool reverse) const {
143    Constant * bufSize = b->getSize(mBufferBlocks * b->getStride());
144    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
145    if (reverse) {
146        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
147        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
148    }
149    else {
150        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
151        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
152    }
153}
154
155Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
156    Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
157    Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
158    if (reverse) {
159        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
160    }
161    else return iBuilder->CreateSub(bufSize, bufRem, "linearSpace");
162}
163
164Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
165    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
166    return self;
167}
168
169void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
170    Type * i8ptr = iBuilder->getInt8PtrTy();
171    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
172    size_t numStreams = 1;
173    if (isa<ArrayType>(mBaseType)) {
174        numStreams = mBaseType->getArrayNumElements();
175    }
176    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
177    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
178    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
179}
180
181void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
182    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
183    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
184    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
185    size_t numStreams = 1;
186    if (isa<ArrayType>(mBaseType)) {
187        numStreams = mBaseType->getArrayNumElements();
188    }
189    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
190    if (numStreams == 1) {
191        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
192        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
193        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
194    } else {
195        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
196        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
197        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
198        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
199        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
200        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
201        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
202        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
203        for (unsigned strm = 0; strm < numStreams; strm++) {
204            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
205            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
206            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
207            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
208            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
209        }
210    }
211}
212
213void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
214    report_fatal_error("Copy back not supported for this buffer type:" + Name);
215}
216
217// Source File Buffer
218
219Type * SourceBuffer::getStreamSetBlockType() const {
220    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
221}
222
223
224Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
225    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
226    return iBuilder->CreateLoad(ptr);
227}
228
229void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
230    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
231    iBuilder->CreateStore(size, ptr);
232}
233
234Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
235    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
236    return iBuilder->CreateLoad(ptr);
237}
238
239void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
240    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
241    iBuilder->CreateStore(c, ptr);
242}
243
244void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
245    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
246
247    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
248}
249
250Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
251    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
252    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
253    Value * const addr = iBuilder->CreateLoad(ptr);
254    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
255    return addr;
256}
257
258Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
259    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
260}
261
262Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, Value * availItems, bool reverse) const {
263    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
264    Value * maxAvail = iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
265    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
266}
267
268Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
269    report_fatal_error("SourceBuffers cannot be written");
270}
271
272void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
273    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
274        Type * const ty = getType();
275        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
276        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
277    } else {
278        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
279    }
280}
281
282void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
283
284}
285
286// External File Buffer
287void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
288    report_fatal_error("External buffers cannot be allocated.");
289}
290
291void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
292
293}
294
295Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
296    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
297}
298
299// All available items can be accessed.
300Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, bool) const {
301    return availItems;
302}
303
304// Circular Buffer
305Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
306    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
307}
308
309Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
310    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
311    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
312    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
313    if (bw < 8) {
314        assert (bw  == 1 || bw == 2 || bw == 4);
315        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
316        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
317    } else {
318        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
319    }
320    return iBuilder->CreateGEP(ptr, relativePosition);
321}
322
323// CircularCopybackBuffer Buffer
324void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
325    Type * const ty = getType();
326    Constant * size = ConstantExpr::getSizeOf(ty);
327    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
328    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
329}
330
331Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
332    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(iBuilder, self, fromPosition, reverse);
333    if (reverse) return writableProper;
334    return iBuilder->CreateAdd(writableProper, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
335}
336
337void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
338    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
339    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
340    Value * newBufPos = b->CreateURem(newProduced, bufSize);
341    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
342    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
343    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
344    b->CreateCondBr(wraparound, copyBack, done);
345    b->SetInsertPoint(copyBack);
346    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
347    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
348    b->CreateBr(done);
349    b->SetInsertPoint(done);
350}
351
352
353// SwizzledCopybackBuffer Buffer
354
355void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
356    Type * const ty = getType();
357    Constant * size = ConstantExpr::getSizeOf(ty);
358    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
359    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
360}
361
362void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
363    Type * int8PtrTy = iBuilder->getInt8PtrTy();
364    DataLayout DL(iBuilder->getModule());
365    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
366
367    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
368    Function * f = iBuilder->GetInsertBlock()->getParent();
369    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
370    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
371    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
372    const unsigned numStreams = getType()->getArrayNumElements();
373    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
374    const auto elemTy = getType()->getArrayElementType();
375    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
376    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
377    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
378    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
379    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
380    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
381
382    iBuilder->SetInsertPoint(wholeBlockCopy);
383    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
384    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
385    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
386    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
387    iBuilder->SetInsertPoint(partialBlockCopy);
388    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
389    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
390    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
391        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
392        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
393        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
394    }
395    iBuilder->CreateBr(copyDone);
396
397    iBuilder->SetInsertPoint(copyDone);
398}
399
400Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
401    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
402}
403
404Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
405    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(iBuilder, self, fromPosition, reverse);
406    if (reverse) return writableProper;
407    return iBuilder->CreateAdd(writableProper, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
408}
409
410void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
411    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
412    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
413    Value * newBufPos = b->CreateURem(newProduced, bufSize);
414    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
415    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
416    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
417    b->CreateCondBr(wraparound, copyBack, done);
418    b->SetInsertPoint(copyBack);
419    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
420    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
421    b->CreateBr(done);
422    b->SetInsertPoint(done);
423}
424
425// Expandable Buffer
426
427void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
428    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
429    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
430    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
431    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
432    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
433    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
434    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
435    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
436    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
437    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
438    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
439}
440
441std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
442
443    // ENTRY
444    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
445    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
446    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
447    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
448    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
449
450    assert (streamIndex->getType() == capacity->getType());
451    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
452
453    // Are we guaranteed that we can access this stream?
454    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
455        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
456        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
457        return {streamSet, offset};
458    }
459
460    BasicBlock * const entry = iBuilder->GetInsertBlock();
461    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
462    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
463
464    iBuilder->CreateLikelyCondBr(cond, resume, expand);
465
466    // EXPAND
467    iBuilder->SetInsertPoint(expand);
468
469    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
470    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
471
472    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
473    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
474    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
475
476    std::string tmp;
477    raw_string_ostream out(tmp);
478    out << "__expand";
479    elementType->print(out);
480    std::string name = out.str();
481
482    Module * const m = iBuilder->getModule();
483    Function * expandFunction = m->getFunction(name);
484
485    if (expandFunction == nullptr) {
486
487        const auto ip = iBuilder->saveIP();
488
489        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
490        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
491
492        auto args = expandFunction->arg_begin();
493        Value * streamSet = &*args++;
494        Value * capacity = &*args++;
495        Value * newCapacity = &*args;
496
497        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
498        iBuilder->SetInsertPoint(entry);
499
500        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
501        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
502
503        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
504        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
505
506        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
507        for (unsigned i = 0; i < mBufferBlocks; ++i) {
508            ConstantInt * const offset = iBuilder->getSize(i);
509            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
510            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
511            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
512            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
513            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
514            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
515            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
516            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
517        }
518
519        iBuilder->CreateFree(streamSet);
520
521        iBuilder->CreateRet(newStreamSet);
522
523        iBuilder->restoreIP(ip);
524    }
525
526    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
527    iBuilder->CreateStore(newStreamSet, streamSetPtr);
528    iBuilder->CreateStore(newCapacity, capacityPtr);
529
530    iBuilder->CreateBr(resume);
531
532    // RESUME
533    iBuilder->SetInsertPoint(resume);
534
535    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
536    phiStreamSet->addIncoming(streamSet, entry);
537    phiStreamSet->addIncoming(newStreamSet, expand);
538
539    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
540    phiCapacity->addIncoming(capacity, entry);
541    phiCapacity->addIncoming(newCapacity, expand);
542
543    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
544
545    return {phiStreamSet, offset};
546}
547
548Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
549    Value * ptr, * offset;
550    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
551    return iBuilder->CreateGEP(ptr, offset);
552}
553
554Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
555    Value * ptr, * offset;
556    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
557    return iBuilder->CreateGEP(ptr, {offset, packIndex});
558}
559
560Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
561    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
562}
563
564Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
565    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
566    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
567    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
568    return baseAddr;
569}
570
571void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
572    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
573}
574
575Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
576    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
577}
578
579Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
580    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
581}
582
583SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
584: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
585    mUniqueID = "B";
586    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
587        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
588    }
589}
590
591ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
592: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
593    mUniqueID = "E";
594    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
595    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
596}
597
598CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
599: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
600    mUniqueID = "C" + std::to_string(bufferBlocks);
601    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
602}
603
604CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
605: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
606
607}
608
609CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
610: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
611, mOverflowBlocks(overflowBlocks) {
612    if (bufferBlocks < 2 * overflowBlocks) {
613        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
614    }
615    mUniqueID = "CC" + std::to_string(bufferBlocks);
616    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
617    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
618}
619
620ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
621: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
622, mInitialCapacity(type->getArrayNumElements()) {
623    mUniqueID = "XP" + std::to_string(bufferBlocks);
624    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
625}
626
627SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
628: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
629    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
630    if (bufferBlocks < 2 * overflowBlocks) {
631        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
632    }
633    if (mOverflowBlocks != 1) {
634        mUniqueID += "_" + std::to_string(mOverflowBlocks);
635    }
636    if (AddressSpace > 0) {
637        mUniqueID += "@" + std::to_string(AddressSpace);
638    }
639}
640
641Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
642    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
643    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
644    Value * const addr = b->CreateLoad(p);
645    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
646    return addr;
647}
648
649Value * DynamicBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
650    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
651    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
652}
653
654Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * streamIndex, Value * absolutePosition) const {
655    Value * absBlock = b->CreateUDiv(absolutePosition, b->getSize(b->getBitBlockWidth()));
656    Value * blockPos = b->CreateURem(absolutePosition, b->getSize(b->getBitBlockWidth()));
657    Value * blockPtr = b->CreateGEP(getStreamSetBlockPtr(b, handle, absBlock), {b->getInt32(0), streamIndex});
658    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
659    if (bw < 8) {
660        assert (bw  == 1 || bw == 2 || bw == 4);
661        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
662        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
663    } else {
664        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
665    }
666    return b->CreateGEP(blockPtr, blockPos);
667}
668
669
670Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, Value * availItems, bool reverse) const {
671    Constant * blockSize = b->getSize(b->getBitBlockWidth());
672    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
673    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
674    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
675    if (reverse) {
676        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
677        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
678    }
679    else {
680        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
681        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
682    }
683}
684
685Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
686    Constant * blockSize = b->getSize(b->getBitBlockWidth());
687    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
688    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
689    Value * bufRem = b->CreateURem(fromPosition, bufSize);
690    if (reverse) {
691        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
692    }
693    bufSize = b->CreateMul(b->CreateAdd(bufBlocks, b->getSize(mOverflowBlocks)), blockSize);
694    return b->CreateSub(bufSize, bufRem, "linearWritable");
695}
696
697Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
698    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
699    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
700}
701
702void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) {
703    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
704    Value * bufSize = b->CreateMul(workingBlocks, b->getSize(b->getBitBlockWidth()));
705    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
706    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
707    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
708    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
709    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
710    b->CreateCondBr(wraparound, copyBack, done);
711    b->SetInsertPoint(copyBack);
712    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
713    Value * bufBasePtr = b->CreateLoad(bufBasePtrField);
714    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
715    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
716    b->CreateBr(done);
717    b->SetInsertPoint(done);
718}
719
720void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
721    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
722    size_t numStreams = 1;
723    if (isa<ArrayType>(mBaseType)) {
724        numStreams = mBaseType->getArrayNumElements();
725    }
726    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
727    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
728    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
729    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
730    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
731    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
732    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
733        b->CallPrintInt("allocated: ", bufPtr);
734        b->CallPrintInt("allocated capacity: ", bufSize);
735    }
736    b->CreateStore(bufPtr, bufBasePtrField);
737    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
738    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
739    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
740    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
741    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
742    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
743    mStreamSetBufferPtr = handle;
744}
745
746void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
747    Value * handle = mStreamSetBufferPtr;
748    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
749    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
750    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
751    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
752    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
753    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
754    Value * priorBuf = b->CreateLoad(priorBasePtrField);
755    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
756    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
757    b->SetInsertPoint(freePrior);
758    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
759        b->CallPrintInt("releasing: ", priorBuf);
760    }
761    b->CreateFree(priorBuf);
762    b->CreateBr(freeCurrent);
763    b->SetInsertPoint(freeCurrent);
764    b->CreateFree(b->CreateLoad(bufBasePtrField));
765}
766
767//
768//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
769//  ensures that we have correct data.   TODO: consider optimizing based on actual
770//  consumer and producer positions.
771//
772void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
773    size_t numStreams = 1;
774    if (isa<ArrayType>(mBaseType)) {
775        numStreams = mBaseType->getArrayNumElements();
776    }
777    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
778    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
779    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
780    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
781    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
782    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
783    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
784   
785    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
786    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
787    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
788    Value * const curAllocated = b->CreateLoad(capacityField);
789    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
790    if (mOverflowBlocks > 0) {
791        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
792        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
793    }
794    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
795    BasicBlock * doubleEntry = b->GetInsertBlock();
796    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
797    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
798    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
799    b->SetInsertPoint(doRealloc);
800    // If there is a non-null priorBasePtr, free it.
801    Value * priorBuf = b->CreateLoad(priorBasePtrField);
802    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
803    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
804    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
805    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
806    b->SetInsertPoint(deallocatePrior);
807    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
808        b->CallPrintInt("deallocating: ", priorBuf);
809    }
810    b->CreateFree(priorBuf);
811    b->CreateBr(allocateNew);
812    b->SetInsertPoint(allocateNew);
813    b->CreateStore(oldBufPtr, priorBasePtrField);
814    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
815    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
816        b->CallPrintInt("re-allocated: ", newBufPtr);
817        b->CallPrintInt("allocated capacity: ", neededCapacity);
818    }
819    b->CreateStore(newBufPtr, bufBasePtrField);
820    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
821    b->CreateStore(neededCapacity, capacityField);
822    b->CreateBr(doCopy2);
823    b->SetInsertPoint(doCopy2);
824    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
825    bufPtr->addIncoming(oldBufPtr, doubleEntry);
826    bufPtr->addIncoming(newBufPtr, allocateNew);
827    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
828    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
829    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
830        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
831    }
832    b->CreateStore(currentWorkingBlocks, workingBlocksField);
833}
834
835inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
836    IntegerType * sizeTy = b->getSizeTy();
837    PointerType * typePtr = baseType->getPointerTo(addrSpace);
838    return StructType::get(typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy, nullptr);
839}
840
841DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
842: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
843, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
844, mSwizzleFactor(swizzle)
845, mOverflowBlocks(overflow)
846{
847    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
848        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
849    }
850    mUniqueID = "DB";
851    if (swizzle != 1) {
852        mUniqueID += "s" + std::to_string(swizzle);
853    }
854        if (overflow != 0) {
855        mUniqueID += "o" + std::to_string(overflow);
856    }
857    if (addrSpace != 0) {
858        mUniqueID += "@" + std::to_string(addrSpace);
859    }
860}
861
862
863inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
864: mBufferKind(k)
865, mType(resolvedType)
866, mBufferBlocks(BufferBlocks)
867, mAddressSpace(AddressSpace)
868, mStreamSetBufferPtr(nullptr)
869, mBaseType(baseType)
870, mProducer(nullptr) {
871
872}
873
874StreamSetBuffer::~StreamSetBuffer() { }
875
876// Helper routines
877ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
878    unsigned numElements = 1;
879    if (LLVM_LIKELY(type->isArrayTy())) {
880        numElements = type->getArrayNumElements();
881        type = type->getArrayElementType();
882    }
883    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
884        type = type->getVectorElementType();
885        if (LLVM_LIKELY(type->isIntegerTy())) {
886            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
887            type = b->getBitBlockType();
888            if (fieldWidth != 1) {
889                type = ArrayType::get(type, fieldWidth);
890            }
891            return ArrayType::get(type, numElements);
892        }
893    }
894    std::string tmp;
895    raw_string_ostream out(tmp);
896    type->print(out);
897    out << " is an unvalid stream set buffer type.";
898    report_fatal_error(out.str());
899}
900
901StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
902    if (LLVM_LIKELY(type->isArrayTy())) {
903        type = type->getArrayElementType();
904    }
905    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
906        type = type->getVectorElementType();
907        if (LLVM_LIKELY(type->isIntegerTy())) {
908            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
909            type = b->getBitBlockType();
910            if (fieldWidth != 1) {
911                type = ArrayType::get(type, fieldWidth);
912            }
913            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
914        }
915    }
916    std::string tmp;
917    raw_string_ostream out(tmp);
918    type->print(out);
919    out << " is an unvalid stream set buffer type.";
920    report_fatal_error(out.str());
921}
Note: See TracBrowser for help on using the repository browser.