source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5618

Last change on this file since 5618 was 5618, checked in by cameron, 19 months ago

Dynamic buffer tracing; calculations for available items/blocks in reverse mode

File size: 47.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
28
29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
30    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
31        Type * const ty = getType();
32        if (mAddressSpace == 0) {
33            Constant * size = ConstantExpr::getSizeOf(ty);
34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
36        } else {
37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
38        }
39        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
40    } else {
41        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
42    }
43}
44
45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
46    if (mAddressSpace == 0) {
47        iBuilder->CreateFree(mStreamSetBufferPtr);
48    }
49}
50
51Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
52    if (codegen::EnableAsserts) {
53        Value * const count = getStreamSetCount(iBuilder, self);
54        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
55        Value * const cond = iBuilder->CreateICmpULT(index, count);
56        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
57    }
58    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
59}
60
61Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
62    if (codegen::EnableAsserts) {
63        Value * const count = getStreamSetCount(iBuilder, self);
64        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
65        Value * const cond = iBuilder->CreateICmpULT(index, count);
66        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
67    }
68    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
69}
70
71void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
72    report_fatal_error("setBaseAddress is not supported by this buffer type");
73}
74
75Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
76    report_fatal_error("getBufferedSize is not supported by this buffer type");
77}
78
79void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
80    report_fatal_error("setBufferedSize is not supported by this buffer type");
81}
82
83Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
84    report_fatal_error("getCapacity is not supported by this buffer type");
85}
86
87void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
88    report_fatal_error("setCapacity is not supported by this buffer type");
89}
90
91inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
92    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
93        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
94            return true;
95        }
96    }
97    return false;
98}
99
100Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
101    size_t count = 1;
102    if (isa<ArrayType>(mBaseType)) {
103        count = mBaseType->getArrayNumElements();
104    }
105    return iBuilder->getSize(count);
106}
107
108inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
109    assert (offset->getType()->isIntegerTy());
110    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
111        return offset;
112    } else if (mBufferBlocks == 1) {
113        return ConstantInt::getNullValue(iBuilder->getSizeTy());
114    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
115        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
116    } else {
117        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
118    }
119}
120
121/**
122 * @brief getRawItemPointer
123 *
124 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
125 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
126 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
127 */
128Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
129    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
130    Value * relativePosition = absolutePosition;
131    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
132    if (bw < 8) {
133        assert (bw  == 1 || bw == 2 || bw == 4);
134        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
135        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
136    } else {
137        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
138    }
139    return iBuilder->CreateGEP(ptr, relativePosition);
140}
141
142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
143    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
144        Constant * stride = iBuilder->getSize(iBuilder->getStride());
145        Value * strideRem = iBuilder->CreateURem(fromPosition, stride);
146        if (reverse) {
147            return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(strideRem, iBuilder->getSize(0)), stride, strideRem);
148        }
149        else return iBuilder->CreateSub(stride, strideRem);
150    } else {
151        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
152        Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
153        if (reverse) {
154            return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
155        }
156        else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
157    }
158}
159
160Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
161    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
162    Value * bufRem = iBuilder->CreateURem(fromBlock, bufBlocks);
163    if (reverse) {
164        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufBlocks, bufRem);
165    }
166    else return iBuilder->CreateSub(bufBlocks, bufRem, "linearBlocks");
167}
168
169Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
170    return getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
171}
172
173Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
174    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock, reverse);
175}
176
177Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
178    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
179    return self;
180}
181
182void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
183    Type * i8ptr = iBuilder->getInt8PtrTy();
184    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
185    size_t numStreams = 1;
186    if (isa<ArrayType>(mBaseType)) {
187        numStreams = mBaseType->getArrayNumElements();
188    }
189    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
190    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
191    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
192}
193
194void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
195    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
196    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
197    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
198    size_t numStreams = 1;
199    if (isa<ArrayType>(mBaseType)) {
200        numStreams = mBaseType->getArrayNumElements();
201    }
202    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
203    if (numStreams == 1) {
204        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
205        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
206        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
207    } else {
208        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
209        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
210        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
211        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
212        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
213        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
214        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
215        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
216        for (unsigned strm = 0; strm < numStreams; strm++) {
217            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
218            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
219            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
220            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
221            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
222        }
223    }
224}
225
226// Source File Buffer
227
228Type * SourceBuffer::getStreamSetBlockType() const {
229    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
230}
231
232
233Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
234    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
235    return iBuilder->CreateLoad(ptr);
236}
237
238void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
239    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
240    iBuilder->CreateStore(size, ptr);
241}
242
243Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
244    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
245    return iBuilder->CreateLoad(ptr);
246}
247
248void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
249    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
250    iBuilder->CreateStore(c, ptr);
251}
252
253void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
254    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
255
256    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
257}
258
259Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
260    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
261    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
262    Value * const addr = iBuilder->CreateLoad(ptr);
263    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
264    return addr;
265}
266
267Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
268    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
269}
270
271Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
272    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
273    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
274}
275
276Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
277    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
278    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
279}
280
281void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
282    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
283        Type * const ty = getType();
284        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
285        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
286    } else {
287        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
288    }
289}
290
291void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
292
293}
294
295// External File Buffer
296void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
297    report_fatal_error("External buffers cannot be allocated.");
298}
299
300void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
301
302}
303
304Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
305    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
306}
307
308Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool reverse) const {
309    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
310}
311
312// Circular Buffer
313Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
314    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
315}
316
317Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
318    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
319    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
320    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
321    if (bw < 8) {
322        assert (bw  == 1 || bw == 2 || bw == 4);
323        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
324        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
325    } else {
326        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
327    }
328    return iBuilder->CreateGEP(ptr, relativePosition);
329}
330
331// CircularCopybackBuffer Buffer
332void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
333    Type * const ty = getType();
334    Constant * size = ConstantExpr::getSizeOf(ty);
335    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
336    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
337}
338
339void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
340    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
341    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
342}
343
344Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
345    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
346    if (reverse) return accessibleItems;
347    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
348}
349
350Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
351    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
352    if (reverse) return accessibleBlocks;
353    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
354}
355
356// SwizzledCopybackBuffer Buffer
357
358void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
359    Type * const ty = getType();
360    Constant * size = ConstantExpr::getSizeOf(ty);
361    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
362    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
363}
364
365void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
366    Type * int8PtrTy = iBuilder->getInt8PtrTy();
367    DataLayout DL(iBuilder->getModule());
368    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
369
370    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
371    Function * f = iBuilder->GetInsertBlock()->getParent();
372    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
373    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
374    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
375    const unsigned numStreams = getType()->getArrayNumElements();
376    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
377    const auto elemTy = getType()->getArrayElementType();
378    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
379    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
380    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
381    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
382    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
383    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
384
385    iBuilder->SetInsertPoint(wholeBlockCopy);
386    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
387    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
388    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
389    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
390    iBuilder->SetInsertPoint(partialBlockCopy);
391    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
392    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
393    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
394        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
395        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
396        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
397    }
398    iBuilder->CreateBr(copyDone);
399
400    iBuilder->SetInsertPoint(copyDone);
401}
402
403void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
404    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
405    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
406}
407
408Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
409    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
410}
411
412Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
413    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
414    if (reverse) return accessibleItems;
415    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
416}
417
418Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
419    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
420    if (reverse) return accessibleBlocks;
421    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
422}
423
424// Expandable Buffer
425
426void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
427    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
428    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
429    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
430    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
431    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
432    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
433    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
434    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
435    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
436    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
437    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
438}
439
440std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
441
442    // ENTRY
443    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
444    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
445    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
446    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
447    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
448
449    assert (streamIndex->getType() == capacity->getType());
450    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
451
452    // Are we guaranteed that we can access this stream?
453    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
454        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
455        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
456        return {streamSet, offset};
457    }
458
459    BasicBlock * const entry = iBuilder->GetInsertBlock();
460    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
461    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
462
463    iBuilder->CreateLikelyCondBr(cond, resume, expand);
464
465    // EXPAND
466    iBuilder->SetInsertPoint(expand);
467
468    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
469    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
470
471    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
472    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
473    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
474
475    std::string tmp;
476    raw_string_ostream out(tmp);
477    out << "__expand";
478    elementType->print(out);
479    std::string name = out.str();
480
481    Module * const m = iBuilder->getModule();
482    Function * expandFunction = m->getFunction(name);
483
484    if (expandFunction == nullptr) {
485
486        const auto ip = iBuilder->saveIP();
487
488        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
489        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
490
491        auto args = expandFunction->arg_begin();
492        Value * streamSet = &*args++;
493        Value * capacity = &*args++;
494        Value * newCapacity = &*args;
495
496        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
497        iBuilder->SetInsertPoint(entry);
498
499        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
500        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
501
502        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
503        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
504
505        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
506        for (unsigned i = 0; i < mBufferBlocks; ++i) {
507            ConstantInt * const offset = iBuilder->getSize(i);
508            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
509            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
510            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
511            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
512            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
513            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
514            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
515            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
516        }
517
518        iBuilder->CreateFree(streamSet);
519
520        iBuilder->CreateRet(newStreamSet);
521
522        iBuilder->restoreIP(ip);
523    }
524
525    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
526    iBuilder->CreateStore(newStreamSet, streamSetPtr);
527    iBuilder->CreateStore(newCapacity, capacityPtr);
528
529    iBuilder->CreateBr(resume);
530
531    // RESUME
532    iBuilder->SetInsertPoint(resume);
533
534    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
535    phiStreamSet->addIncoming(streamSet, entry);
536    phiStreamSet->addIncoming(newStreamSet, expand);
537
538    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
539    phiCapacity->addIncoming(capacity, entry);
540    phiCapacity->addIncoming(newCapacity, expand);
541
542    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
543
544    return {phiStreamSet, offset};
545}
546
547Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
548    Value * ptr, * offset;
549    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
550    return iBuilder->CreateGEP(ptr, offset);
551}
552
553Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
554    Value * ptr, * offset;
555    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
556    return iBuilder->CreateGEP(ptr, {offset, packIndex});
557}
558
559Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
560    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
561}
562
563Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
564    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
565    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
566    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
567    return baseAddr;
568}
569
570void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
571    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
572}
573
574Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
575    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
576}
577
578Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *, bool reverse) const {
579    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
580}
581
582SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
583: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
584    mUniqueID = "B";
585    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
586        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
587    }
588}
589
590ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
591: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
592    mUniqueID = "E";
593    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
594    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
595}
596
597CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
598: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
599    mUniqueID = "C" + std::to_string(bufferBlocks);
600    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
601}
602
603CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
604: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
605
606}
607
608CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
609: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
610, mOverflowBlocks(overflowBlocks) {
611    mUniqueID = "CC" + std::to_string(bufferBlocks);
612    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
613    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
614}
615
616ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
617: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
618, mInitialCapacity(type->getArrayNumElements()) {
619    mUniqueID = "XP" + std::to_string(bufferBlocks);
620    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
621}
622
623SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
624: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
625    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
626    if (mOverflowBlocks != 1) {
627        mUniqueID += "_" + std::to_string(mOverflowBlocks);
628    }
629    if (AddressSpace > 0) {
630        mUniqueID += "@" + std::to_string(AddressSpace);
631    }
632}
633
634Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
635    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
636    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
637    Value * const addr = b->CreateLoad(p);
638    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
639    return addr;
640}
641
642Value * DynamicBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
643    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
644    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
645}
646
647Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * streamIndex, Value * absolutePosition) const {
648    Value * absBlock = b->CreateUDiv(absolutePosition, b->getSize(b->getBitBlockWidth()));
649    Value * blockPos = b->CreateURem(absolutePosition, b->getSize(b->getBitBlockWidth()));
650    Value * blockPtr = b->CreateGEP(getStreamSetBlockPtr(b, handle, absBlock), {b->getInt32(0), streamIndex});
651    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
652    if (bw < 8) {
653        assert (bw  == 1 || bw == 2 || bw == 4);
654        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
655        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
656    } else {
657        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
658    }
659    return b->CreateGEP(blockPtr, blockPos);
660}
661
662
663Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
664    Constant * blockSize = b->getSize(b->getBitBlockWidth());
665    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
666        Value * blockRem = b->CreateURem(fromPosition, blockSize);
667        if (reverse) {
668            return b->CreateSelect(b->CreateICmpEQ(blockRem, b->getSize(0)), blockSize, blockRem);
669        }
670        else return b->CreateSub(blockSize, blockRem);
671    } else {
672        Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
673        Value * bufSize = b->CreateMul(bufBlocks, blockSize);
674        Value * bufRem = b->CreateURem(fromPosition, bufSize);
675        if (reverse) {
676            return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
677        }
678        else return b->CreateSub(bufSize, bufRem, "linearItems");
679    }
680}
681
682Value * DynamicBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const b, Value * handle, Value * fromBlock, bool reverse) const {
683    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
684    Value * bufRem = b->CreateURem(fromBlock, bufBlocks);
685    if (reverse) {
686        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufBlocks, bufRem);
687    }
688    else return b->CreateSub(bufBlocks, bufRem, "linearBlocks");
689}
690
691Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
692    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
693    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
694}
695
696
697
698void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
699    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
700    size_t numStreams = 1;
701    if (isa<ArrayType>(mBaseType)) {
702        numStreams = mBaseType->getArrayNumElements();
703    }
704    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
705    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
706    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
707    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
708    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
709    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
710    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
711        b->CallPrintInt("allocated: ", bufPtr);
712        b->CallPrintInt("allocated capacity: ", bufSize);
713    }
714    b->CreateStore(bufPtr, bufBasePtrField);
715    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
716    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
717    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
718    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
719    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
720    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
721    mStreamSetBufferPtr = handle;
722}
723
724void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
725    Value * handle = mStreamSetBufferPtr;
726    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
727    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
728    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
729    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
730    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
731    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
732    Value * priorBuf = b->CreateLoad(priorBasePtrField);
733    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
734    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
735    b->SetInsertPoint(freePrior);
736    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
737        b->CallPrintInt("releasing: ", priorBuf);
738    }
739    b->CreateFree(priorBuf);
740    b->CreateBr(freeCurrent);
741    b->SetInsertPoint(freeCurrent);
742    b->CreateFree(b->CreateLoad(bufBasePtrField));
743}
744
745//
746//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
747//  ensures that we have correct data.   TODO: consider optimizing based on actual
748//  consumer and producer positions.
749//
750void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
751    size_t numStreams = 1;
752    if (isa<ArrayType>(mBaseType)) {
753        numStreams = mBaseType->getArrayNumElements();
754    }
755    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
756    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
757    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
758    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
759    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
760    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
761    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
762   
763    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
764    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
765    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
766    Value * const curAllocated = b->CreateLoad(capacityField);
767    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
768    if (mOverflowBlocks > 0) {
769        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
770        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
771    }
772    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
773    BasicBlock * doubleEntry = b->GetInsertBlock();
774    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
775    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
776    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
777    b->SetInsertPoint(doRealloc);
778    // If there is a non-null priorBasePtr, free it.
779    Value * priorBuf = b->CreateLoad(priorBasePtrField);
780    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
781    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
782    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
783    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
784    b->SetInsertPoint(deallocatePrior);
785    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
786        b->CallPrintInt("deallocating: ", priorBuf);
787    }
788    b->CreateFree(priorBuf);
789    b->CreateBr(allocateNew);
790    b->SetInsertPoint(allocateNew);
791    b->CreateStore(oldBufPtr, priorBasePtrField);
792    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
793    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
794        b->CallPrintInt("re-allocated: ", newBufPtr);
795        b->CallPrintInt("allocated capacity: ", neededCapacity);
796    }
797    b->CreateStore(newBufPtr, bufBasePtrField);
798    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
799    b->CreateStore(neededCapacity, capacityField);
800    b->CreateBr(doCopy2);
801    b->SetInsertPoint(doCopy2);
802    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
803    bufPtr->addIncoming(oldBufPtr, doubleEntry);
804    bufPtr->addIncoming(newBufPtr, allocateNew);
805    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
806    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
807    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
808        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
809    }
810    b->CreateStore(currentWorkingBlocks, workingBlocksField);
811}
812
813DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
814: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
815, mBufferStructType(StructType::get(resolveStreamSetType(b, type)->getPointerTo(addrSpace), resolveStreamSetType(b, type)->getPointerTo(addrSpace),
816                                    b->getSizeTy(), b->getSizeTy(), b->getSizeTy(), b->getSizeTy(), b->getSizeTy(), nullptr))
817, mSwizzleFactor(swizzle)
818, mOverflowBlocks(overflow)
819{
820    mUniqueID = "DB";
821    if (swizzle != 1) {
822        mUniqueID += "s" + std::to_string(swizzle);
823    }
824        if (overflow != 0) {
825        mUniqueID += "o" + std::to_string(overflow);
826    }
827    if (addrSpace != 0) {
828        mUniqueID += "@" + std::to_string(addrSpace);
829    }
830}
831
832
833inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
834: mBufferKind(k)
835, mType(resolvedType)
836, mBufferBlocks(BufferBlocks)
837, mAddressSpace(AddressSpace)
838, mStreamSetBufferPtr(nullptr)
839, mBaseType(baseType)
840, mProducer(nullptr) {
841
842}
843
844StreamSetBuffer::~StreamSetBuffer() { }
845
846// Helper routines
847ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
848    unsigned numElements = 1;
849    if (LLVM_LIKELY(type->isArrayTy())) {
850        numElements = type->getArrayNumElements();
851        type = type->getArrayElementType();
852    }
853    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
854        type = type->getVectorElementType();
855        if (LLVM_LIKELY(type->isIntegerTy())) {
856            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
857            type = b->getBitBlockType();
858            if (fieldWidth != 1) {
859                type = ArrayType::get(type, fieldWidth);
860            }
861            return ArrayType::get(type, numElements);
862        }
863    }
864    std::string tmp;
865    raw_string_ostream out(tmp);
866    type->print(out);
867    out << " is an unvalid stream set buffer type.";
868    report_fatal_error(out.str());
869}
870
871StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
872    if (LLVM_LIKELY(type->isArrayTy())) {
873        type = type->getArrayElementType();
874    }
875    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
876        type = type->getVectorElementType();
877        if (LLVM_LIKELY(type->isIntegerTy())) {
878            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
879            type = b->getBitBlockType();
880            if (fieldWidth != 1) {
881                type = ArrayType::get(type, fieldWidth);
882            }
883            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
884        }
885    }
886    std::string tmp;
887    raw_string_ostream out(tmp);
888    type->print(out);
889    out << " is an unvalid stream set buffer type.";
890    report_fatal_error(out.str());
891}
Note: See TracBrowser for help on using the repository browser.