source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5620

Last change on this file since 5620 was 5620, checked in by nmedfort, 21 months ago

Bug fixes for multigrep mode. Optional PabloKernel branch hit counter added. Minor optimizations.

File size: 47.7 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14
15namespace llvm { class Constant; }
16namespace llvm { class Function; }
17
18using namespace parabix;
19using namespace llvm;
20using namespace IDISA;
21
22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
28
29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
30    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
31        Type * const ty = getType();
32        if (mAddressSpace == 0) {
33            Constant * size = ConstantExpr::getSizeOf(ty);
34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
36        } else {
37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
38        }
39        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
40    } else {
41        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
42    }
43}
44
45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
46    if (mAddressSpace == 0) {
47        iBuilder->CreateFree(mStreamSetBufferPtr);
48    }
49}
50
51Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
52    if (codegen::EnableAsserts) {
53        Value * const count = getStreamSetCount(iBuilder, self);
54        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
55        Value * const cond = iBuilder->CreateICmpULT(index, count);
56        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
57    }
58    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
59}
60
61Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
62    if (codegen::EnableAsserts) {
63        Value * const count = getStreamSetCount(iBuilder, self);
64        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
65        Value * const cond = iBuilder->CreateICmpULT(index, count);
66        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
67    }
68    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
69}
70
71void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
72    report_fatal_error("setBaseAddress is not supported by this buffer type");
73}
74
75Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
76    report_fatal_error("getBufferedSize is not supported by this buffer type");
77}
78
79void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
80    report_fatal_error("setBufferedSize is not supported by this buffer type");
81}
82
83Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
84    report_fatal_error("getCapacity is not supported by this buffer type");
85}
86
87void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
88    report_fatal_error("setCapacity is not supported by this buffer type");
89}
90
91inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
92    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
93        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
94            return true;
95        }
96    }
97    return false;
98}
99
100Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
101    size_t count = 1;
102    if (isa<ArrayType>(mBaseType)) {
103        count = mBaseType->getArrayNumElements();
104    }
105    return iBuilder->getSize(count);
106}
107
108inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
109    assert (offset->getType()->isIntegerTy());
110    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
111        return offset;
112    } else if (mBufferBlocks == 1) {
113        return ConstantInt::getNullValue(iBuilder->getSizeTy());
114    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
115        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
116    } else {
117        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
118    }
119}
120
121/**
122 * @brief getRawItemPointer
123 *
124 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
125 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
126 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
127 */
128Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
129    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
130    Value * relativePosition = absolutePosition;
131    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
132    if (bw < 8) {
133        assert (bw  == 1 || bw == 2 || bw == 4);
134        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
135        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
136    } else {
137        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
138    }
139    return iBuilder->CreateGEP(ptr, relativePosition);
140}
141
142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
143    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
144        Constant * stride = iBuilder->getSize(iBuilder->getStride());
145        Value * strideRem = iBuilder->CreateURem(fromPosition, stride);
146        if (reverse) {
147            return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(strideRem, iBuilder->getSize(0)), stride, strideRem);
148        }
149        else return iBuilder->CreateSub(stride, strideRem);
150    } else {
151        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
152        Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
153        if (reverse) {
154            return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
155        }
156        else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
157    }
158}
159
160Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
161    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
162    Value * bufRem = iBuilder->CreateURem(fromBlock, bufBlocks);
163    if (reverse) {
164        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufBlocks, bufRem);
165    }
166    else return iBuilder->CreateSub(bufBlocks, bufRem, "linearBlocks");
167}
168
169Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
170    return getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
171}
172
173Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
174    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock, reverse);
175}
176
177Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
178    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
179    return self;
180}
181
182void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
183    Type * i8ptr = iBuilder->getInt8PtrTy();
184    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
185    size_t numStreams = 1;
186    if (isa<ArrayType>(mBaseType)) {
187        numStreams = mBaseType->getArrayNumElements();
188    }
189    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
190    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
191    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
192}
193
194void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
195    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
196    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
197    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
198    size_t numStreams = 1;
199    if (isa<ArrayType>(mBaseType)) {
200        numStreams = mBaseType->getArrayNumElements();
201    }
202    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
203    if (numStreams == 1) {
204        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
205        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
206        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
207    } else {
208        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
209        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
210        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
211        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
212        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
213        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
214        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
215        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
216        for (unsigned strm = 0; strm < numStreams; strm++) {
217            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
218            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
219            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
220            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
221            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
222        }
223    }
224}
225
226// Source File Buffer
227
228Type * SourceBuffer::getStreamSetBlockType() const {
229    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
230}
231
232
233Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
234    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
235    return iBuilder->CreateLoad(ptr);
236}
237
238void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
239    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
240    iBuilder->CreateStore(size, ptr);
241}
242
243Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
244    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
245    return iBuilder->CreateLoad(ptr);
246}
247
248void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
249    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
250    iBuilder->CreateStore(c, ptr);
251}
252
253void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
254    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
255
256    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
257}
258
259Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
260    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
261    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
262    Value * const addr = iBuilder->CreateLoad(ptr);
263    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
264    return addr;
265}
266
267Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
268    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
269}
270
271Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
272    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
273    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
274}
275
276Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
277    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
278    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
279}
280
281void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
282    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
283        Type * const ty = getType();
284        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
285        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
286    } else {
287        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
288    }
289}
290
291void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
292
293}
294
295// External File Buffer
296void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
297    report_fatal_error("External buffers cannot be allocated.");
298}
299
300void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
301
302}
303
304Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
305    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
306}
307
308Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool reverse) const {
309    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
310}
311
312// Circular Buffer
313Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
314    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
315}
316
317Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
318    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
319    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
320    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
321    if (bw < 8) {
322        assert (bw  == 1 || bw == 2 || bw == 4);
323        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
324        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
325    } else {
326        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
327    }
328    return iBuilder->CreateGEP(ptr, relativePosition);
329}
330
331// CircularCopybackBuffer Buffer
332void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
333    Type * const ty = getType();
334    Constant * size = ConstantExpr::getSizeOf(ty);
335    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
336    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
337}
338
339void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
340    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
341    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
342}
343
344Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
345    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
346    if (reverse) return accessibleItems;
347    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
348}
349
350Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
351    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
352    if (reverse) return accessibleBlocks;
353    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
354}
355
356// SwizzledCopybackBuffer Buffer
357
358void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
359    Type * const ty = getType();
360    Constant * size = ConstantExpr::getSizeOf(ty);
361    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
362    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
363}
364
365void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
366    Type * int8PtrTy = iBuilder->getInt8PtrTy();
367    DataLayout DL(iBuilder->getModule());
368    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
369
370    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
371    Function * f = iBuilder->GetInsertBlock()->getParent();
372    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
373    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
374    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
375    const unsigned numStreams = getType()->getArrayNumElements();
376    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
377    const auto elemTy = getType()->getArrayElementType();
378    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
379    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
380    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
381    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
382    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
383    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
384
385    iBuilder->SetInsertPoint(wholeBlockCopy);
386    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
387    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
388    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
389    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
390    iBuilder->SetInsertPoint(partialBlockCopy);
391    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
392    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
393    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
394        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
395        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
396        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
397    }
398    iBuilder->CreateBr(copyDone);
399
400    iBuilder->SetInsertPoint(copyDone);
401}
402
403void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
404    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
405    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
406}
407
408Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
409    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
410}
411
412Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
413    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
414    if (reverse) return accessibleItems;
415    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
416}
417
418Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
419    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
420    if (reverse) return accessibleBlocks;
421    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
422}
423
424// Expandable Buffer
425
426void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
427    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
428    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
429    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
430    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
431    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
432    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
433    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
434    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
435    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
436    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
437    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
438}
439
440std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
441
442    // ENTRY
443    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
444    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
445    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
446    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
447    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
448
449    assert (streamIndex->getType() == capacity->getType());
450    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
451
452    // Are we guaranteed that we can access this stream?
453    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
454        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
455        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
456        return {streamSet, offset};
457    }
458
459    BasicBlock * const entry = iBuilder->GetInsertBlock();
460    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
461    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
462
463    iBuilder->CreateLikelyCondBr(cond, resume, expand);
464
465    // EXPAND
466    iBuilder->SetInsertPoint(expand);
467
468    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
469    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
470
471    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
472    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
473    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
474
475    std::string tmp;
476    raw_string_ostream out(tmp);
477    out << "__expand";
478    elementType->print(out);
479    std::string name = out.str();
480
481    Module * const m = iBuilder->getModule();
482    Function * expandFunction = m->getFunction(name);
483
484    if (expandFunction == nullptr) {
485
486        const auto ip = iBuilder->saveIP();
487
488        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
489        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
490
491        auto args = expandFunction->arg_begin();
492        Value * streamSet = &*args++;
493        Value * capacity = &*args++;
494        Value * newCapacity = &*args;
495
496        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
497        iBuilder->SetInsertPoint(entry);
498
499        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
500        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
501
502        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
503        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
504
505        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
506        for (unsigned i = 0; i < mBufferBlocks; ++i) {
507            ConstantInt * const offset = iBuilder->getSize(i);
508            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
509            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
510            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
511            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
512            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
513            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
514            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
515            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
516        }
517
518        iBuilder->CreateFree(streamSet);
519
520        iBuilder->CreateRet(newStreamSet);
521
522        iBuilder->restoreIP(ip);
523    }
524
525    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
526    iBuilder->CreateStore(newStreamSet, streamSetPtr);
527    iBuilder->CreateStore(newCapacity, capacityPtr);
528
529    iBuilder->CreateBr(resume);
530
531    // RESUME
532    iBuilder->SetInsertPoint(resume);
533
534    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
535    phiStreamSet->addIncoming(streamSet, entry);
536    phiStreamSet->addIncoming(newStreamSet, expand);
537
538    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
539    phiCapacity->addIncoming(capacity, entry);
540    phiCapacity->addIncoming(newCapacity, expand);
541
542    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
543
544    return {phiStreamSet, offset};
545}
546
547Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
548    Value * ptr, * offset;
549    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
550    return iBuilder->CreateGEP(ptr, offset);
551}
552
553Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
554    Value * ptr, * offset;
555    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
556    return iBuilder->CreateGEP(ptr, {offset, packIndex});
557}
558
559Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
560    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
561}
562
563Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
564    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
565    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
566    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
567    return baseAddr;
568}
569
570void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
571    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
572}
573
574Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
575    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
576}
577
578Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *, bool reverse) const {
579    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
580}
581
582SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
583: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
584    mUniqueID = "B";
585    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
586        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
587    }
588}
589
590ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
591: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
592    mUniqueID = "E";
593    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
594    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
595}
596
597CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
598: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
599    mUniqueID = "C" + std::to_string(bufferBlocks);
600    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
601}
602
603CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
604: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
605
606}
607
608CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
609: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
610, mOverflowBlocks(overflowBlocks) {
611    mUniqueID = "CC" + std::to_string(bufferBlocks);
612    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
613    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
614}
615
616ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
617: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
618, mInitialCapacity(type->getArrayNumElements()) {
619    mUniqueID = "XP" + std::to_string(bufferBlocks);
620    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
621}
622
623SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
624: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
625    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
626    if (mOverflowBlocks != 1) {
627        mUniqueID += "_" + std::to_string(mOverflowBlocks);
628    }
629    if (AddressSpace > 0) {
630        mUniqueID += "@" + std::to_string(AddressSpace);
631    }
632}
633
634Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
635    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
636    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
637    Value * const addr = b->CreateLoad(p);
638    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
639    return addr;
640}
641
642Value * DynamicBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
643    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
644    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
645}
646
647Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * streamIndex, Value * absolutePosition) const {
648    Value * absBlock = b->CreateUDiv(absolutePosition, b->getSize(b->getBitBlockWidth()));
649    Value * blockPos = b->CreateURem(absolutePosition, b->getSize(b->getBitBlockWidth()));
650    Value * blockPtr = b->CreateGEP(getStreamSetBlockPtr(b, handle, absBlock), {b->getInt32(0), streamIndex});
651    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
652    if (bw < 8) {
653        assert (bw  == 1 || bw == 2 || bw == 4);
654        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
655        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
656    } else {
657        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
658    }
659    return b->CreateGEP(blockPtr, blockPos);
660}
661
662
663Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
664    Constant * blockSize = b->getSize(b->getBitBlockWidth());
665    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
666        Value * blockRem = b->CreateURem(fromPosition, blockSize);
667        if (reverse) {
668            return b->CreateSelect(b->CreateICmpEQ(blockRem, b->getSize(0)), blockSize, blockRem);
669        }
670        else return b->CreateSub(blockSize, blockRem);
671    } else {
672        Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
673        Value * bufSize = b->CreateMul(bufBlocks, blockSize);
674        Value * bufRem = b->CreateURem(fromPosition, bufSize);
675        if (reverse) {
676            return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
677        }
678        else return b->CreateSub(bufSize, bufRem, "linearItems");
679    }
680}
681
682Value * DynamicBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const b, Value * handle, Value * fromBlock, bool reverse) const {
683    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
684    Value * bufRem = b->CreateURem(fromBlock, bufBlocks);
685    if (reverse) {
686        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufBlocks, bufRem);
687    }
688    else return b->CreateSub(bufBlocks, bufRem, "linearBlocks");
689}
690
691Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
692    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
693    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
694}
695
696
697
698void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
699    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
700    size_t numStreams = 1;
701    if (isa<ArrayType>(mBaseType)) {
702        numStreams = mBaseType->getArrayNumElements();
703    }
704    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
705    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
706    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
707    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
708    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
709    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
710    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
711        b->CallPrintInt("allocated: ", bufPtr);
712        b->CallPrintInt("allocated capacity: ", bufSize);
713    }
714    b->CreateStore(bufPtr, bufBasePtrField);
715    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
716    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
717    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
718    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
719    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
720    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
721    mStreamSetBufferPtr = handle;
722}
723
724void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
725    Value * handle = mStreamSetBufferPtr;
726    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
727    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
728    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
729    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
730    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
731    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
732    Value * priorBuf = b->CreateLoad(priorBasePtrField);
733    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
734    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
735    b->SetInsertPoint(freePrior);
736    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
737        b->CallPrintInt("releasing: ", priorBuf);
738    }
739    b->CreateFree(priorBuf);
740    b->CreateBr(freeCurrent);
741    b->SetInsertPoint(freeCurrent);
742    b->CreateFree(b->CreateLoad(bufBasePtrField));
743}
744
745//
746//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
747//  ensures that we have correct data.   TODO: consider optimizing based on actual
748//  consumer and producer positions.
749//
750void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
751    size_t numStreams = 1;
752    if (isa<ArrayType>(mBaseType)) {
753        numStreams = mBaseType->getArrayNumElements();
754    }
755    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
756    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
757    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
758    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
759    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
760    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
761    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
762   
763    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
764    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
765    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
766    Value * const curAllocated = b->CreateLoad(capacityField);
767    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
768    if (mOverflowBlocks > 0) {
769        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
770        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
771    }
772    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
773    BasicBlock * doubleEntry = b->GetInsertBlock();
774    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
775    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
776    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
777    b->SetInsertPoint(doRealloc);
778    // If there is a non-null priorBasePtr, free it.
779    Value * priorBuf = b->CreateLoad(priorBasePtrField);
780    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
781    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
782    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
783    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
784    b->SetInsertPoint(deallocatePrior);
785    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
786        b->CallPrintInt("deallocating: ", priorBuf);
787    }
788    b->CreateFree(priorBuf);
789    b->CreateBr(allocateNew);
790    b->SetInsertPoint(allocateNew);
791    b->CreateStore(oldBufPtr, priorBasePtrField);
792    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
793    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
794        b->CallPrintInt("re-allocated: ", newBufPtr);
795        b->CallPrintInt("allocated capacity: ", neededCapacity);
796    }
797    b->CreateStore(newBufPtr, bufBasePtrField);
798    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
799    b->CreateStore(neededCapacity, capacityField);
800    b->CreateBr(doCopy2);
801    b->SetInsertPoint(doCopy2);
802    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
803    bufPtr->addIncoming(oldBufPtr, doubleEntry);
804    bufPtr->addIncoming(newBufPtr, allocateNew);
805    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
806    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
807    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
808        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
809    }
810    b->CreateStore(currentWorkingBlocks, workingBlocksField);
811}
812
813inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
814    IntegerType * sizeTy = b->getSizeTy();
815    PointerType * typePtr = baseType->getPointerTo(addrSpace);
816    return StructType::get(typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy, nullptr);
817}
818
819DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
820: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
821, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
822, mSwizzleFactor(swizzle)
823, mOverflowBlocks(overflow)
824{
825    mUniqueID = "DB";
826    if (swizzle != 1) {
827        mUniqueID += "s" + std::to_string(swizzle);
828    }
829        if (overflow != 0) {
830        mUniqueID += "o" + std::to_string(overflow);
831    }
832    if (addrSpace != 0) {
833        mUniqueID += "@" + std::to_string(addrSpace);
834    }
835}
836
837
838inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
839: mBufferKind(k)
840, mType(resolvedType)
841, mBufferBlocks(BufferBlocks)
842, mAddressSpace(AddressSpace)
843, mStreamSetBufferPtr(nullptr)
844, mBaseType(baseType)
845, mProducer(nullptr) {
846
847}
848
849StreamSetBuffer::~StreamSetBuffer() { }
850
851// Helper routines
852ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
853    unsigned numElements = 1;
854    if (LLVM_LIKELY(type->isArrayTy())) {
855        numElements = type->getArrayNumElements();
856        type = type->getArrayElementType();
857    }
858    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
859        type = type->getVectorElementType();
860        if (LLVM_LIKELY(type->isIntegerTy())) {
861            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
862            type = b->getBitBlockType();
863            if (fieldWidth != 1) {
864                type = ArrayType::get(type, fieldWidth);
865            }
866            return ArrayType::get(type, numElements);
867        }
868    }
869    std::string tmp;
870    raw_string_ostream out(tmp);
871    type->print(out);
872    out << " is an unvalid stream set buffer type.";
873    report_fatal_error(out.str());
874}
875
876StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
877    if (LLVM_LIKELY(type->isArrayTy())) {
878        type = type->getArrayElementType();
879    }
880    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
881        type = type->getVectorElementType();
882        if (LLVM_LIKELY(type->isIntegerTy())) {
883            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
884            type = b->getBitBlockType();
885            if (fieldWidth != 1) {
886                type = ArrayType::get(type, fieldWidth);
887            }
888            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
889        }
890    }
891    std::string tmp;
892    raw_string_ostream out(tmp);
893    type->print(out);
894    out << " is an unvalid stream set buffer type.";
895    report_fatal_error(out.str());
896}
Note: See TracBrowser for help on using the repository browser.