source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5647

Last change on this file since 5647 was 5647, checked in by nmedfort, 22 months ago

Minor bug fixes and removal of an inadvertent check-in of StreamSet.cpp/h

File size: 50.1 KB
RevLine 
[5044]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5260]6#include "streamset.h"
[5436]7#include <llvm/IR/Module.h>
[5307]8#include <llvm/Support/raw_ostream.h>
[5408]9#include <kernels/kernel.h>
[5436]10#include <kernels/kernel_builder.h>
[5425]11#include <toolchain/toolchain.h>
[5542]12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
[5307]14
[5260]15namespace llvm { class Constant; }
16namespace llvm { class Function; }
[5191]17
[5100]18using namespace parabix;
[5260]19using namespace llvm;
20using namespace IDISA;
[5044]21
[5506]22
23Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
24
[5436]25ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]26
[5436]27StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]28
[5436]29void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5429]30    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
31        Type * const ty = getType();
[5597]32        if (mAddressSpace == 0) {
33            Constant * size = ConstantExpr::getSizeOf(ty);
34            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
35            mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
36        } else {
37            mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
38        }
[5429]39        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
40    } else {
41        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
42    }
[5044]43}
44
[5597]45void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
46    if (mAddressSpace == 0) {
47        iBuilder->CreateFree(mStreamSetBufferPtr);
48    }
49}
50
[5435]51Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
[5486]52    if (codegen::EnableAsserts) {
53        Value * const count = getStreamSetCount(iBuilder, self);
54        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
55        Value * const cond = iBuilder->CreateICmpULT(index, count);
56        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
57    }
[5431]58    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
[5260]59}
60
[5435]61Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
[5486]62    if (codegen::EnableAsserts) {
63        Value * const count = getStreamSetCount(iBuilder, self);
64        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
65        Value * const cond = iBuilder->CreateICmpULT(index, count);
66        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
67    }
[5431]68    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
[5260]69}
70
[5435]71void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
[5398]72    report_fatal_error("setBaseAddress is not supported by this buffer type");
73}
74
[5435]75Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
[5398]76    report_fatal_error("getBufferedSize is not supported by this buffer type");
77}
78
[5435]79void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
[5398]80    report_fatal_error("setBufferedSize is not supported by this buffer type");
81}
82
[5501]83Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
84    report_fatal_error("getCapacity is not supported by this buffer type");
85}
86
87void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
88    report_fatal_error("setCapacity is not supported by this buffer type");
89}
90
[5377]91inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
[5353]92    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
93        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
94            return true;
95        }
96    }
97    return false;
98}
99
[5435]100Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
[5498]101    size_t count = 1;
[5329]102    if (isa<ArrayType>(mBaseType)) {
103        count = mBaseType->getArrayNumElements();
104    }
[5337]105    return iBuilder->getSize(count);
[5329]106}
107
[5435]108inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
[5353]109    assert (offset->getType()->isIntegerTy());
110    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
111        return offset;
112    } else if (mBufferBlocks == 1) {
113        return ConstantInt::getNullValue(iBuilder->getSizeTy());
114    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
115        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
116    } else {
117        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
118    }
119}
120
[5307]121/**
122 * @brief getRawItemPointer
123 *
124 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
125 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
126 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
127 */
[5435]128Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
[5446]129    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
130    Value * relativePosition = absolutePosition;
[5445]131    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
132    if (bw < 8) {
[5446]133        assert (bw  == 1 || bw == 2 || bw == 4);
[5445]134        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
135        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
[5446]136    } else {
[5445]137        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
138    }
[5446]139    return iBuilder->CreateGEP(ptr, relativePosition);
[5260]140}
141
[5647]142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
[5639]143    Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
144    Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
145    if (reverse) {
146        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
[5301]147    }
[5639]148    else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
[5301]149}
150
[5647]151Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
[5355]152    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
[5618]153    Value * bufRem = iBuilder->CreateURem(fromBlock, bufBlocks);
154    if (reverse) {
155        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufBlocks, bufRem);
156    }
157    else return iBuilder->CreateSub(bufBlocks, bufRem, "linearBlocks");
[5355]158}
[5301]159
[5618]160Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
161    return getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
[5434]162}
163
[5618]164Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
165    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock, reverse);
[5434]166}
167
[5446]168Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
169    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
[5377]170    return self;
171}
172
[5479]173void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
174    Type * i8ptr = iBuilder->getInt8PtrTy();
175    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
[5498]176    size_t numStreams = 1;
[5493]177    if (isa<ArrayType>(mBaseType)) {
178        numStreams = mBaseType->getArrayNumElements();
179    }
180    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
[5479]181    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
182    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
183}
184
[5435]185void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
[5486]186    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
187    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
188    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
[5498]189    size_t numStreams = 1;
[5493]190    if (isa<ArrayType>(mBaseType)) {
191        numStreams = mBaseType->getArrayNumElements();
192    }
193    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
[5448]194    if (numStreams == 1) {
195        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
196        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
[5486]197        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
198    } else {
199        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
200        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
201        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
202        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
203        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
204        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
205        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
206        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
207        for (unsigned strm = 0; strm < numStreams; strm++) {
208            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
209            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
210            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
211            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
212            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
213        }
[5448]214    }
[5432]215}
216
[5641]217void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
218    report_fatal_error("Copy back not supported for this buffer type:" + Name);
219}
220
[5398]221// Source File Buffer
[5506]222
223Type * SourceBuffer::getStreamSetBlockType() const {
224    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
225}
226
227
[5435]228Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
[5501]229    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
[5398]230    return iBuilder->CreateLoad(ptr);
231}
232
[5435]233void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
[5501]234    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
[5398]235    iBuilder->CreateStore(size, ptr);
236}
237
[5501]238Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
239    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
240    return iBuilder->CreateLoad(ptr);
241}
242
243void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
244    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
245    iBuilder->CreateStore(c, ptr);
246}
247
[5435]248void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
[5501]249    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
[5503]250
[5418]251    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
[5398]252}
253
[5435]254Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
[5446]255    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
[5501]256    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
[5429]257    Value * const addr = iBuilder->CreateLoad(ptr);
[5446]258    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
[5429]259    return addr;
[5398]260}
261
[5435]262Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
[5431]263    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
[5398]264}
265
[5647]266Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
[5618]267    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
[5501]268    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
[5398]269}
270
[5647]271Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
[5618]272    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
[5503]273    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
274}
[5501]275
[5597]276void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
277    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
278        Type * const ty = getType();
279        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
280        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
281    } else {
282        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
283    }
284}
[5503]285
[5597]286void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const {
287
288}
289
[5429]290// External File Buffer
[5436]291void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
[5429]292    report_fatal_error("External buffers cannot be allocated.");
[5377]293}
294
[5597]295void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
296
297}
298
[5435]299Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
[5431]300    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
[5411]301}
302
[5647]303Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool) const {
[5429]304    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
[5377]305}
306
[5260]307// Circular Buffer
[5435]308Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
[5431]309    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
[5100]310}
311
[5446]312Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
313    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
314    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
315    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
316    if (bw < 8) {
317        assert (bw  == 1 || bw == 2 || bw == 4);
318        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
319        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
320    } else {
321        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
322    }
323    return iBuilder->CreateGEP(ptr, relativePosition);
324}
325
[5301]326// CircularCopybackBuffer Buffer
[5436]327void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5597]328    Type * const ty = getType();
329    Constant * size = ConstantExpr::getSizeOf(ty);
330    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
331    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5301]332}
333
[5618]334Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
335    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
336    if (reverse) return accessibleItems;
337    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
[5434]338}
339
[5618]340Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
341    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
342    if (reverse) return accessibleBlocks;
343    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
[5434]344}
345
[5641]346void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
347    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
348    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
349    Value * newBufPos = b->CreateURem(newProduced, bufSize);
350    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
351    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
352    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
353    b->CreateCondBr(wraparound, copyBack, done);
354    b->SetInsertPoint(copyBack);
355    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
356    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
357    b->CreateBr(done);
358    b->SetInsertPoint(done);
359}
360
361
[5355]362// SwizzledCopybackBuffer Buffer
363
[5436]364void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5597]365    Type * const ty = getType();
366    Constant * size = ConstantExpr::getSizeOf(ty);
367    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
368    mStreamSetBufferPtr = iBuilder->CreatePointerCast(iBuilder->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5355]369}
370
[5435]371void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
[5454]372    Type * int8PtrTy = iBuilder->getInt8PtrTy();
373    DataLayout DL(iBuilder->getModule());
374    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
375
[5355]376    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
377    Function * f = iBuilder->GetInsertBlock()->getParent();
378    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
379    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
[5432]380    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
[5454]381    const unsigned numStreams = getType()->getArrayNumElements();
382    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
383    const auto elemTy = getType()->getArrayElementType();
384    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
[5432]385    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
386    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
387    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
388    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
389    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
[5454]390
[5355]391    iBuilder->SetInsertPoint(wholeBlockCopy);
[5454]392    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
393    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
394    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
[5432]395    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
[5355]396    iBuilder->SetInsertPoint(partialBlockCopy);
[5432]397    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
[5355]398    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
399    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
400        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
401        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
[5454]402        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
[5355]403    }
[5432]404    iBuilder->CreateBr(copyDone);
[5454]405
[5432]406    iBuilder->SetInsertPoint(copyDone);
[5355]407}
408
[5435]409Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
[5431]410    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
[5355]411}
412
[5618]413Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
414    Value * accessibleItems = getLinearlyAccessibleItems(iBuilder, self, fromPosition, reverse);
415    if (reverse) return accessibleItems;
416    return iBuilder->CreateAdd(accessibleItems, iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
[5434]417}
418
[5618]419Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock, bool reverse) const {
420    Value * accessibleBlocks = getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
421    if (reverse) return accessibleBlocks;
422    return iBuilder->CreateAdd(accessibleBlocks, iBuilder->getSize(mOverflowBlocks));
[5434]423}
[5641]424void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProduced, Value * newProduced, const std::string Name) {
425    Constant * bufSize = b->getSize(mBufferBlocks * b->getBitBlockWidth());
426    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
427    Value * newBufPos = b->CreateURem(newProduced, bufSize);
428    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
429    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
430    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
431    b->CreateCondBr(wraparound, copyBack, done);
432    b->SetInsertPoint(copyBack);
433    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
434    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
435    b->CreateBr(done);
436    b->SetInsertPoint(done);
437}
[5434]438
[5260]439// Expandable Buffer
440
[5436]441void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5320]442    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
443    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
444    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
445    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
[5361]446    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
447    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
[5464]448    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
449    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
[5361]450    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
[5320]451    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
[5361]452    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
[5260]453}
454
[5435]455std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5340]456
[5320]457    // ENTRY
458    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
459    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
460    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
461    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
[5431]462    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
[5311]463
[5353]464    assert (streamIndex->getType() == capacity->getType());
465    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
466
[5320]467    // Are we guaranteed that we can access this stream?
[5353]468    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
469        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
470        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
471        return {streamSet, offset};
[5320]472    }
[5260]473
[5320]474    BasicBlock * const entry = iBuilder->GetInsertBlock();
475    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
476    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
[5311]477
[5353]478    iBuilder->CreateLikelyCondBr(cond, resume, expand);
479
[5320]480    // EXPAND
481    iBuilder->SetInsertPoint(expand);
[5353]482
483    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
484    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
485
[5361]486    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
487    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
488    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
489
[5353]490    std::string tmp;
491    raw_string_ostream out(tmp);
492    out << "__expand";
493    elementType->print(out);
494    std::string name = out.str();
495
496    Module * const m = iBuilder->getModule();
497    Function * expandFunction = m->getFunction(name);
498
499    if (expandFunction == nullptr) {
500
501        const auto ip = iBuilder->saveIP();
502
503        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
504        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
505
506        auto args = expandFunction->arg_begin();
507        Value * streamSet = &*args++;
508        Value * capacity = &*args++;
509        Value * newCapacity = &*args;
510
511        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
512        iBuilder->SetInsertPoint(entry);
513
514        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
[5464]515        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
[5486]516
[5464]517        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
[5353]518        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
519
520        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
521        for (unsigned i = 0; i < mBufferBlocks; ++i) {
522            ConstantInt * const offset = iBuilder->getSize(i);
523            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
524            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
525            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
526            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
527            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
528            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
529            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
530            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
531        }
532
[5464]533        iBuilder->CreateFree(streamSet);
[5353]534
535        iBuilder->CreateRet(newStreamSet);
536
537        iBuilder->restoreIP(ip);
[5320]538    }
[5311]539
[5353]540    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
541    iBuilder->CreateStore(newStreamSet, streamSetPtr);
542    iBuilder->CreateStore(newCapacity, capacityPtr);
543
[5320]544    iBuilder->CreateBr(resume);
[5353]545
[5320]546    // RESUME
547    iBuilder->SetInsertPoint(resume);
548
549    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
550    phiStreamSet->addIncoming(streamSet, entry);
551    phiStreamSet->addIncoming(newStreamSet, expand);
552
553    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
554    phiCapacity->addIncoming(capacity, entry);
555    phiCapacity->addIncoming(newCapacity, expand);
556
557    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
558
559    return {phiStreamSet, offset};
[5260]560}
561
[5435]562Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5320]563    Value * ptr, * offset;
[5431]564    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
[5320]565    return iBuilder->CreateGEP(ptr, offset);
566}
567
[5435]568Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
[5320]569    Value * ptr, * offset;
[5431]570    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
[5320]571    return iBuilder->CreateGEP(ptr, {offset, packIndex});
572}
573
[5435]574Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
[5329]575    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
576}
577
[5435]578Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
[5446]579    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
580    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
581    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
582    return baseAddr;
[5377]583}
584
[5544]585void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
586    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
[5386]587}
588
[5435]589Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
[5329]590    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
[5316]591}
592
[5647]593Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, bool) const {
[5320]594    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
[5311]595}
596
[5440]597SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
[5501]598: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
[5440]599    mUniqueID = "B";
600    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
601        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
602    }
[5260]603}
604
[5436]605ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
[5454]606: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
[5429]607    mUniqueID = "E";
[5395]608    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
[5429]609    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
[5377]610}
611
[5436]612CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5431]613: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
[5395]614    mUniqueID = "C" + std::to_string(bufferBlocks);
615    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
[5260]616}
617
[5446]618CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
619: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
620
621}
622
[5436]623CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
[5446]624: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
625, mOverflowBlocks(overflowBlocks) {
[5641]626    if (bufferBlocks < 2 * overflowBlocks) {
627        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
628    }
[5395]629    mUniqueID = "CC" + std::to_string(bufferBlocks);
630    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
631    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
[5260]632}
633
[5436]634ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5431]635: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
[5320]636, mInitialCapacity(type->getArrayNumElements()) {
[5395]637    mUniqueID = "XP" + std::to_string(bufferBlocks);
638    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
[5260]639}
640
[5436]641SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
[5431]642: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
643    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
[5641]644    if (bufferBlocks < 2 * overflowBlocks) {
645        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
646    }
[5431]647    if (mOverflowBlocks != 1) {
648        mUniqueID += "_" + std::to_string(mOverflowBlocks);
649    }
650    if (AddressSpace > 0) {
651        mUniqueID += "@" + std::to_string(AddressSpace);
652    }
653}
654
[5541]655Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
656    b->CreateAssert(handle, "DynamicBuffer: instance cannot be null");
657    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
658    Value * const addr = b->CreateLoad(p);
659    b->CreateAssert(addr, "DynamicBuffer: base address cannot be 0");
660    return addr;
661}
662
663Value * DynamicBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const b, Value * handle, Value * blockIndex) const {
664    Value * const wkgBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
665    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, wkgBlocks));
666}
667
668Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * handle, Value * streamIndex, Value * absolutePosition) const {
669    Value * absBlock = b->CreateUDiv(absolutePosition, b->getSize(b->getBitBlockWidth()));
670    Value * blockPos = b->CreateURem(absolutePosition, b->getSize(b->getBitBlockWidth()));
671    Value * blockPtr = b->CreateGEP(getStreamSetBlockPtr(b, handle, absBlock), {b->getInt32(0), streamIndex});
672    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
673    if (bw < 8) {
674        assert (bw  == 1 || bw == 2 || bw == 4);
675        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
676        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
677    } else {
678        blockPtr = b->CreatePointerCast(blockPtr, b->getIntNTy(bw)->getPointerTo());
679    }
680    return b->CreateGEP(blockPtr, blockPos);
681}
682
[5597]683
[5647]684Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
[5541]685    Constant * blockSize = b->getSize(b->getBitBlockWidth());
[5639]686    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
687    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
688    Value * bufRem = b->CreateURem(fromPosition, bufSize);
689    if (reverse) {
690        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
[5541]691    }
[5646]692    return b->CreateSub(bufSize, bufRem, "linearItems");
[5541]693}
694
[5639]695Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
696    Value * accessibleItems = getLinearlyAccessibleItems(b, handle, fromPosition, reverse);
697    if (reverse || (mOverflowBlocks == 0))  return accessibleItems;
698    return b->CreateAdd(accessibleItems, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
699}
700
[5647]701Value * DynamicBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const b, Value * handle, Value * fromBlock, bool reverse) const {
[5597]702    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
[5618]703    Value * bufRem = b->CreateURem(fromBlock, bufBlocks);
704    if (reverse) {
705        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufBlocks, bufRem);
706    }
[5646]707    return b->CreateSub(bufBlocks, bufRem, "linearBlocks");
[5541]708}
709
[5612]710Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
711    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(Field::WorkingBlocks))});
712    return iBuilder->CreateMul(iBuilder->CreateLoad(ptr), iBuilder->getSize(iBuilder->getBitBlockWidth()));
713}
714
[5641]715void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) {
716    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
717    Value * bufSize = b->CreateMul(workingBlocks, b->getSize(b->getBitBlockWidth()));
718    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
719    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
720    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_copyBack");
721    BasicBlock * done = b->CreateBasicBlock(Name + "_copyBackDone");
722    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
723    b->CreateCondBr(wraparound, copyBack, done);
724    b->SetInsertPoint(copyBack);
725    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
726    Value * bufBasePtr = b->CreateLoad(bufBasePtrField);
727    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
728    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
729    b->CreateBr(done);
730    b->SetInsertPoint(done);
731}
[5612]732
[5541]733void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5542]734    Value * handle = b->CreateCacheAlignedAlloca(mBufferStructType);
[5541]735    size_t numStreams = 1;
736    if (isa<ArrayType>(mBaseType)) {
737        numStreams = mBaseType->getArrayNumElements();
738    }
739    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
740    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
[5543]741    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
[5597]742    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
[5612]743    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
744    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
[5618]745    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
746        b->CallPrintInt("allocated: ", bufPtr);
747        b->CallPrintInt("allocated capacity: ", bufSize);
748    }
[5541]749    b->CreateStore(bufPtr, bufBasePtrField);
[5612]750    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
[5597]751    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
752    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
753    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
754    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
755    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
[5541]756    mStreamSetBufferPtr = handle;
757}
758
[5544]759void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5612]760    Value * handle = mStreamSetBufferPtr;
[5541]761    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
[5612]762    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
763    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
764    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
765    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
766    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
767    Value * priorBuf = b->CreateLoad(priorBasePtrField);
768    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
769    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
770    b->SetInsertPoint(freePrior);
[5618]771    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
772        b->CallPrintInt("releasing: ", priorBuf);
773    }
[5612]774    b->CreateFree(priorBuf);
775    b->CreateBr(freeCurrent);
776    b->SetInsertPoint(freeCurrent);
777    b->CreateFree(b->CreateLoad(bufBasePtrField));
[5541]778}
779
[5612]780//
781//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
782//  ensures that we have correct data.   TODO: consider optimizing based on actual
783//  consumer and producer positions.
784//
785void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * handle) {
786    size_t numStreams = 1;
787    if (isa<ArrayType>(mBaseType)) {
788        numStreams = mBaseType->getArrayNumElements();
789    }
790    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
791    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
792    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
793    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
794    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
795    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
796    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
797   
798    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
[5618]799    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
[5612]800    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
801    Value * const curAllocated = b->CreateLoad(capacityField);
802    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
803    if (mOverflowBlocks > 0) {
804        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
805        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
806    }
807    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
808    BasicBlock * doubleEntry = b->GetInsertBlock();
809    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
810    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
811    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
812    b->SetInsertPoint(doRealloc);
813    // If there is a non-null priorBasePtr, free it.
814    Value * priorBuf = b->CreateLoad(priorBasePtrField);
815    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
816    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
817    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
818    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
819    b->SetInsertPoint(deallocatePrior);
[5618]820    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
821        b->CallPrintInt("deallocating: ", priorBuf);
822    }
[5612]823    b->CreateFree(priorBuf);
824    b->CreateBr(allocateNew);
825    b->SetInsertPoint(allocateNew);
826    b->CreateStore(oldBufPtr, priorBasePtrField);
827    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
[5618]828    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
829        b->CallPrintInt("re-allocated: ", newBufPtr);
830        b->CallPrintInt("allocated capacity: ", neededCapacity);
831    }
[5612]832    b->CreateStore(newBufPtr, bufBasePtrField);
833    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
834    b->CreateStore(neededCapacity, capacityField);
835    b->CreateBr(doCopy2);
836    b->SetInsertPoint(doCopy2);
837    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
838    bufPtr->addIncoming(oldBufPtr, doubleEntry);
[5615]839    bufPtr->addIncoming(newBufPtr, allocateNew);
[5612]840    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
[5618]841    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
842    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
843        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
844    }
845    b->CreateStore(currentWorkingBlocks, workingBlocksField);
[5612]846}
[5541]847
[5620]848inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
849    IntegerType * sizeTy = b->getSizeTy();
850    PointerType * typePtr = baseType->getPointerTo(addrSpace);
851    return StructType::get(typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy, nullptr);
852}
853
[5541]854DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
[5542]855: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, addrSpace)
[5620]856, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
[5541]857, mSwizzleFactor(swizzle)
858, mOverflowBlocks(overflow)
859{
[5641]860    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
861        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
862    }
[5541]863    mUniqueID = "DB";
864    if (swizzle != 1) {
865        mUniqueID += "s" + std::to_string(swizzle);
866    }
867        if (overflow != 0) {
868        mUniqueID += "o" + std::to_string(overflow);
869    }
870    if (addrSpace != 0) {
871        mUniqueID += "@" + std::to_string(addrSpace);
872    }
873}
874
875
[5446]876inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
[5320]877: mBufferKind(k)
878, mType(resolvedType)
[5446]879, mBufferBlocks(BufferBlocks)
[5320]880, mAddressSpace(AddressSpace)
881, mStreamSetBufferPtr(nullptr)
[5408]882, mBaseType(baseType)
883, mProducer(nullptr) {
[5320]884
885}
886
[5377]887StreamSetBuffer::~StreamSetBuffer() { }
888
[5320]889// Helper routines
[5436]890ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]891    unsigned numElements = 1;
892    if (LLVM_LIKELY(type->isArrayTy())) {
893        numElements = type->getArrayNumElements();
894        type = type->getArrayElementType();
895    }
896    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
897        type = type->getVectorElementType();
898        if (LLVM_LIKELY(type->isIntegerTy())) {
899            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
900            type = b->getBitBlockType();
901            if (fieldWidth != 1) {
902                type = ArrayType::get(type, fieldWidth);
[5307]903            }
[5320]904            return ArrayType::get(type, numElements);
[5307]905        }
906    }
907    std::string tmp;
908    raw_string_ostream out(tmp);
909    type->print(out);
910    out << " is an unvalid stream set buffer type.";
911    report_fatal_error(out.str());
912}
[5301]913
[5436]914StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]915    if (LLVM_LIKELY(type->isArrayTy())) {
916        type = type->getArrayElementType();
917    }
918    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
919        type = type->getVectorElementType();
920        if (LLVM_LIKELY(type->isIntegerTy())) {
921            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
922            type = b->getBitBlockType();
923            if (fieldWidth != 1) {
924                type = ArrayType::get(type, fieldWidth);
925            }
926            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
927        }
928    }
929    std::string tmp;
930    raw_string_ostream out(tmp);
931    type->print(out);
932    out << " is an unvalid stream set buffer type.";
933    report_fatal_error(out.str());
[5260]934}
Note: See TracBrowser for help on using the repository browser.