source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5757

Last change on this file since 5757 was 5757, checked in by nmedfort, 17 months ago

Bug fixes + more assertions to prevent similar errors.

File size: 51.1 KB
RevLine 
[5044]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5260]6#include "streamset.h"
[5436]7#include <llvm/IR/Module.h>
[5307]8#include <llvm/Support/raw_ostream.h>
[5408]9#include <kernels/kernel.h>
[5436]10#include <kernels/kernel_builder.h>
[5425]11#include <toolchain/toolchain.h>
[5542]12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
[5307]14
[5260]15namespace llvm { class Constant; }
16namespace llvm { class Function; }
[5191]17
[5100]18using namespace parabix;
[5260]19using namespace llvm;
20using namespace IDISA;
[5044]21
// Returns true iff n is non-zero and has exactly one bit set.
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set.
    return (n & (n - 1)) == 0;
}
[5506]25
26Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
27
[5436]28ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]29
[5436]30StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
[5320]31
[5755]32void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5706]33    assert (mBufferBlocks > 0);
[5429]34    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
35        Type * const ty = getType();
[5597]36        if (mAddressSpace == 0) {
37            Constant * size = ConstantExpr::getSizeOf(ty);
38            size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks));
[5755]39            mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5597]40        } else {
[5755]41            mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
[5597]42        }
[5755]43        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
[5429]44    } else {
45        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
46    }
[5044]47}
48
[5755]49void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5597]50    if (mAddressSpace == 0) {
[5755]51        b->CreateFree(mStreamSetBufferPtr);
[5597]52    }
53}
54
[5755]55inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
56    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
57}
58
59Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
60    assert (offset->getType()->isIntegerTy());
61    if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
62        return offset;
63    } else if (mBufferBlocks == 1) {
64        return ConstantInt::getNullValue(offset->getType());
65    } else if (is_power_2(mBufferBlocks)) {
66        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
67    } else {
68        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
69    }
70}
71
72Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
[5721]73    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
[5755]74        Value * const count = getStreamSetCount(b, handle);
75        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
76        Value * const cond = b->CreateICmpULT(index, count);
77        b->CreateAssert(cond, "out-of-bounds stream access");
[5486]78    }
[5755]79    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex});
[5260]80}
81
[5755]82Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
[5721]83    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
[5755]84        Value * const count = getStreamSetCount(b, handle);
85        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
86        Value * const cond = b->CreateICmpULT(index, count);
87        b->CreateAssert(cond, "out-of-bounds stream access");
[5486]88    }
[5755]89    return b->CreateGEP(addr, {modBufferSize(b, blockIndex), streamIndex, packIndex});
[5260]90}
91
[5755]92void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
[5398]93    report_fatal_error("setBaseAddress is not supported by this buffer type");
94}
95
[5755]96Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
97    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
[5398]98}
99
[5755]100void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
[5398]101    report_fatal_error("setBufferedSize is not supported by this buffer type");
102}
103
[5757]104Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
105    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
[5501]106}
107
[5755]108void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
[5501]109    report_fatal_error("setCapacity is not supported by this buffer type");
110}
111
[5755]112Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value *) const {
[5498]113    size_t count = 1;
[5329]114    if (isa<ArrayType>(mBaseType)) {
115        count = mBaseType->getArrayNumElements();
116    }
[5755]117    return b->getSize(count);
[5329]118}
119
[5307]120/**
121 * @brief getRawItemPointer
122 *
123 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
124 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
125 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
126 */
[5755]127Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
128    Value * ptr = getBaseAddress(b, handle);
[5446]129    Value * relativePosition = absolutePosition;
[5755]130    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
131    const auto bw = elemTy->getPrimitiveSizeInBits();
132    assert (is_power_2(bw));
[5445]133    if (bw < 8) {
[5755]134        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
135        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
136            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
137        }
138        relativePosition = b->CreateUDiv(relativePosition, fw);
139        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
[5446]140    } else {
[5755]141        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
[5445]142    }
[5755]143    return b->CreateGEP(ptr, relativePosition);
[5260]144}
145
[5755]146Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
[5706]147    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
[5650]148    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
[5639]149    if (reverse) {
[5650]150        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
151        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
[5706]152    } else {
[5650]153        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
154        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
155    }
[5301]156}
157
[5755]158Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
159    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
160    Value * bufRem = b->CreateURem(fromPosition, bufSize);
[5618]161    if (reverse) {
[5755]162        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
[5618]163    }
[5755]164    return b->CreateSub(bufSize, bufRem, "linearSpace");
[5355]165}
[5301]166
[5755]167Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
168    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
169        b->CreateAssert(handle, "handle cannot be null");
170    }
171    return handle;
[5377]172}
173
[5755]174
175Value * StreamSetBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
176    return b->CreateGEP(getBaseAddress(b, handle), blockIndex);
177}
178
179void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
180    Type * i8ptr = b->getInt8PtrTy();
181    unsigned alignment = b->getBitBlockWidth() / 8;
[5498]182    size_t numStreams = 1;
[5493]183    if (isa<ArrayType>(mBaseType)) {
184        numStreams = mBaseType->getArrayNumElements();
185    }
186    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
[5755]187    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
188    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
[5479]189}
190
[5755]191inline bool isConstantZero(Value * const v) {
192    return isa<Constant>(v) && cast<Constant>(v)->isNullValue();
[5706]193}
194
[5755]195void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
196    Constant * const blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
[5498]197    size_t numStreams = 1;
[5493]198    if (isa<ArrayType>(mBaseType)) {
199        numStreams = mBaseType->getArrayNumElements();
200    }
201    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
[5448]202    if (numStreams == 1) {
[5755]203        Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth));
204        Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
205        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, copyBytes, alignment);
[5486]206    } else {
[5755]207        Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
208        Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
209        Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
210        Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
211        Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
212        b->CreateMemCpy(targetBlockPtr, sourceBlockPtr, blockCopyBytes, alignment);
213        Value * partialCopyBitsPerStream = b->CreateMul(partialItems, b->getSize(fieldWidth));
214        Value * partialCopyBytesPerStream = b->CreateLShr(b->CreateAdd(partialCopyBitsPerStream, b->getSize(7)), b->getSize(3));
[5706]215        for (unsigned i = 0; i < numStreams; i++) {
[5755]216            Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(i)});
217            Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(i)});
218            b->CreateMemCpy(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
[5486]219        }
[5448]220    }
[5432]221}
222
[5755]223void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
[5641]224    report_fatal_error("Copy back not supported for this buffer type:" + Name);
225}
226
[5398]227// Source File Buffer
[5506]228
229Type * SourceBuffer::getStreamSetBlockType() const {
230    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
231}
232
[5755]233Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
234    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
235    return b->CreateLoad(ptr);
[5398]236}
237
[5755]238void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
239    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BufferedSize))});
240    b->CreateStore(size, ptr);
[5398]241}
242
[5755]243Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
244    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
245    return b->CreateLoad(ptr);
[5501]246}
247
[5755]248void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
249    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::Capacity))});
250    b->CreateStore(c, ptr);
[5501]251}
252
[5755]253void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
254    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
255        b->CreateAssert(handle, "handle cannot be null");
256    }
257    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
258    Type * const ptrTy = ptr->getType()->getPointerElementType();
259    if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
260        const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
261        const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
262        if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
263            report_fatal_error("SourceBuffer: base address was declared with address space "
264                                     + std::to_string(ptrSpace)
265                                     + " but given a pointer in address space "
266                                     + std::to_string(addrSpace));
267        }
268    } else {
269        report_fatal_error("SourceBuffer: base address is not a pointer type");
270    }
271    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
272        b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
273        DataLayout DL(b->getModule());
274        IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
275        Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
276        b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
277    }
278    b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
[5398]279}
280
[5755]281Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
282    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
283        b->CreateAssert(handle, "handle cannot be null");
284    }
285    Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(SourceBuffer::Field::BaseAddress))});
286    return b->CreateLoad(ptr);
[5398]287}
288
[5755]289Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
[5618]290    if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
[5755]291    Value * maxAvail = b->CreateSub(getBufferedSize(b, handle), fromPosition);
292    return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
[5398]293}
294
[5755]295Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
[5650]296    report_fatal_error("SourceBuffers cannot be written");
[5503]297}
[5501]298
[5755]299void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5597]300    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
301        Type * const ty = getType();
[5755]302        mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
303        b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
[5597]304    } else {
305        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
306    }
307}
[5503]308
[5755]309void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5597]310
311}
312
[5429]313// External File Buffer
[5436]314void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
[5429]315    report_fatal_error("External buffers cannot be allocated.");
[5377]316}
317
[5597]318void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
319
320}
321
[5706]322Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
323    // All available items can be accessed.
324    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
[5377]325}
326
[5706]327Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, const bool reverse) const {
328    // Trust that the buffer is large enough to write any amount
329    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
330}
331
[5757]332Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
333    return ConstantInt::getAllOnesValue(b->getSizeTy());
334}
335
336
[5260]337// Circular Buffer
[5755]338Value * CircularBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * const blockIndex) const {
339    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
[5100]340}
341
[5755]342Value * CircularBuffer::getLinearlyCopyableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
343//    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
344//    Value * from = b->CreateURem(fromPosition, bufSize);
345//    Value * avail = b->CreateURem(availItems, bufSize);
346//    Value * wraparound = b->CreateICmpUGT(from, avail);
347
348
349    return nullptr;
350}
351
352Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
353    Value * ptr = getBaseAddress(b, handle);
354    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
355    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
356    const auto bw = elemTy->getPrimitiveSizeInBits();
357    assert (is_power_2(bw));
[5446]358    if (bw < 8) {
[5755]359        Constant * const fw = ConstantInt::get(relativePosition->getType(), 8 / bw);
360        relativePosition = b->CreateUDiv(relativePosition, fw);
361        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
[5446]362    } else {
[5755]363        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
[5446]364    }
[5755]365    return b->CreateGEP(ptr, relativePosition);
[5446]366}
367
[5301]368// CircularCopybackBuffer Buffer
[5755]369void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5597]370    Type * const ty = getType();
371    Constant * size = ConstantExpr::getSizeOf(ty);
372    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
[5755]373    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5301]374}
375
[5755]376Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
377    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
[5650]378    if (reverse) return writableProper;
[5755]379    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
[5434]380}
381
[5755]382void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
[5706]383    assert (priorProduced->getType() == newProduced->getType());
384    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
[5641]385    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
386    Value * newBufPos = b->CreateURem(newProduced, bufSize);
[5706]387    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_circularCopyBack");
388    BasicBlock * done = b->CreateBasicBlock(Name + "_circularCopyBackDone");
[5641]389    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
390    b->CreateCondBr(wraparound, copyBack, done);
[5706]391
[5641]392    b->SetInsertPoint(copyBack);
[5755]393    Value * const baseAddress = getBaseAddress(b, handle);
394    Value * overflowAddress = b->CreateGEP(baseAddress, b->getInt32(mBufferBlocks));
395    // copyStream(b, baseAddress, b->getSize(0), overflowAddress, b->getSize(0), newBufPos);
396    createBlockAlignedCopy(b, baseAddress, overflowAddress, newBufPos);
[5641]397    b->CreateBr(done);
[5706]398
[5641]399    b->SetInsertPoint(done);
400}
401
402
[5355]403// SwizzledCopybackBuffer Buffer
404
[5755]405void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5597]406    Type * const ty = getType();
407    Constant * size = ConstantExpr::getSizeOf(ty);
408    size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), mBufferBlocks + mOverflowBlocks));
[5755]409    mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
[5355]410}
411
[5755]412void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy, const unsigned alignment) const {
413    Type * int8PtrTy = b->getInt8PtrTy();
414    DataLayout DL(b->getModule());
415    IntegerType * const intAddrTy = b->getIntPtrTy(DL);
[5454]416
[5755]417    Constant * blockSize = ConstantInt::get(itemsToCopy->getType(), b->getBitBlockWidth());
418    Function * f = b->GetInsertBlock()->getParent();
419    BasicBlock * wholeBlockCopy = BasicBlock::Create(b->getContext(), "wholeBlockCopy", f, 0);
420    BasicBlock * partialBlockCopy = BasicBlock::Create(b->getContext(), "partialBlockCopy", f, 0);
421    BasicBlock * copyDone = BasicBlock::Create(b->getContext(), "copyDone", f, 0);
[5454]422    const unsigned numStreams = getType()->getArrayNumElements();
[5755]423    const unsigned swizzleFactor = b->getBitBlockWidth()/mFieldWidth;
[5454]424    const auto elemTy = getType()->getArrayElementType();
425    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
[5755]426    Value * blocksToCopy = b->CreateUDiv(itemsToCopy, blockSize);
427    Value * partialItems = b->CreateURem(itemsToCopy, blockSize);
428    Value * partialBlockTargetPtr = b->CreateGEP(targetBlockPtr, blocksToCopy);
429    Value * partialBlockSourcePtr = b->CreateGEP(sourceBlockPtr, blocksToCopy);
430    b->CreateCondBr(b->CreateICmpUGT(blocksToCopy, b->getSize(0)), wholeBlockCopy, partialBlockCopy);
[5454]431
[5755]432    b->SetInsertPoint(wholeBlockCopy);
433    Value * copyLength = b->CreateSub(b->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), b->CreatePtrToInt(targetBlockPtr, intAddrTy));
434    b->CreateMemCpy(b->CreatePointerCast(targetBlockPtr, int8PtrTy), b->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
435    b->CreateCondBr(b->CreateICmpUGT(partialItems, b->getSize(0)), partialBlockCopy, copyDone);
436
437    b->SetInsertPoint(partialBlockCopy);
438    Value * copyBits = b->CreateMul(itemsToCopy, b->getSize(fieldWidth * swizzleFactor));
439    Value * copyBytes = b->CreateLShr(b->CreateAdd(copyBits, b->getSize(7)), b->getSize(3));
[5355]440    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
[5755]441        Value * strmTargetPtr = b->CreateGEP(partialBlockTargetPtr, {b->getInt32(0), b->getInt32(strm)});
442        Value * strmSourcePtr = b->CreateGEP(partialBlockSourcePtr, {b->getInt32(0), b->getInt32(strm)});
443        b->CreateMemCpy(b->CreatePointerCast(strmTargetPtr, int8PtrTy), b->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
[5355]444    }
[5755]445    b->CreateBr(copyDone);
[5454]446
[5755]447    b->SetInsertPoint(copyDone);
[5355]448}
449
[5755]450Value * SwizzledCopybackBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
451    return b->CreateGEP(getBaseAddress(b, handle), modBufferSize(b, blockIndex));
[5355]452}
453
[5755]454Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
455    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
[5650]456    if (reverse) return writableProper;
[5755]457    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
[5434]458}
459
[5755]460void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
[5706]461    assert (priorProduced->getType() == newProduced->getType());
462    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
[5641]463    Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
464    Value * newBufPos = b->CreateURem(newProduced, bufSize);
[5706]465    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_swizzledCopyBack");
466    BasicBlock * done = b->CreateBasicBlock(Name + "_swizzledCopyBackDone");
[5641]467    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
468    b->CreateCondBr(wraparound, copyBack, done);
469    b->SetInsertPoint(copyBack);
470    Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
471    createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
472    b->CreateBr(done);
473    b->SetInsertPoint(done);
474}
[5434]475
[5260]476// Expandable Buffer
477
[5755]478void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
479    mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
480    Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
481    b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
[5320]482    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
[5755]483    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
484    Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
485    const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
486    Value * const ptr = b->CreateAlignedMalloc(size, alignment);
487    b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
488    Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
489    b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
[5260]490}
491
[5755]492std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5340]493
[5320]494    // ENTRY
[5755]495    Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
496    Value * const capacity = b->CreateLoad(capacityPtr);
497    Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
498    Value * const streamSet = b->CreateLoad(streamSetPtr);
499    blockIndex = modBufferSize(b, blockIndex);
[5311]500
[5353]501    assert (streamIndex->getType() == capacity->getType());
[5755]502    Value * const cond = b->CreateICmpULT(streamIndex, capacity);
[5353]503
[5320]504    // Are we guaranteed that we can access this stream?
[5353]505    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
[5755]506        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
507            b->CreateAssert(cond, "out-of-bounds stream access");
508        }
509        Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
[5353]510        return {streamSet, offset};
[5320]511    }
[5260]512
[5755]513    BasicBlock * const entry = b->GetInsertBlock();
514    BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
515    BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
[5311]516
[5755]517    b->CreateLikelyCondBr(cond, resume, expand);
[5353]518
[5320]519    // EXPAND
[5755]520    b->SetInsertPoint(expand);
[5353]521
522    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
523    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
524
[5755]525    Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
526    newCapacity = b->CreateCeilLog2(newCapacity);
527    newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
[5361]528
[5353]529    std::string tmp;
530    raw_string_ostream out(tmp);
531    out << "__expand";
532    elementType->print(out);
533    std::string name = out.str();
534
[5755]535    Module * const m = b->getModule();
[5353]536    Function * expandFunction = m->getFunction(name);
537
538    if (expandFunction == nullptr) {
539
[5755]540        const auto ip = b->saveIP();
[5353]541
[5755]542        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
[5353]543        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
544
545        auto args = expandFunction->arg_begin();
546        Value * streamSet = &*args++;
547        Value * capacity = &*args++;
548        Value * newCapacity = &*args;
549
[5755]550        BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
551        b->SetInsertPoint(entry);
[5353]552
[5755]553        Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
554        const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
[5486]555
[5755]556        Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
557        Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
[5353]558
559        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
560        for (unsigned i = 0; i < mBufferBlocks; ++i) {
[5755]561            ConstantInt * const offset = b->getSize(i);
562            Value * srcOffset = b->CreateMul(capacity, offset);
563            Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
564            Value * destOffset = b->CreateMul(newCapacity, offset);
565            Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
566            b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
567            Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
568            Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
569            b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
[5353]570        }
571
[5755]572        b->CreateFree(streamSet);
[5353]573
[5755]574        b->CreateRet(newStreamSet);
[5353]575
[5755]576        b->restoreIP(ip);
[5320]577    }
[5311]578
[5755]579    Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
580    b->CreateStore(newStreamSet, streamSetPtr);
581    b->CreateStore(newCapacity, capacityPtr);
[5353]582
[5755]583    b->CreateBr(resume);
[5353]584
[5320]585    // RESUME
[5755]586    b->SetInsertPoint(resume);
[5320]587
[5755]588    PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
[5320]589    phiStreamSet->addIncoming(streamSet, entry);
590    phiStreamSet->addIncoming(newStreamSet, expand);
591
[5755]592    PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
[5320]593    phiCapacity->addIncoming(capacity, entry);
594    phiCapacity->addIncoming(newCapacity, expand);
595
[5755]596    Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
[5320]597
598    return {phiStreamSet, offset};
[5260]599}
600
[5755]601Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5706]602    report_fatal_error("temporarily not supported");
603//    Value * ptr, * offset;
[5755]604//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
605//    return b->CreateGEP(ptr, offset);
[5320]606}
607
[5755]608Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
[5706]609    report_fatal_error("temporarily not supported");
610//    Value * ptr, * offset;
[5755]611//    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
612//    return b->CreateGEP(ptr, {offset, packIndex});
[5320]613}
614
[5755]615Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
616    return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
[5329]617}
618
[5755]619Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
620    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
621        b->CreateAssert(handle, "handle cannot be null");
622    }
623    Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
624    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
625        b->CreateAssert(handle, "base address cannot be 0");
626    }
[5446]627    return baseAddr;
[5377]628}
629
[5544]630void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
631    b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
[5386]632}
633
[5755]634Value * ExpandableBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value *, Value *) const {
[5706]635    report_fatal_error("Expandable buffers: getBlockAddress is not supported.");
[5316]636}
637
[5650]638Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
[5320]639    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
[5311]640}
641
[5260]642
[5541]643Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
[5755]644    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
645        b->CreateAssert(handle, "handle cannot be null");
646    }
[5541]647    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
648    Value * const addr = b->CreateLoad(p);
[5755]649    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
650        b->CreateAssert(addr, "base address cannot be 0");
651    }
[5541]652    return addr;
653}
654
[5755]655Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
656    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
657    assert (blockIndex->getType() == workingBlocks->getType());
658    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
[5541]659}
660
[5755]661Value * DynamicBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
[5706]662    Constant * blockSize = ConstantInt::get(absolutePosition->getType(), b->getBitBlockWidth());
[5755]663    Value * const absBlock = b->CreateUDiv(absolutePosition, blockSize);
[5706]664    Value * blockPos = b->CreateURem(absolutePosition, blockSize);
665    Value * blockPtr = getBlockAddress(b, handle, absBlock);
[5755]666    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
667    const auto bw = elemTy->getPrimitiveSizeInBits();
668    assert (is_power_2(bw));
[5541]669    if (bw < 8) {
670        blockPos = b->CreateUDiv(blockPos, ConstantInt::get(blockPos->getType(), 8 / bw));
671        blockPtr = b->CreatePointerCast(blockPtr, b->getInt8PtrTy());
672    } else {
[5755]673        blockPtr = b->CreatePointerCast(blockPtr, elemTy->getPointerTo());
[5541]674    }
675    return b->CreateGEP(blockPtr, blockPos);
676}
677
[5597]678
[5755]679Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
[5639]680    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
[5706]681    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
[5639]682    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
[5706]683    assert (bufSize->getType() == fromPosition->getType());
[5650]684    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
[5639]685    if (reverse) {
[5650]686        Value * bufAvail = b->CreateSelect(b->CreateICmpEQ(itemsFromBase, b->getSize(0)), bufSize, itemsFromBase);
687        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
[5706]688    } else {
[5650]689        Value * linearSpace = b->CreateSub(bufSize, itemsFromBase, "linearSpace");
690        return b->CreateSelect(b->CreateICmpULT(availItems, linearSpace), availItems, linearSpace);
691    }
[5541]692}
693
[5755]694Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
[5650]695    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
[5706]696    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
[5650]697    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
[5706]698    assert (bufSize->getType() == fromPosition->getType());
[5650]699    Value * bufRem = b->CreateURem(fromPosition, bufSize);
[5618]700    if (reverse) {
[5650]701        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
[5618]702    }
[5706]703    Constant * overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks);
704    bufSize = b->CreateMul(b->CreateAdd(bufBlocks, overflow), blockSize);
[5650]705    return b->CreateSub(bufSize, bufRem, "linearWritable");
[5541]706}
707
[5755]708Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
709    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))});
710    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
[5612]711}
712
[5755]713void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) const {
[5706]714    assert (priorProducedCount->getType() == newProducedCount->getType());   
[5641]715    Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
[5706]716    assert (workingBlocks->getType() == newProducedCount->getType());
717    Value * bufSize = b->CreateMul(workingBlocks, ConstantInt::get(workingBlocks->getType(), b->getBitBlockWidth()));
[5641]718    Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
719    Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
[5706]720    BasicBlock * copyBack = b->CreateBasicBlock(Name + "_dynamicCopyBack");
721    BasicBlock * done = b->CreateBasicBlock(Name + "_dynamicCopyBackDone");
722
[5641]723    Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
724    b->CreateCondBr(wraparound, copyBack, done);
[5706]725
[5641]726    b->SetInsertPoint(copyBack);
[5755]727    Value * bufBasePtr = getBaseAddress(b, handle);
[5641]728    Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
729    createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
730    b->CreateBr(done);
[5706]731
[5641]732    b->SetInsertPoint(done);
733}
[5612]734
[5541]735void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5755]736    Value * const handle = b->CreateCacheAlignedAlloca(mBufferStructType);
[5541]737    size_t numStreams = 1;
738    if (isa<ArrayType>(mBaseType)) {
739        numStreams = mBaseType->getArrayNumElements();
740    }
741    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
742    Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
[5543]743    bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
[5597]744    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::BaseAddress))});
[5612]745    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
746    Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
[5618]747    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
748        b->CallPrintInt("allocated: ", bufPtr);
749        b->CallPrintInt("allocated capacity: ", bufSize);
750    }
[5541]751    b->CreateStore(bufPtr, bufBasePtrField);
[5612]752    b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))}));
[5597]753    b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::AllocatedCapacity))}));
754    b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
755    b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::Length))}));
756    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ProducedPosition))}));
757    b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::ConsumedPosition))}));
[5541]758    mStreamSetBufferPtr = handle;
759}
760
[5544]761void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
[5755]762    Value * const handle = mStreamSetBufferPtr;
[5541]763    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
[5612]764    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
765    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
766    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
767    BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
768    BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
769    Value * priorBuf = b->CreateLoad(priorBasePtrField);
770    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
771    b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
772    b->SetInsertPoint(freePrior);
[5618]773    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
774        b->CallPrintInt("releasing: ", priorBuf);
775    }
[5612]776    b->CreateFree(priorBuf);
777    b->CreateBr(freeCurrent);
778    b->SetInsertPoint(freeCurrent);
779    b->CreateFree(b->CreateLoad(bufBasePtrField));
[5541]780}
781
[5612]782//
783//  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
784//  ensures that we have correct data.   TODO: consider optimizing based on actual
785//  consumer and producer positions.
786//
[5755]787void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) {
[5612]788    size_t numStreams = 1;
789    if (isa<ArrayType>(mBaseType)) {
790        numStreams = mBaseType->getArrayNumElements();
791    }
792    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
793    Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
794    Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::BaseAddress))});
795    Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
796    Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::PriorBaseAddress))});
797    Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))});
798    Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::AllocatedCapacity))});
[5706]799
[5612]800    Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
[5618]801    Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
[5612]802    Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
803    Value * const curAllocated = b->CreateLoad(capacityField);
804    Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
805    if (mOverflowBlocks > 0) {
806        Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
807        neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
808    }
809    neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
810    BasicBlock * doubleEntry = b->GetInsertBlock();
811    BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
812    BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
813    b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
814    b->SetInsertPoint(doRealloc);
815    // If there is a non-null priorBasePtr, free it.
816    Value * priorBuf = b->CreateLoad(priorBasePtrField);
817    Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
818    BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
819    BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
820    b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
821    b->SetInsertPoint(deallocatePrior);
[5618]822    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
823        b->CallPrintInt("deallocating: ", priorBuf);
824    }
[5612]825    b->CreateFree(priorBuf);
826    b->CreateBr(allocateNew);
827    b->SetInsertPoint(allocateNew);
828    b->CreateStore(oldBufPtr, priorBasePtrField);
829    Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
[5618]830    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
831        b->CallPrintInt("re-allocated: ", newBufPtr);
832        b->CallPrintInt("allocated capacity: ", neededCapacity);
833    }
[5612]834    b->CreateStore(newBufPtr, bufBasePtrField);
835    createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
836    b->CreateStore(neededCapacity, capacityField);
837    b->CreateBr(doCopy2);
838    b->SetInsertPoint(doCopy2);
839    PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
840    bufPtr->addIncoming(oldBufPtr, doubleEntry);
[5615]841    bufPtr->addIncoming(newBufPtr, allocateNew);
[5612]842    createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
[5618]843    currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
844    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
845        b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
846    }
847    b->CreateStore(currentWorkingBlocks, workingBlocksField);
[5612]848}
[5541]849
[5755]850inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
851    return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
852}
853
[5706]854SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
[5755]855: StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
[5706]856    mUniqueID = "B";
857    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
858        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
859    }
860}
861
[5755]862ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
863: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
[5706]864    mUniqueID = "E";
865    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
866    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
867}
868
869CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5755]870: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
[5706]871    mUniqueID = "C" + std::to_string(bufferBlocks);
872    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
873}
874
[5755]875CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
876: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
[5706]877
878}
879
880CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
[5755]881: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
[5706]882    if (bufferBlocks < 2 * overflowBlocks) {
883        report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
884    }
885    mUniqueID = "CC" + std::to_string(bufferBlocks);
886    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
887    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
888}
889
890ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5755]891: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
[5706]892, mInitialCapacity(type->getArrayNumElements()) {
893    mUniqueID = "XP" + std::to_string(bufferBlocks);
894    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
895}
896
897SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
[5755]898: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace), mFieldWidth(fieldwidth) {
[5706]899    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
900    if (bufferBlocks < 2 * overflowBlocks) {
901        report_fatal_error("SwizzledCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
902    }
[5755]903    if (overflowBlocks != 1) {
[5706]904        mUniqueID += "_" + std::to_string(mOverflowBlocks);
905    }
906    if (AddressSpace > 0) {
907        mUniqueID += "@" + std::to_string(AddressSpace);
908    }
909}
910
[5620]911inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
912    IntegerType * sizeTy = b->getSizeTy();
913    PointerType * typePtr = baseType->getPointerTo(addrSpace);
[5733]914    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
[5620]915}
916
[5541]917DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
[5755]918: StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace)
[5620]919, mBufferStructType(getDynamicBufferStructType(b, mType, addrSpace))
[5755]920, mSwizzleFactor(swizzle) {
[5641]921    if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
922        report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
923    }
[5541]924    mUniqueID = "DB";
925    if (swizzle != 1) {
926        mUniqueID += "s" + std::to_string(swizzle);
927    }
928        if (overflow != 0) {
929        mUniqueID += "o" + std::to_string(overflow);
930    }
931    if (addrSpace != 0) {
932        mUniqueID += "@" + std::to_string(addrSpace);
933    }
934}
935
936
[5755]937inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
[5320]938: mBufferKind(k)
939, mType(resolvedType)
[5446]940, mBufferBlocks(BufferBlocks)
[5755]941, mOverflowBlocks(OverflowBlocks)
[5320]942, mAddressSpace(AddressSpace)
943, mStreamSetBufferPtr(nullptr)
[5408]944, mBaseType(baseType)
945, mProducer(nullptr) {
[5755]946    assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
947    assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
[5320]948}
949
[5377]950StreamSetBuffer::~StreamSetBuffer() { }
951
[5320]952// Helper routines
[5436]953ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]954    unsigned numElements = 1;
955    if (LLVM_LIKELY(type->isArrayTy())) {
956        numElements = type->getArrayNumElements();
957        type = type->getArrayElementType();
958    }
959    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
960        type = type->getVectorElementType();
961        if (LLVM_LIKELY(type->isIntegerTy())) {
962            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
963            type = b->getBitBlockType();
964            if (fieldWidth != 1) {
965                type = ArrayType::get(type, fieldWidth);
[5307]966            }
[5320]967            return ArrayType::get(type, numElements);
[5307]968        }
969    }
970    std::string tmp;
971    raw_string_ostream out(tmp);
972    type->print(out);
973    out << " is an unvalid stream set buffer type.";
974    report_fatal_error(out.str());
975}
[5301]976
[5436]977StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
[5320]978    if (LLVM_LIKELY(type->isArrayTy())) {
979        type = type->getArrayElementType();
980    }
981    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
982        type = type->getVectorElementType();
983        if (LLVM_LIKELY(type->isIntegerTy())) {
984            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
985            type = b->getBitBlockType();
986            if (fieldWidth != 1) {
987                type = ArrayType::get(type, fieldWidth);
988            }
[5733]989            return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
[5320]990        }
991    }
992    std::string tmp;
993    raw_string_ostream out(tmp);
994    type->print(out);
995    out << " is an unvalid stream set buffer type.";
996    report_fatal_error(out.str());
[5260]997}
Note: See TracBrowser for help on using the repository browser.