source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5381

Last change on this file since 5381 was 5381, checked in by nmedfort, 2 years ago

Bug fix for 32 bit.

File size: 27.4 KB
RevLine 
[5044]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5260]6#include "streamset.h"
7#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
8#include <llvm/IR/BasicBlock.h>    // for BasicBlock
9#include <llvm/IR/Constants.h>     // for ConstantInt
10#include <llvm/IR/DataLayout.h>    // for DataLayout
11#include <llvm/IR/DerivedTypes.h>  // for IntegerType (ptr only), PointerType
12#include <llvm/IR/Module.h>        // for Module
13#include <llvm/IR/Value.h>         // for Value
[5307]14#include <llvm/Support/raw_ostream.h>
[5340]15#include <llvm/IR/CFG.h>
[5307]16
[5260]17namespace llvm { class Constant; }
18namespace llvm { class Function; }
[5191]19
[5100]20using namespace parabix;
[5260]21using namespace llvm;
22using namespace IDISA;
[5044]23
[5320]24ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type);
25
26StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type);
27
[5217]28void StreamSetBuffer::allocateBuffer() {
[5353]29    Type * const ty = getType();
30    ConstantInt * blocks = iBuilder->getSize(mBufferBlocks);
31    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
32    Constant * width = ConstantExpr::getMul(ConstantExpr::getSizeOf(ty), blocks);
33    iBuilder->CreateMemZero(mStreamSetBufferPtr, width, iBuilder->getCacheAlignment());
[5044]34}
35
[5340]36Value * StreamSetBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
[5353]37    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
[5377]38    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex});
[5260]39}
40
[5340]41Value * StreamSetBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
[5353]42    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
[5377]43    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
[5260]44}
45
[5377]46inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
[5353]47    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
48        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
49            return true;
50        }
51    }
52    return false;
53}
54
[5377]55Value * StreamSetBuffer::getStreamSetCount(Value *) const {
[5329]56    uint64_t count = 1;
57    if (isa<ArrayType>(mBaseType)) {
58        count = mBaseType->getArrayNumElements();
59    }
[5337]60    return iBuilder->getSize(count);
[5329]61}
62
[5377]63inline Value * StreamSetBuffer::modByBufferBlocks(Value * const offset) const {
[5353]64    assert (offset->getType()->isIntegerTy());
65    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
66        return offset;
67    } else if (mBufferBlocks == 1) {
68        return ConstantInt::getNullValue(iBuilder->getSizeTy());
69    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
70        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
71    } else {
72        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
73    }
74}
75
[5307]76/**
77 * @brief getRawItemPointer
78 *
79 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
80 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
81 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
82 */
83Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
[5377]84    Value * ptr = getBaseAddress(self);
[5317]85    if (isa<ConstantInt>(streamIndex) && cast<ConstantInt>(streamIndex)->isZero()) {
86        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
[5307]87    }
88    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
89    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
90    if (LLVM_UNLIKELY(ty->getBitWidth() < 8)) {
91        const auto bw = ty->getBitWidth();
92        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
93            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
94        } else {
95            absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
96            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
97        }
98    }
99    return iBuilder->CreateGEP(ptr, absolutePosition);
[5260]100}
101
[5377]102Value * StreamSetBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
[5307]103    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
[5301]104        Constant * stride = iBuilder->getSize(iBuilder->getStride());
105        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
[5377]106    } else {
[5301]107        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
108        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
109    }
110}
111
[5377]112Value * StreamSetBuffer::getLinearlyAccessibleBlocks(Value * self, Value * fromBlock) const {
[5355]113    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
114    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
115}
[5301]116
[5379]117void StreamSetBuffer::reserveBytes(Value * self, llvm::Value * position, llvm::Value *requested) const {
[5377]118    report_fatal_error("reserve() can only be used with ExtensibleBuffers");
119}
[5355]120
[5377]121Value * StreamSetBuffer::getBaseAddress(Value * self) const {
122    return self;
123}
124
[5185]125// Single Block Buffer
[5260]126
[5100]127// For a single block buffer, the block pointer is always the buffer base pointer.
[5317]128Value * SingleBlockBuffer::getStreamSetBlockPtr(Value * self, Value *) const {
[5276]129    return self;
[5044]130}
131
[5191]132// External File Buffer
[5377]133void ExternalFileBuffer::setStreamSetBuffer(Value * ptr) {
[5297]134    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
[5122]135}
136
[5217]137void ExternalFileBuffer::allocateBuffer() {
[5307]138    report_fatal_error("External buffers cannot be allocated.");
[5100]139}
140
[5377]141Value * ExternalFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
142    return iBuilder->CreateGEP(self, blockIndex);
[5100]143}
144
[5377]145Value * ExternalFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
[5329]146    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
[5301]147}
148
[5377]149// ExtensibleBuffer
150Value * ExtensibleBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
151    Value * capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
152    Value * capacity = iBuilder->CreateLoad(capacityPtr);
153    return iBuilder->CreateSub(capacity, fromPosition);
154}
155
156void ExtensibleBuffer::allocateBuffer() {
157    Type * ty = getType();
158    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
159    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
[5381]160    Constant * initialSize = ConstantExpr::getSizeOf(ty->getStructElementType(1)->getPointerElementType());
161    initialSize = ConstantExpr::getMul(initialSize, iBuilder->getSize(mBufferBlocks));
162    initialSize = ConstantExpr::getIntegerCast(initialSize, iBuilder->getSizeTy(), false);
[5379]163    iBuilder->CreateStore(initialSize, capacityPtr);
164    Value * addr = iBuilder->CreateAlignedMalloc(initialSize, iBuilder->getCacheAlignment());
165    iBuilder->CreateMemZero(addr, initialSize, iBuilder->getCacheAlignment());
[5377]166    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
167    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
168    iBuilder->CreateStore(addr, addrPtr);
169    mStreamSetBufferPtr = instance;
170}
171
172Value * ExtensibleBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
173    return iBuilder->CreateGEP(self, blockIndex);
174}
175
[5379]176void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const position, llvm::Value * const requested) const {
[5377]177    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
[5379]178    Value * const currentSize = iBuilder->CreateLoad(capacityPtr);
179    Type * const intTy = currentSize->getType();
180    assert (position->getType() == requested->getType());
181    Constant * const blockSize = ConstantExpr::getIntegerCast(ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(getType()->getStructElementType(1)), intTy, false), requested->getType(), false);
[5377]182    BasicBlock * const entry = iBuilder->GetInsertBlock();
183    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
184    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
[5379]185    Value * const reserved = iBuilder->CreateAdd(iBuilder->CreateMul(position, blockSize), requested);
186    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(reserved, currentSize), resume, expand);
[5377]187
188    iBuilder->SetInsertPoint(expand);
[5379]189    Value * const reservedSize = iBuilder->CreateShl(reserved, 1);
[5377]190    Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
191    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
192    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
193    iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
194    iBuilder->CreateAlignedFree(baseAddr);
[5379]195    Value * const remainingSize = iBuilder->CreateSub(reservedSize, currentSize);
196    iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), remainingSize, iBuilder->getBitBlockWidth() / 8);
[5377]197    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
[5379]198    iBuilder->CreateStore(reservedSize, capacityPtr);
[5377]199    iBuilder->CreateStore(newAddr, baseAddrPtr);
200    iBuilder->CreateBr(resume);
201
202    iBuilder->SetInsertPoint(resume);
203}
204
[5379]205Value * ExtensibleBuffer::getBaseAddress(Value * const self) const {
[5377]206    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
207}
208
[5260]209// Circular Buffer
[5100]210
[5379]211Value * CircularBuffer::getStreamSetBlockPtr(Value * const self, Value * const blockIndex) const {
[5353]212    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
[5100]213}
214
[5301]215// CircularCopybackBuffer Buffer
216
217void CircularCopybackBuffer::allocateBuffer() {
218    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
219}
220
[5303]221void CircularCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
[5355]222    Type * size_ty = iBuilder->getSizeTy();
223    Type * i8ptr = iBuilder->getInt8PtrTy();
224    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
[5303]225    Function * f = iBuilder->GetInsertBlock()->getParent();
226    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
227    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
228    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
229    unsigned numStreams = getType()->getArrayNumElements();
230    auto elemTy = getType()->getArrayElementType();
231    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
232    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
233    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
234    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
[5355]235    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
236    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
[5303]237    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
238    iBuilder->SetInsertPoint(wholeBlockCopy);
239    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
[5355]240    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
[5303]241    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
242    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
243    iBuilder->SetInsertPoint(partialBlockCopy);
244    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth));
[5317]245    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
[5303]246    for (unsigned strm = 0; strm < numStreams; strm++) {
247        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
248        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
249        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
250    }
251    iBuilder->CreateBr(copyBackDone);
252    iBuilder->SetInsertPoint(copyBackDone);
[5301]253}
254
[5317]255Value * CircularCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
[5353]256    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
[5100]257}
258
[5355]259// SwizzledCopybackBuffer Buffer
260
261void SwizzledCopybackBuffer::allocateBuffer() {
262    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
263}
264
265void SwizzledCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
266    Type * size_ty = iBuilder->getSizeTy();
267    Type * i8ptr = iBuilder->getInt8PtrTy();
268    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
269    Function * f = iBuilder->GetInsertBlock()->getParent();
270    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
271    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
272    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
273    unsigned numStreams = getType()->getArrayNumElements();
274    unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
275    auto elemTy = getType()->getArrayElementType();
276    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
277    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
278    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
279    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
280    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
281    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
282    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
283    iBuilder->SetInsertPoint(wholeBlockCopy);
284    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
285    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
286    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
287    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
288    iBuilder->SetInsertPoint(partialBlockCopy);
289    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth * swizzleFactor));
290    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
291    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
292        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
293        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
294        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
295    }
296    iBuilder->CreateBr(copyBackDone);
297    iBuilder->SetInsertPoint(copyBackDone);
298}
299
300Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
[5377]301    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
[5355]302}
303
[5377]304SwizzledCopybackBuffer::SwizzledCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
[5355]305: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
306   
307}
308
[5260]309// Expandable Buffer
310
[5320]311void ExpandableBuffer::allocateBuffer() {
312    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
313    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
314    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
315    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
[5361]316    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
317    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
318    Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
319    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
[5320]320    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
[5361]321    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
[5260]322}
323
[5377]324std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5340]325
[5320]326    // ENTRY
327    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
328    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
329    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
330    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
[5353]331    blockIndex = modByBufferBlocks(blockIndex);
[5311]332
[5353]333    assert (streamIndex->getType() == capacity->getType());
334    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
335
[5320]336    // Are we guaranteed that we can access this stream?
[5353]337    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
338        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
339        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
340        return {streamSet, offset};
[5320]341    }
[5260]342
[5320]343    BasicBlock * const entry = iBuilder->GetInsertBlock();
344    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
345    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
[5311]346
[5353]347    iBuilder->CreateLikelyCondBr(cond, resume, expand);
348
[5320]349    // EXPAND
350    iBuilder->SetInsertPoint(expand);
[5353]351
352    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
353    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
354
[5361]355    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
356    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
357    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
358
[5353]359    std::string tmp;
360    raw_string_ostream out(tmp);
361    out << "__expand";
362    elementType->print(out);
363    std::string name = out.str();
364
365    Module * const m = iBuilder->getModule();
366    Function * expandFunction = m->getFunction(name);
367
368    if (expandFunction == nullptr) {
369
370        const auto ip = iBuilder->saveIP();
371
372        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
373        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
374
375        auto args = expandFunction->arg_begin();
376        Value * streamSet = &*args++;
377        Value * capacity = &*args++;
378        Value * newCapacity = &*args;
379
380        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
381        iBuilder->SetInsertPoint(entry);
382
383        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
[5361]384        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
[5353]385        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
386
387        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
388        for (unsigned i = 0; i < mBufferBlocks; ++i) {
389            ConstantInt * const offset = iBuilder->getSize(i);
390            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
391            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
392            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
393            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
394            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
395            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
396            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
397            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
398        }
399
400        iBuilder->CreateAlignedFree(streamSet);
401
402        iBuilder->CreateRet(newStreamSet);
403
404        iBuilder->restoreIP(ip);
[5320]405    }
[5311]406
[5353]407    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
408    iBuilder->CreateStore(newStreamSet, streamSetPtr);
409    iBuilder->CreateStore(newCapacity, capacityPtr);
410
[5320]411    iBuilder->CreateBr(resume);
[5353]412
[5320]413    // RESUME
414    iBuilder->SetInsertPoint(resume);
415
416    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
417    phiStreamSet->addIncoming(streamSet, entry);
418    phiStreamSet->addIncoming(newStreamSet, expand);
419
420    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
421    phiCapacity->addIncoming(capacity, entry);
422    phiCapacity->addIncoming(newCapacity, expand);
423
424    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
425
426    return {phiStreamSet, offset};
[5260]427}
428
[5377]429Value * ExpandableBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
[5320]430    Value * ptr, * offset;
[5340]431    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
[5320]432    return iBuilder->CreateGEP(ptr, offset);
433}
434
[5377]435Value * ExpandableBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
[5320]436    Value * ptr, * offset;
[5340]437    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
[5320]438    return iBuilder->CreateGEP(ptr, {offset, packIndex});
439}
440
[5377]441Value * ExpandableBuffer::getStreamSetCount(Value * self) const {
[5329]442    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
443}
444
[5377]445Value * ExpandableBuffer::getBaseAddress(Value * self) const {
446    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
447}
448
[5317]449Value * ExpandableBuffer::getStreamSetBlockPtr(Value *, Value *) const {
[5329]450    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
[5316]451}
452
[5377]453Value * ExpandableBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
[5320]454    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
[5311]455}
456
[5260]457// Constructors
[5377]458SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, Type * type)
[5320]459: StreamSetBuffer(BufferKind::BlockBuffer, b, type, resolveStreamSetType(b, type), 1, 0) {
[5260]460
461}
462
[5377]463ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
[5320]464: StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, resolveStreamSetType(b, type), 0, AddressSpace) {
[5260]465
466}
467
[5377]468ExtensibleBuffer::ExtensibleBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
469: StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), nullptr), bufferBlocks, AddressSpace) {
470
471}
472
473CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5320]474: StreamSetBuffer(BufferKind::CircularBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
[5260]475
476}
477
[5377]478CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
[5320]479: StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
[5260]480
481}
482
[5377]483ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
[5320]484: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
485, mInitialCapacity(type->getArrayNumElements()) {
[5260]486
487}
488
[5320]489inline StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
490: mBufferKind(k)
491, iBuilder(b)
492, mType(resolvedType)
493, mBufferBlocks(blocks)
494, mAddressSpace(AddressSpace)
495, mStreamSetBufferPtr(nullptr)
496, mBaseType(baseType) {
497
498}
499
[5377]500StreamSetBuffer::~StreamSetBuffer() { }
501
[5320]502// Helper routines
503ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type) {
504    unsigned numElements = 1;
505    if (LLVM_LIKELY(type->isArrayTy())) {
506        numElements = type->getArrayNumElements();
507        type = type->getArrayElementType();
508    }
509    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
510        type = type->getVectorElementType();
511        if (LLVM_LIKELY(type->isIntegerTy())) {
512            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
513            type = b->getBitBlockType();
514            if (fieldWidth != 1) {
515                type = ArrayType::get(type, fieldWidth);
[5307]516            }
[5320]517            return ArrayType::get(type, numElements);
[5307]518        }
519    }
520    std::string tmp;
521    raw_string_ostream out(tmp);
522    type->print(out);
523    out << " is an unvalid stream set buffer type.";
524    report_fatal_error(out.str());
525}
[5301]526
[5320]527StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type) {
528    if (LLVM_LIKELY(type->isArrayTy())) {
529        type = type->getArrayElementType();
530    }
531    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
532        type = type->getVectorElementType();
533        if (LLVM_LIKELY(type->isIntegerTy())) {
534            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
535            type = b->getBitBlockType();
536            if (fieldWidth != 1) {
537                type = ArrayType::get(type, fieldWidth);
538            }
539            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
540        }
541    }
542    std::string tmp;
543    raw_string_ostream out(tmp);
544    type->print(out);
545    out << " is an unvalid stream set buffer type.";
546    report_fatal_error(out.str());
[5260]547}
Note: See TracBrowser for help on using the repository browser.