source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 30.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15
16using namespace parabix;
17using namespace llvm;
18using namespace IDISA;
19
20ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
21
22StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
23
24void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
25    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
26        Type * const ty = getType();
27        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
28        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
29    } else {
30        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
31    }
32}
33
34Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
35    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
36    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
37}
38
39Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
40    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
41    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
42}
43
44void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
45    report_fatal_error("setBaseAddress is not supported by this buffer type");
46}
47
48Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
49    report_fatal_error("getBufferedSize is not supported by this buffer type");
50}
51
52void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
53    report_fatal_error("setBufferedSize is not supported by this buffer type");
54}
55
56inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
57    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
58        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
59            return true;
60        }
61    }
62    return false;
63}
64
65Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
66    uint64_t count = 1;
67    if (isa<ArrayType>(mBaseType)) {
68        count = mBaseType->getArrayNumElements();
69    }
70    return iBuilder->getSize(count);
71}
72
73inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
74    assert (offset->getType()->isIntegerTy());
75    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
76        return offset;
77    } else if (mBufferBlocks == 1) {
78        return ConstantInt::getNullValue(iBuilder->getSizeTy());
79    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
80        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
81    } else {
82        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
83    }
84}
85
86/**
87 * @brief getRawItemPointer
88 *
89 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
90 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
91 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
92 */
93Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
94    Value * ptr = getBaseAddress(iBuilder, self);
95    if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
96        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
97    }
98    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
99    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
100    if (LLVM_UNLIKELY(ty->getBitWidth() < 8)) {
101        const auto bw = ty->getBitWidth();
102        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
103            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
104        } else {
105            absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
106            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
107        }
108    }
109    return iBuilder->CreateGEP(ptr, absolutePosition);
110}
111
112Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * fromPosition) const {
113    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
114        Constant * stride = iBuilder->getSize(iBuilder->getStride());
115        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
116    } else {
117        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
118        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
119    }
120}
121
122Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * fromBlock) const {
123    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
124    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
125}
126
127Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
128    return getLinearlyAccessibleItems(iBuilder, self, fromPosition);
129}
130
131Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
132    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
133}
134
135Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const /* iBuilder */, Value * self) const {
136    return self;
137}
138
139void StreamSetBuffer::releaseBuffer(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
140    /* do nothing: memory is stack allocated */
141}
142
143void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
144    Type * size_ty = iBuilder->getSizeTy();
145    Type * i8ptr = iBuilder->getInt8PtrTy();
146    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
147    Function * f = iBuilder->GetInsertBlock()->getParent();
148    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
149    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
150    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
151    unsigned numStreams = getType()->getArrayNumElements();
152    auto elemTy = getType()->getArrayElementType();
153    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
154    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
155    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
156    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
157    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
158    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
159    iBuilder->SetInsertPoint(wholeBlockCopy);
160    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
161    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(targetBlockPtr, size_ty));
162    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), copyLength, alignment);
163    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
164    iBuilder->SetInsertPoint(partialBlockCopy);
165    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
166    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
167    for (unsigned strm = 0; strm < numStreams; strm++) {
168        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
169        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
170        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
171    }
172    iBuilder->CreateBr(copyDone);
173    iBuilder->SetInsertPoint(copyDone);
174}
175
176
177
178// Single Block Buffer
179
180// For a single block buffer, the block pointer is always the buffer base pointer.
181Value * SingleBlockBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const, Value * self, Value *) const {
182    return self;
183}
184
185// Source File Buffer
186Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
187    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
188    return iBuilder->CreateLoad(ptr);
189}
190
191void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
192    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
193    iBuilder->CreateStore(size, ptr);
194}
195
196void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
197    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
198    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
199}
200
201Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
202    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
203    Value * const addr = iBuilder->CreateLoad(ptr);
204    return addr;
205}
206
207Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
208    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
209}
210
211Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
212    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
213}
214
215// External File Buffer
216void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
217    report_fatal_error("External buffers cannot be allocated.");
218}
219
220Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
221    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
222}
223
224Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
225    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
226}
227
228// Circular Buffer
229Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
230    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
231}
232
233// CircularCopybackBuffer Buffer
234void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
235    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
236}
237
238void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
239    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
240    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
241}
242
243Value * CircularCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
244    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
245}
246
247Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
248    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
249}
250
251Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
252    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
253}
254
255// SwizzledCopybackBuffer Buffer
256
257void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
258    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
259}
260
261void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
262    Type * size_ty = iBuilder->getSizeTy();
263    Type * i8ptr = iBuilder->getInt8PtrTy();
264    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
265    Function * f = iBuilder->GetInsertBlock()->getParent();
266    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
267    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
268    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
269    unsigned numStreams = getType()->getArrayNumElements();
270    unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
271    auto elemTy = getType()->getArrayElementType();
272    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
273    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
274    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
275    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
276    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
277    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
278    iBuilder->SetInsertPoint(wholeBlockCopy);
279    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
280    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(targetBlockPtr, size_ty));
281    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), copyLength, alignment);
282    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
283    iBuilder->SetInsertPoint(partialBlockCopy);
284    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
285    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
286    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
287        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
288        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
289        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
290    }
291    iBuilder->CreateBr(copyDone);
292    iBuilder->SetInsertPoint(copyDone);
293}
294
295void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
296    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
297    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
298}
299
300Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
301    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
302}
303
304Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
305    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
306}
307
308Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
309    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
310}
311
312// Expandable Buffer
313
314void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
315    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
316    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
317    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
318    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
319    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
320    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
321    Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
322    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
323    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
324    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
325}
326
327std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
328
329    // ENTRY
330    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
331    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
332    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
333    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
334    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
335
336    assert (streamIndex->getType() == capacity->getType());
337    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
338
339    // Are we guaranteed that we can access this stream?
340    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
341        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
342        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
343        return {streamSet, offset};
344    }
345
346    BasicBlock * const entry = iBuilder->GetInsertBlock();
347    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
348    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
349
350    iBuilder->CreateLikelyCondBr(cond, resume, expand);
351
352    // EXPAND
353    iBuilder->SetInsertPoint(expand);
354
355    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
356    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
357
358    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
359    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
360    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
361
362    std::string tmp;
363    raw_string_ostream out(tmp);
364    out << "__expand";
365    elementType->print(out);
366    std::string name = out.str();
367
368    Module * const m = iBuilder->getModule();
369    Function * expandFunction = m->getFunction(name);
370
371    if (expandFunction == nullptr) {
372
373        const auto ip = iBuilder->saveIP();
374
375        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
376        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
377
378        auto args = expandFunction->arg_begin();
379        Value * streamSet = &*args++;
380        Value * capacity = &*args++;
381        Value * newCapacity = &*args;
382
383        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
384        iBuilder->SetInsertPoint(entry);
385
386        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
387        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
388        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
389
390        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
391        for (unsigned i = 0; i < mBufferBlocks; ++i) {
392            ConstantInt * const offset = iBuilder->getSize(i);
393            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
394            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
395            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
396            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
397            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
398            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
399            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
400            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
401        }
402
403        iBuilder->CreateAlignedFree(streamSet);
404
405        iBuilder->CreateRet(newStreamSet);
406
407        iBuilder->restoreIP(ip);
408    }
409
410    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
411    iBuilder->CreateStore(newStreamSet, streamSetPtr);
412    iBuilder->CreateStore(newCapacity, capacityPtr);
413
414    iBuilder->CreateBr(resume);
415
416    // RESUME
417    iBuilder->SetInsertPoint(resume);
418
419    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
420    phiStreamSet->addIncoming(streamSet, entry);
421    phiStreamSet->addIncoming(newStreamSet, expand);
422
423    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
424    phiCapacity->addIncoming(capacity, entry);
425    phiCapacity->addIncoming(newCapacity, expand);
426
427    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
428
429    return {phiStreamSet, offset};
430}
431
432Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
433    Value * ptr, * offset;
434    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
435    return iBuilder->CreateGEP(ptr, offset);
436}
437
438Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
439    Value * ptr, * offset;
440    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
441    return iBuilder->CreateGEP(ptr, {offset, packIndex});
442}
443
444Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
445    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
446}
447
448Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
449    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
450}
451
452void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
453    iBuilder->CreateAlignedFree(getBaseAddress(iBuilder, self));
454}
455
456Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
457    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
458}
459
460Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
461    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
462}
463
464// Constructors
465SingleBlockBuffer::SingleBlockBuffer(const std::unique_ptr<kernel::KernelBuilder> &  b, Type * type)
466: StreamSetBuffer(BufferKind::BlockBuffer, type, resolveStreamSetType(b, type), 1, 0) {
467    mUniqueID = "S";
468
469}
470
471SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned AddressSpace)
472: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), 0, AddressSpace) {
473    mUniqueID = "M"; // + std::to_string(bufferBlocks);
474    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
475}
476
477ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
478: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
479    mUniqueID = "E";
480    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
481    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
482}
483
484CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
485: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
486    mUniqueID = "C" + std::to_string(bufferBlocks);
487    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
488}
489
490CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
491: StreamSetBuffer(BufferKind::CircularCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
492    mUniqueID = "CC" + std::to_string(bufferBlocks);
493    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
494    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
495}
496
497ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
498: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
499, mInitialCapacity(type->getArrayNumElements()) {
500    mUniqueID = "XP" + std::to_string(bufferBlocks);
501    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
502}
503
504SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
505: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
506    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
507    if (mOverflowBlocks != 1) {
508        mUniqueID += "_" + std::to_string(mOverflowBlocks);
509    }
510    if (AddressSpace > 0) {
511        mUniqueID += "@" + std::to_string(AddressSpace);
512    }
513}
514
515inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
516: mBufferKind(k)
517, mType(resolvedType)
518, mBufferBlocks(blocks)
519, mAddressSpace(AddressSpace)
520, mStreamSetBufferPtr(nullptr)
521, mBaseType(baseType)
522, mProducer(nullptr) {
523
524}
525
526StreamSetBuffer::~StreamSetBuffer() { }
527
528// Helper routines
529ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
530    unsigned numElements = 1;
531    if (LLVM_LIKELY(type->isArrayTy())) {
532        numElements = type->getArrayNumElements();
533        type = type->getArrayElementType();
534    }
535    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
536        type = type->getVectorElementType();
537        if (LLVM_LIKELY(type->isIntegerTy())) {
538            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
539            type = b->getBitBlockType();
540            if (fieldWidth != 1) {
541                type = ArrayType::get(type, fieldWidth);
542            }
543            return ArrayType::get(type, numElements);
544        }
545    }
546    std::string tmp;
547    raw_string_ostream out(tmp);
548    type->print(out);
549    out << " is an unvalid stream set buffer type.";
550    report_fatal_error(out.str());
551}
552
553StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
554    if (LLVM_LIKELY(type->isArrayTy())) {
555        type = type->getArrayElementType();
556    }
557    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
558        type = type->getVectorElementType();
559        if (LLVM_LIKELY(type->isIntegerTy())) {
560            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
561            type = b->getBitBlockType();
562            if (fieldWidth != 1) {
563                type = ArrayType::get(type, fieldWidth);
564            }
565            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
566        }
567    }
568    std::string tmp;
569    raw_string_ostream out(tmp);
570    type->print(out);
571    out << " is an unvalid stream set buffer type.";
572    report_fatal_error(out.str());
573}
Note: See TracBrowser for help on using the repository browser.