source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5486

Last change on this file since 5486 was 5486, checked in by nmedfort, 2 years ago

Initial attempt to improve debugging capabilities with compilation stack traces on error.

File size: 31.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15
16using namespace parabix;
17using namespace llvm;
18using namespace IDISA;
19
20ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
21
22StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
23
24void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
25    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
26        Type * const ty = getType();
27        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
28        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
29    } else {
30        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
31    }
32}
33
34Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
35    if (codegen::EnableAsserts) {
36        Value * const count = getStreamSetCount(iBuilder, self);
37        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
38        Value * const cond = iBuilder->CreateICmpULT(index, count);
39        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
40    }
41    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
42}
43
44Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
45    if (codegen::EnableAsserts) {
46        Value * const count = getStreamSetCount(iBuilder, self);
47        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
48        Value * const cond = iBuilder->CreateICmpULT(index, count);
49        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
50    }
51    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
52}
53
54void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
55    report_fatal_error("setBaseAddress is not supported by this buffer type");
56}
57
58Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
59    report_fatal_error("getBufferedSize is not supported by this buffer type");
60}
61
62void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
63    report_fatal_error("setBufferedSize is not supported by this buffer type");
64}
65
66inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
67    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
68        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
69            return true;
70        }
71    }
72    return false;
73}
74
75Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
76    uint64_t count = 1;
77    if (isa<ArrayType>(mBaseType)) {
78        count = mBaseType->getArrayNumElements();
79    }
80    return iBuilder->getSize(count);
81}
82
83inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
84    assert (offset->getType()->isIntegerTy());
85    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
86        return offset;
87    } else if (mBufferBlocks == 1) {
88        return ConstantInt::getNullValue(iBuilder->getSizeTy());
89    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
90        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
91    } else {
92        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
93    }
94}
95
96/**
97 * @brief getRawItemPointer
98 *
99 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
100 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
101 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
102 */
103Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
104    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
105    Value * relativePosition = absolutePosition;
106    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
107    if (bw < 8) {
108        assert (bw  == 1 || bw == 2 || bw == 4);
109        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
110        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
111    } else {
112        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
113    }
114    return iBuilder->CreateGEP(ptr, relativePosition);
115}
116
117Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
118    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
119        Constant * stride = iBuilder->getSize(iBuilder->getStride());
120        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
121    } else {
122        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
123        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
124    }
125}
126
127Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
128    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
129    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
130}
131
132Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
133    return getLinearlyAccessibleItems(iBuilder, fromPosition);
134}
135
136Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
137    return getLinearlyAccessibleBlocks(iBuilder, fromBlock);
138}
139
140Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
141    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
142    return self;
143}
144
145void StreamSetBuffer::releaseBuffer(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
146    /* do nothing: memory is stack allocated */
147}
148
149void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
150    Type * i8ptr = iBuilder->getInt8PtrTy();
151    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
152    unsigned numStreams = getType()->getArrayNumElements();
153    auto elemTy = getType()->getArrayElementType();
154    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
155    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
156    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
157}
158
159void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
160    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
161    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
162    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
163    const unsigned numStreams = getType()->getArrayNumElements();
164    const auto elemTy = getType()->getArrayElementType();
165    const auto fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
166    if (numStreams == 1) {
167        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
168        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
169        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
170    } else {
171        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
172        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
173        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
174        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
175        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
176        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
177        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
178        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
179        for (unsigned strm = 0; strm < numStreams; strm++) {
180            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
181            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
182            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
183            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
184            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
185        }
186    }
187}
188
189// Source File Buffer
190Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
191    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
192    return iBuilder->CreateLoad(ptr);
193}
194
195void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
196    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
197    iBuilder->CreateStore(size, ptr);
198}
199
200void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
201    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
202    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
203}
204
205Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
206    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
207    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
208    Value * const addr = iBuilder->CreateLoad(ptr);
209    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
210    return addr;
211}
212
213Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
214    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
215}
216
217Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
218    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
219}
220
221// External File Buffer
222void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
223    report_fatal_error("External buffers cannot be allocated.");
224}
225
226Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
227    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
228}
229
230Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
231    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
232}
233
234// Circular Buffer
235Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
236    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
237}
238
239Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
240    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
241    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
242    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
243    if (bw < 8) {
244        assert (bw  == 1 || bw == 2 || bw == 4);
245        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
246        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
247    } else {
248        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
249    }
250    return iBuilder->CreateGEP(ptr, relativePosition);
251}
252
253// CircularCopybackBuffer Buffer
254void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
255    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
256}
257
258void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
259    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
260    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
261}
262
263Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
264    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
265}
266
267Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
268    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, fromBlock), iBuilder->getSize(mOverflowBlocks));
269}
270
271// SwizzledCopybackBuffer Buffer
272
273void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
274    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
275}
276
277void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
278    Type * int8PtrTy = iBuilder->getInt8PtrTy();
279    DataLayout DL(iBuilder->getModule());
280    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
281
282    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
283    Function * f = iBuilder->GetInsertBlock()->getParent();
284    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
285    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
286    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
287    const unsigned numStreams = getType()->getArrayNumElements();
288    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
289    const auto elemTy = getType()->getArrayElementType();
290    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
291    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
292    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
293    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
294    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
295    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
296
297    iBuilder->SetInsertPoint(wholeBlockCopy);
298    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
299    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
300    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
301    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
302    iBuilder->SetInsertPoint(partialBlockCopy);
303    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
304    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
305    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
306        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
307        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
308        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
309    }
310    iBuilder->CreateBr(copyDone);
311
312    iBuilder->SetInsertPoint(copyDone);
313}
314
315void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
316    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
317    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
318}
319
320Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
321    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
322}
323
324Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
325    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
326}
327
328Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
329    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, fromBlock), iBuilder->getSize(mOverflowBlocks));
330}
331
332// Expandable Buffer
333
334void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
335    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
336    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
337    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
338    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
339    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
340    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
341    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
342    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
343    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
344    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
345    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
346}
347
348std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
349
350    // ENTRY
351    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
352    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
353    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
354    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
355    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
356
357    assert (streamIndex->getType() == capacity->getType());
358    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
359
360    // Are we guaranteed that we can access this stream?
361    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
362        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
363        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
364        return {streamSet, offset};
365    }
366
367    BasicBlock * const entry = iBuilder->GetInsertBlock();
368    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
369    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
370
371    iBuilder->CreateLikelyCondBr(cond, resume, expand);
372
373    // EXPAND
374    iBuilder->SetInsertPoint(expand);
375
376    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
377    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
378
379    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
380    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
381    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
382
383    std::string tmp;
384    raw_string_ostream out(tmp);
385    out << "__expand";
386    elementType->print(out);
387    std::string name = out.str();
388
389    Module * const m = iBuilder->getModule();
390    Function * expandFunction = m->getFunction(name);
391
392    if (expandFunction == nullptr) {
393
394        const auto ip = iBuilder->saveIP();
395
396        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
397        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
398
399        auto args = expandFunction->arg_begin();
400        Value * streamSet = &*args++;
401        Value * capacity = &*args++;
402        Value * newCapacity = &*args;
403
404        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
405        iBuilder->SetInsertPoint(entry);
406
407        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
408        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
409
410        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
411        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
412
413        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
414        for (unsigned i = 0; i < mBufferBlocks; ++i) {
415            ConstantInt * const offset = iBuilder->getSize(i);
416            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
417            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
418            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
419            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
420            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
421            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
422            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
423            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
424        }
425
426        iBuilder->CreateFree(streamSet);
427
428        iBuilder->CreateRet(newStreamSet);
429
430        iBuilder->restoreIP(ip);
431    }
432
433    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
434    iBuilder->CreateStore(newStreamSet, streamSetPtr);
435    iBuilder->CreateStore(newCapacity, capacityPtr);
436
437    iBuilder->CreateBr(resume);
438
439    // RESUME
440    iBuilder->SetInsertPoint(resume);
441
442    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
443    phiStreamSet->addIncoming(streamSet, entry);
444    phiStreamSet->addIncoming(newStreamSet, expand);
445
446    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
447    phiCapacity->addIncoming(capacity, entry);
448    phiCapacity->addIncoming(newCapacity, expand);
449
450    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
451
452    return {phiStreamSet, offset};
453}
454
455Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
456    Value * ptr, * offset;
457    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
458    return iBuilder->CreateGEP(ptr, offset);
459}
460
461Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
462    Value * ptr, * offset;
463    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
464    return iBuilder->CreateGEP(ptr, {offset, packIndex});
465}
466
467Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
468    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
469}
470
471Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
472    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
473    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
474    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
475    return baseAddr;
476}
477
478void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
479    iBuilder->CreateFree(getBaseAddress(iBuilder, self));
480}
481
482Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
483    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
484}
485
486Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
487    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
488}
489
490SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
491: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
492    mUniqueID = "B";
493    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
494        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
495    }
496}
497
498ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
499: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
500    mUniqueID = "E";
501    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
502    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
503}
504
505CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
506: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
507    mUniqueID = "C" + std::to_string(bufferBlocks);
508    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
509}
510
511CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
512: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
513
514}
515
516CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
517: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
518, mOverflowBlocks(overflowBlocks) {
519    mUniqueID = "CC" + std::to_string(bufferBlocks);
520    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
521    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
522}
523
524ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
525: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
526, mInitialCapacity(type->getArrayNumElements()) {
527    mUniqueID = "XP" + std::to_string(bufferBlocks);
528    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
529}
530
531SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
532: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
533    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
534    if (mOverflowBlocks != 1) {
535        mUniqueID += "_" + std::to_string(mOverflowBlocks);
536    }
537    if (AddressSpace > 0) {
538        mUniqueID += "@" + std::to_string(AddressSpace);
539    }
540}
541
542inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
543: mBufferKind(k)
544, mType(resolvedType)
545, mBufferBlocks(BufferBlocks)
546, mAddressSpace(AddressSpace)
547, mStreamSetBufferPtr(nullptr)
548, mBaseType(baseType)
549, mProducer(nullptr) {
550
551}
552
553StreamSetBuffer::~StreamSetBuffer() { }
554
555// Helper routines
556ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
557    unsigned numElements = 1;
558    if (LLVM_LIKELY(type->isArrayTy())) {
559        numElements = type->getArrayNumElements();
560        type = type->getArrayElementType();
561    }
562    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
563        type = type->getVectorElementType();
564        if (LLVM_LIKELY(type->isIntegerTy())) {
565            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
566            type = b->getBitBlockType();
567            if (fieldWidth != 1) {
568                type = ArrayType::get(type, fieldWidth);
569            }
570            return ArrayType::get(type, numElements);
571        }
572    }
573    std::string tmp;
574    raw_string_ostream out(tmp);
575    type->print(out);
576    out << " is an unvalid stream set buffer type.";
577    report_fatal_error(out.str());
578}
579
580StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
581    if (LLVM_LIKELY(type->isArrayTy())) {
582        type = type->getArrayElementType();
583    }
584    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
585        type = type->getVectorElementType();
586        if (LLVM_LIKELY(type->isIntegerTy())) {
587            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
588            type = b->getBitBlockType();
589            if (fieldWidth != 1) {
590                type = ArrayType::get(type, fieldWidth);
591            }
592            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
593        }
594    }
595    std::string tmp;
596    raw_string_ostream out(tmp);
597    type->print(out);
598    out << " is an unvalid stream set buffer type.";
599    report_fatal_error(out.str());
600}
Note: See TracBrowser for help on using the repository browser.