source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5479

Last change on this file since 5479 was 5479, checked in by cameron, 2 years ago

Multiblock kernels use whole block copy to/from temp buffers; allow input stream sets with variable rates

File size: 31.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15
16using namespace parabix;
17using namespace llvm;
18using namespace IDISA;
19
20ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
21
22StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
23
24void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
25    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
26        Type * const ty = getType();
27        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
28        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
29    } else {
30        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
31    }
32}
33
34Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
35    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
36    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
37}
38
39Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
40    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(iBuilder, self)), "StreamSetBuffer: out-of-bounds stream access");
41    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
42}
43
44void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
45    report_fatal_error("setBaseAddress is not supported by this buffer type");
46}
47
48Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
49    report_fatal_error("getBufferedSize is not supported by this buffer type");
50}
51
52void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
53    report_fatal_error("setBufferedSize is not supported by this buffer type");
54}
55
56inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
57    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
58        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
59            return true;
60        }
61    }
62    return false;
63}
64
65Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
66    uint64_t count = 1;
67    if (isa<ArrayType>(mBaseType)) {
68        count = mBaseType->getArrayNumElements();
69    }
70    return iBuilder->getSize(count);
71}
72
73inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
74    assert (offset->getType()->isIntegerTy());
75    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
76        return offset;
77    } else if (mBufferBlocks == 1) {
78        return ConstantInt::getNullValue(iBuilder->getSizeTy());
79    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
80        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
81    } else {
82        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
83    }
84}
85
86/**
87 * @brief getRawItemPointer
88 *
89 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
90 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
91 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
92 */
93Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
94    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
95    Value * relativePosition = absolutePosition;
96    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
97    if (bw < 8) {
98        assert (bw  == 1 || bw == 2 || bw == 4);
99        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
100        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
101    } else {
102        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
103    }
104    return iBuilder->CreateGEP(ptr, relativePosition);
105}
106
107Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
108    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
109        Constant * stride = iBuilder->getSize(iBuilder->getStride());
110        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
111    } else {
112        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
113        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
114    }
115}
116
117Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
118    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
119    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
120}
121
122Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
123    return getLinearlyAccessibleItems(iBuilder, fromPosition);
124}
125
126Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
127    return getLinearlyAccessibleBlocks(iBuilder, fromBlock);
128}
129
130Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
131    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
132    return self;
133}
134
135void StreamSetBuffer::releaseBuffer(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
136    /* do nothing: memory is stack allocated */
137}
138
139void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
140    Type * i8ptr = iBuilder->getInt8PtrTy();
141    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
142    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
143    unsigned numStreams = getType()->getArrayNumElements();
144    auto elemTy = getType()->getArrayElementType();
145    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
146    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
147    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
148}
149
150void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
151    Type * i8ptr = iBuilder->getInt8PtrTy();
152    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
153    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
154    unsigned numStreams = getType()->getArrayNumElements();
155    auto elemTy = getType()->getArrayElementType();
156    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
157    if (numStreams == 1) {
158        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
159        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
160        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), copyBytes, alignment);
161        return;
162    }
163    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
164    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
165    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
166    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
167    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
168    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
169    Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
170    Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
171    for (unsigned strm = 0; strm < numStreams; strm++) {
172        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
173        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
174        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), partialCopyBytesPerStream, alignment);
175    }
176}
177
178
179
180// Source File Buffer
181Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
182    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
183    return iBuilder->CreateLoad(ptr);
184}
185
186void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
187    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
188    iBuilder->CreateStore(size, ptr);
189}
190
191void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
192    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
193    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
194}
195
196Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
197    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
198    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
199    Value * const addr = iBuilder->CreateLoad(ptr);
200    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
201    return addr;
202}
203
204Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
205    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
206}
207
208Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
209    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
210}
211
212// External File Buffer
213void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
214    report_fatal_error("External buffers cannot be allocated.");
215}
216
217Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
218    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
219}
220
221Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
222    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
223}
224
225// Circular Buffer
226Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
227    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
228}
229
230Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
231    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
232    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
233    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
234    if (bw < 8) {
235        assert (bw  == 1 || bw == 2 || bw == 4);
236        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
237        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
238    } else {
239        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
240    }
241    return iBuilder->CreateGEP(ptr, relativePosition);
242}
243
244// CircularCopybackBuffer Buffer
245void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
246    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
247}
248
249void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
250    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
251    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
252}
253
254Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
255    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
256}
257
258Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
259    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, fromBlock), iBuilder->getSize(mOverflowBlocks));
260}
261
262// SwizzledCopybackBuffer Buffer
263
264void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
265    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
266}
267
268void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
269    Type * int8PtrTy = iBuilder->getInt8PtrTy();
270    DataLayout DL(iBuilder->getModule());
271    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
272
273    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
274    Function * f = iBuilder->GetInsertBlock()->getParent();
275    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
276    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
277    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
278    const unsigned numStreams = getType()->getArrayNumElements();
279    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
280    const auto elemTy = getType()->getArrayElementType();
281    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
282    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
283    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
284    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
285    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
286    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
287
288    iBuilder->SetInsertPoint(wholeBlockCopy);
289    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
290    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
291    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
292    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
293    iBuilder->SetInsertPoint(partialBlockCopy);
294    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
295    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
296    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
297        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
298        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
299        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
300    }
301    iBuilder->CreateBr(copyDone);
302
303    iBuilder->SetInsertPoint(copyDone);
304}
305
306void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
307    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
308    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
309}
310
311Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
312    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
313}
314
315Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * fromPosition) const {
316    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
317}
318
319Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * fromBlock) const {
320    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, fromBlock), iBuilder->getSize(mOverflowBlocks));
321}
322
323// Expandable Buffer
324
325void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
326    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
327    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
328    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
329    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
330    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
331    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
332    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
333    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
334    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
335    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
336    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
337}
338
339std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
340
341    // ENTRY
342    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
343    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
344    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
345    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
346    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
347
348    assert (streamIndex->getType() == capacity->getType());
349    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
350
351    // Are we guaranteed that we can access this stream?
352    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
353        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
354        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
355        return {streamSet, offset};
356    }
357
358    BasicBlock * const entry = iBuilder->GetInsertBlock();
359    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
360    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
361
362    iBuilder->CreateLikelyCondBr(cond, resume, expand);
363
364    // EXPAND
365    iBuilder->SetInsertPoint(expand);
366
367    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
368    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
369
370    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
371    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
372    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
373
374    std::string tmp;
375    raw_string_ostream out(tmp);
376    out << "__expand";
377    elementType->print(out);
378    std::string name = out.str();
379
380    Module * const m = iBuilder->getModule();
381    Function * expandFunction = m->getFunction(name);
382
383    if (expandFunction == nullptr) {
384
385        const auto ip = iBuilder->saveIP();
386
387        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
388        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
389
390        auto args = expandFunction->arg_begin();
391        Value * streamSet = &*args++;
392        Value * capacity = &*args++;
393        Value * newCapacity = &*args;
394
395        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
396        iBuilder->SetInsertPoint(entry);
397
398        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
399        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
400        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
401        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
402
403        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
404        for (unsigned i = 0; i < mBufferBlocks; ++i) {
405            ConstantInt * const offset = iBuilder->getSize(i);
406            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
407            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
408            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
409            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
410            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
411            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
412            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
413            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
414        }
415
416        iBuilder->CreateFree(streamSet);
417
418        iBuilder->CreateRet(newStreamSet);
419
420        iBuilder->restoreIP(ip);
421    }
422
423    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
424    iBuilder->CreateStore(newStreamSet, streamSetPtr);
425    iBuilder->CreateStore(newCapacity, capacityPtr);
426
427    iBuilder->CreateBr(resume);
428
429    // RESUME
430    iBuilder->SetInsertPoint(resume);
431
432    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
433    phiStreamSet->addIncoming(streamSet, entry);
434    phiStreamSet->addIncoming(newStreamSet, expand);
435
436    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
437    phiCapacity->addIncoming(capacity, entry);
438    phiCapacity->addIncoming(newCapacity, expand);
439
440    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
441
442    return {phiStreamSet, offset};
443}
444
445Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
446    Value * ptr, * offset;
447    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
448    return iBuilder->CreateGEP(ptr, offset);
449}
450
451Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
452    Value * ptr, * offset;
453    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
454    return iBuilder->CreateGEP(ptr, {offset, packIndex});
455}
456
457Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
458    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
459}
460
461Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
462    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
463    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
464    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
465    return baseAddr;
466}
467
468void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
469    iBuilder->CreateFree(getBaseAddress(iBuilder, self));
470}
471
472Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
473    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
474}
475
476Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value *) const {
477    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
478}
479
480SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
481: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
482    mUniqueID = "B";
483    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
484        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
485    }
486}
487
488ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
489: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
490    mUniqueID = "E";
491    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
492    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
493}
494
495CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
496: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
497    mUniqueID = "C" + std::to_string(bufferBlocks);
498    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
499}
500
501CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
502: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
503
504}
505
506CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
507: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
508, mOverflowBlocks(overflowBlocks) {
509    mUniqueID = "CC" + std::to_string(bufferBlocks);
510    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
511    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
512}
513
514ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
515: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
516, mInitialCapacity(type->getArrayNumElements()) {
517    mUniqueID = "XP" + std::to_string(bufferBlocks);
518    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
519}
520
521SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
522: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
523    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
524    if (mOverflowBlocks != 1) {
525        mUniqueID += "_" + std::to_string(mOverflowBlocks);
526    }
527    if (AddressSpace > 0) {
528        mUniqueID += "@" + std::to_string(AddressSpace);
529    }
530}
531
532inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
533: mBufferKind(k)
534, mType(resolvedType)
535, mBufferBlocks(BufferBlocks)
536, mAddressSpace(AddressSpace)
537, mStreamSetBufferPtr(nullptr)
538, mBaseType(baseType)
539, mProducer(nullptr) {
540
541}
542
543StreamSetBuffer::~StreamSetBuffer() { }
544
545// Helper routines
546ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
547    unsigned numElements = 1;
548    if (LLVM_LIKELY(type->isArrayTy())) {
549        numElements = type->getArrayNumElements();
550        type = type->getArrayElementType();
551    }
552    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
553        type = type->getVectorElementType();
554        if (LLVM_LIKELY(type->isIntegerTy())) {
555            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
556            type = b->getBitBlockType();
557            if (fieldWidth != 1) {
558                type = ArrayType::get(type, fieldWidth);
559            }
560            return ArrayType::get(type, numElements);
561        }
562    }
563    std::string tmp;
564    raw_string_ostream out(tmp);
565    type->print(out);
566    out << " is an unvalid stream set buffer type.";
567    report_fatal_error(out.str());
568}
569
570StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
571    if (LLVM_LIKELY(type->isArrayTy())) {
572        type = type->getArrayElementType();
573    }
574    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
575        type = type->getVectorElementType();
576        if (LLVM_LIKELY(type->isIntegerTy())) {
577            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
578            type = b->getBitBlockType();
579            if (fieldWidth != 1) {
580                type = ArrayType::get(type, fieldWidth);
581            }
582            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
583        }
584    }
585    std::string tmp;
586    raw_string_ostream out(tmp);
587    type->print(out);
588    out << " is an unvalid stream set buffer type.";
589    report_fatal_error(out.str());
590}
Note: See TracBrowser for help on using the repository browser.