source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5540

Last change on this file since 5540 was 5506, checked in by cameron, 2 years ago

:getStreamSetBlockType

File size: 33.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15
16using namespace parabix;
17using namespace llvm;
18using namespace IDISA;
19
20
21Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
22
23ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
24
25StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
26
27void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
28    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
29        Type * const ty = getType();
30        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
31        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
32    } else {
33        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
34    }
35}
36
37Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
38    if (codegen::EnableAsserts) {
39        Value * const count = getStreamSetCount(iBuilder, self);
40        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
41        Value * const cond = iBuilder->CreateICmpULT(index, count);
42        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
43    }
44    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
45}
46
47Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
48    if (codegen::EnableAsserts) {
49        Value * const count = getStreamSetCount(iBuilder, self);
50        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
51        Value * const cond = iBuilder->CreateICmpULT(index, count);
52        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
53    }
54    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
55}
56
57void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
58    report_fatal_error("setBaseAddress is not supported by this buffer type");
59}
60
61Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
62    report_fatal_error("getBufferedSize is not supported by this buffer type");
63}
64
65void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
66    report_fatal_error("setBufferedSize is not supported by this buffer type");
67}
68
69Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
70    report_fatal_error("getCapacity is not supported by this buffer type");
71}
72
73void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
74    report_fatal_error("setCapacity is not supported by this buffer type");
75}
76
77inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
78    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
79        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
80            return true;
81        }
82    }
83    return false;
84}
85
86Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
87    size_t count = 1;
88    if (isa<ArrayType>(mBaseType)) {
89        count = mBaseType->getArrayNumElements();
90    }
91    return iBuilder->getSize(count);
92}
93
94inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
95    assert (offset->getType()->isIntegerTy());
96    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
97        return offset;
98    } else if (mBufferBlocks == 1) {
99        return ConstantInt::getNullValue(iBuilder->getSizeTy());
100    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
101        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
102    } else {
103        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
104    }
105}
106
107/**
108 * @brief getRawItemPointer
109 *
110 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
111 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
112 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
113 */
114Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
115    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
116    Value * relativePosition = absolutePosition;
117    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
118    if (bw < 8) {
119        assert (bw  == 1 || bw == 2 || bw == 4);
120        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
121        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
122    } else {
123        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
124    }
125    return iBuilder->CreateGEP(ptr, relativePosition);
126}
127
128Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
129    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
130        Constant * stride = iBuilder->getSize(iBuilder->getStride());
131        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
132    } else {
133        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
134        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize, "linearItems"));
135    }
136}
137
138Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
139    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
140    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks), "linearBlocks");
141}
142
143Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
144    return getLinearlyAccessibleItems(iBuilder, self, fromPosition);
145}
146
147Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
148    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
149}
150
151Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
152    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
153    return self;
154}
155
156void StreamSetBuffer::releaseBuffer(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
157    /* do nothing: memory is stack allocated */
158}
159
160void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
161    Type * i8ptr = iBuilder->getInt8PtrTy();
162    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
163    size_t numStreams = 1;
164    if (isa<ArrayType>(mBaseType)) {
165        numStreams = mBaseType->getArrayNumElements();
166    }
167    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
168    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
169    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
170}
171
172void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
173    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
174    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
175    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
176    size_t numStreams = 1;
177    if (isa<ArrayType>(mBaseType)) {
178        numStreams = mBaseType->getArrayNumElements();
179    }
180    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
181    if (numStreams == 1) {
182        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
183        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
184        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
185    } else {
186        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
187        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
188        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
189        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
190        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
191        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
192        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
193        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
194        for (unsigned strm = 0; strm < numStreams; strm++) {
195            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
196            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
197            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
198            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
199            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
200        }
201    }
202}
203
204// Source File Buffer
205
206Type * SourceBuffer::getStreamSetBlockType() const {
207    return cast<PointerType>(mType->getStructElementType(int(SourceBuffer::Field::BaseAddress)))->getElementType();
208}
209
210
211Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
212    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
213    return iBuilder->CreateLoad(ptr);
214}
215
216void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
217    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
218    iBuilder->CreateStore(size, ptr);
219}
220
221Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
222    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
223    return iBuilder->CreateLoad(ptr);
224}
225
226void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
227    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
228    iBuilder->CreateStore(c, ptr);
229}
230
231void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
232    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
233
234    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
235}
236
237Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
238    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
239    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
240    Value * const addr = iBuilder->CreateLoad(ptr);
241    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
242    return addr;
243}
244
245Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
246    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
247}
248
249Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
250    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
251}
252
253Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
254    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
255}
256
257
258// External File Buffer
259void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
260    report_fatal_error("External buffers cannot be allocated.");
261}
262
263Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
264    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
265}
266
267Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
268    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
269}
270
271// Circular Buffer
272Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
273    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
274}
275
276Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
277    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
278    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
279    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
280    if (bw < 8) {
281        assert (bw  == 1 || bw == 2 || bw == 4);
282        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
283        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
284    } else {
285        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
286    }
287    return iBuilder->CreateGEP(ptr, relativePosition);
288}
289
290// CircularCopybackBuffer Buffer
291void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
292    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
293}
294
295void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
296    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
297    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
298}
299
300Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
301    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
302}
303
304Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
305    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
306}
307
308// SwizzledCopybackBuffer Buffer
309
310void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
311    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
312}
313
314void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
315    Type * int8PtrTy = iBuilder->getInt8PtrTy();
316    DataLayout DL(iBuilder->getModule());
317    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
318
319    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
320    Function * f = iBuilder->GetInsertBlock()->getParent();
321    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
322    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
323    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
324    const unsigned numStreams = getType()->getArrayNumElements();
325    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
326    const auto elemTy = getType()->getArrayElementType();
327    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
328    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
329    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
330    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
331    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
332    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
333
334    iBuilder->SetInsertPoint(wholeBlockCopy);
335    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
336    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
337    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
338    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
339    iBuilder->SetInsertPoint(partialBlockCopy);
340    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
341    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
342    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
343        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
344        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
345        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
346    }
347    iBuilder->CreateBr(copyDone);
348
349    iBuilder->SetInsertPoint(copyDone);
350}
351
352void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
353    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
354    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
355}
356
357Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
358    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
359}
360
361Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
362    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
363}
364
365Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
366    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
367}
368
369// Expandable Buffer
370
371void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
372    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
373    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
374    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
375    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
376    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
377    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
378    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
379    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
380    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
381    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
382    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
383}
384
385std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
386
387    // ENTRY
388    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
389    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
390    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
391    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
392    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
393
394    assert (streamIndex->getType() == capacity->getType());
395    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
396
397    // Are we guaranteed that we can access this stream?
398    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
399        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
400        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
401        return {streamSet, offset};
402    }
403
404    BasicBlock * const entry = iBuilder->GetInsertBlock();
405    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
406    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
407
408    iBuilder->CreateLikelyCondBr(cond, resume, expand);
409
410    // EXPAND
411    iBuilder->SetInsertPoint(expand);
412
413    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
414    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
415
416    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
417    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
418    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
419
420    std::string tmp;
421    raw_string_ostream out(tmp);
422    out << "__expand";
423    elementType->print(out);
424    std::string name = out.str();
425
426    Module * const m = iBuilder->getModule();
427    Function * expandFunction = m->getFunction(name);
428
429    if (expandFunction == nullptr) {
430
431        const auto ip = iBuilder->saveIP();
432
433        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
434        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
435
436        auto args = expandFunction->arg_begin();
437        Value * streamSet = &*args++;
438        Value * capacity = &*args++;
439        Value * newCapacity = &*args;
440
441        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
442        iBuilder->SetInsertPoint(entry);
443
444        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
445        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
446
447        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
448        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
449
450        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
451        for (unsigned i = 0; i < mBufferBlocks; ++i) {
452            ConstantInt * const offset = iBuilder->getSize(i);
453            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
454            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
455            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
456            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
457            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
458            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
459            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
460            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
461        }
462
463        iBuilder->CreateFree(streamSet);
464
465        iBuilder->CreateRet(newStreamSet);
466
467        iBuilder->restoreIP(ip);
468    }
469
470    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
471    iBuilder->CreateStore(newStreamSet, streamSetPtr);
472    iBuilder->CreateStore(newCapacity, capacityPtr);
473
474    iBuilder->CreateBr(resume);
475
476    // RESUME
477    iBuilder->SetInsertPoint(resume);
478
479    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
480    phiStreamSet->addIncoming(streamSet, entry);
481    phiStreamSet->addIncoming(newStreamSet, expand);
482
483    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
484    phiCapacity->addIncoming(capacity, entry);
485    phiCapacity->addIncoming(newCapacity, expand);
486
487    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
488
489    return {phiStreamSet, offset};
490}
491
492Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
493    Value * ptr, * offset;
494    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
495    return iBuilder->CreateGEP(ptr, offset);
496}
497
498Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
499    Value * ptr, * offset;
500    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
501    return iBuilder->CreateGEP(ptr, {offset, packIndex});
502}
503
504Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
505    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
506}
507
508Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
509    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
510    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
511    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
512    return baseAddr;
513}
514
515void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
516    iBuilder->CreateFree(getBaseAddress(iBuilder, self));
517}
518
519Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
520    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
521}
522
523Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
524    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
525}
526
527SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
528: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
529    mUniqueID = "B";
530    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
531        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
532    }
533}
534
535ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
536: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
537    mUniqueID = "E";
538    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
539    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
540}
541
542CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
543: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
544    mUniqueID = "C" + std::to_string(bufferBlocks);
545    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
546}
547
548CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
549: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
550
551}
552
553CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
554: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
555, mOverflowBlocks(overflowBlocks) {
556    mUniqueID = "CC" + std::to_string(bufferBlocks);
557    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
558    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
559}
560
561ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
562: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
563, mInitialCapacity(type->getArrayNumElements()) {
564    mUniqueID = "XP" + std::to_string(bufferBlocks);
565    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
566}
567
568SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
569: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
570    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
571    if (mOverflowBlocks != 1) {
572        mUniqueID += "_" + std::to_string(mOverflowBlocks);
573    }
574    if (AddressSpace > 0) {
575        mUniqueID += "@" + std::to_string(AddressSpace);
576    }
577}
578
579inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
580: mBufferKind(k)
581, mType(resolvedType)
582, mBufferBlocks(BufferBlocks)
583, mAddressSpace(AddressSpace)
584, mStreamSetBufferPtr(nullptr)
585, mBaseType(baseType)
586, mProducer(nullptr) {
587
588}
589
590StreamSetBuffer::~StreamSetBuffer() { }
591
592// Helper routines
593ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
594    unsigned numElements = 1;
595    if (LLVM_LIKELY(type->isArrayTy())) {
596        numElements = type->getArrayNumElements();
597        type = type->getArrayElementType();
598    }
599    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
600        type = type->getVectorElementType();
601        if (LLVM_LIKELY(type->isIntegerTy())) {
602            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
603            type = b->getBitBlockType();
604            if (fieldWidth != 1) {
605                type = ArrayType::get(type, fieldWidth);
606            }
607            return ArrayType::get(type, numElements);
608        }
609    }
610    std::string tmp;
611    raw_string_ostream out(tmp);
612    type->print(out);
613    out << " is an unvalid stream set buffer type.";
614    report_fatal_error(out.str());
615}
616
617StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
618    if (LLVM_LIKELY(type->isArrayTy())) {
619        type = type->getArrayElementType();
620    }
621    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
622        type = type->getVectorElementType();
623        if (LLVM_LIKELY(type->isIntegerTy())) {
624            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
625            type = b->getBitBlockType();
626            if (fieldWidth != 1) {
627                type = ArrayType::get(type, fieldWidth);
628            }
629            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
630        }
631    }
632    std::string tmp;
633    raw_string_ostream out(tmp);
634    type->print(out);
635    out << " is an unvalid stream set buffer type.";
636    report_fatal_error(out.str());
637}
Note: See TracBrowser for help on using the repository browser.