source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5501

Last change on this file since 5501 was 5501, checked in by cameron, 22 months ago

setCapacity/getCapacity/getLinearlyAvailableItems for SourceBuffer?

File size: 33.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15
16using namespace parabix;
17using namespace llvm;
18using namespace IDISA;
19
20ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
21
22StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
23
24void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
25    if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
26        Type * const ty = getType();
27        mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
28        iBuilder->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, iBuilder->getCacheAlignment());
29    } else {
30        report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
31    }
32}
33
34Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
35    if (codegen::EnableAsserts) {
36        Value * const count = getStreamSetCount(iBuilder, self);
37        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
38        Value * const cond = iBuilder->CreateICmpULT(index, count);
39        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
40    }
41    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex});
42}
43
44Value * StreamSetBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
45    if (codegen::EnableAsserts) {
46        Value * const count = getStreamSetCount(iBuilder, self);
47        Value * const index = iBuilder->CreateZExtOrTrunc(streamIndex, count->getType());
48        Value * const cond = iBuilder->CreateICmpULT(index, count);
49        iBuilder->CreateAssert(cond, "StreamSetBuffer: out-of-bounds stream access");
50    }
51    return iBuilder->CreateGEP(getStreamSetBlockPtr(iBuilder, self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
52}
53
54void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, Value * /* addr */) const {
55    report_fatal_error("setBaseAddress is not supported by this buffer type");
56}
57
58Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
59    report_fatal_error("getBufferedSize is not supported by this buffer type");
60}
61
62void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* size */) const {
63    report_fatal_error("setBufferedSize is not supported by this buffer type");
64}
65
66Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */) const {
67    report_fatal_error("getCapacity is not supported by this buffer type");
68}
69
70void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * /* self */, llvm::Value * /* c */) const {
71    report_fatal_error("setCapacity is not supported by this buffer type");
72}
73
74inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
75    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
76        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
77            return true;
78        }
79    }
80    return false;
81}
82
83Value * StreamSetBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value *) const {
84    size_t count = 1;
85    if (isa<ArrayType>(mBaseType)) {
86        count = mBaseType->getArrayNumElements();
87    }
88    return iBuilder->getSize(count);
89}
90
91inline Value * StreamSetBuffer::modByBufferBlocks(IDISA::IDISA_Builder * const iBuilder, Value * const offset) const {
92    assert (offset->getType()->isIntegerTy());
93    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
94        return offset;
95    } else if (mBufferBlocks == 1) {
96        return ConstantInt::getNullValue(iBuilder->getSizeTy());
97    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
98        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
99    } else {
100        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
101    }
102}
103
104/**
105 * @brief getRawItemPointer
106 *
107 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
108 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
109 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
110 */
111Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
112    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
113    Value * relativePosition = absolutePosition;
114    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
115    if (bw < 8) {
116        assert (bw  == 1 || bw == 2 || bw == 4);
117        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
118        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
119    } else {
120        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
121    }
122    return iBuilder->CreateGEP(ptr, relativePosition);
123}
124
125Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
126    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
127        Constant * stride = iBuilder->getSize(iBuilder->getStride());
128        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
129    } else {
130        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
131        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize, "linearItems"));
132    }
133}
134
135Value * StreamSetBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
136    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
137    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks), "linearBlocks");
138}
139
140Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
141    return getLinearlyAccessibleItems(iBuilder, self, fromPosition);
142}
143
144Value * StreamSetBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
145    return getLinearlyAccessibleBlocks(iBuilder, self, fromBlock);
146}
147
148Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
149    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
150    return self;
151}
152
153void StreamSetBuffer::releaseBuffer(IDISA::IDISA_Builder * const /* iBuilder */, Value * /* self */) const {
154    /* do nothing: memory is stack allocated */
155}
156
157void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
158    Type * i8ptr = iBuilder->getInt8PtrTy();
159    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
160    size_t numStreams = 1;
161    if (isa<ArrayType>(mBaseType)) {
162        numStreams = mBaseType->getArrayNumElements();
163    }
164    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
165    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
166    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
167}
168
169void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
170    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
171    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
172    Constant * const blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
173    size_t numStreams = 1;
174    if (isa<ArrayType>(mBaseType)) {
175        numStreams = mBaseType->getArrayNumElements();
176    }
177    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
178    if (numStreams == 1) {
179        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
180        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
181        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
182    } else {
183        Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
184        Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
185        Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
186        Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
187        Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
188        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), blockCopyBytes, alignment);
189        Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
190        Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
191        for (unsigned strm = 0; strm < numStreams; strm++) {
192            Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
193            Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
194            strmTargetPtr = iBuilder->CreateBitCast(strmTargetPtr, int8PtrTy);
195            strmSourcePtr = iBuilder->CreateBitCast(strmSourcePtr, int8PtrTy);
196            iBuilder->CreateMemMove(strmTargetPtr, strmSourcePtr, partialCopyBytesPerStream, alignment);
197        }
198    }
199}
200
201// Source File Buffer
202Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
203    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
204    return iBuilder->CreateLoad(ptr);
205}
206
207void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * size) const {
208    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BufferedSize))});
209    iBuilder->CreateStore(size, ptr);
210}
211
212Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
213    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
214    return iBuilder->CreateLoad(ptr);
215}
216
217void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const iBuilder, Value * self, llvm::Value * c) const {
218    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::Capacity))});
219    iBuilder->CreateStore(c, ptr);
220}
221
222void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
223    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
224    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
225}
226
227Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
228    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
229    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
230    Value * const addr = iBuilder->CreateLoad(ptr);
231    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
232    return addr;
233}
234
235Value * SourceBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
236    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
237}
238
239Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
240    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
241}
242
243
244// External File Buffer
245void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
246    report_fatal_error("External buffers cannot be allocated.");
247}
248
249Value * ExternalBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
250    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), blockIndex);
251}
252
253Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
254    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
255}
256
257// Circular Buffer
258Value * CircularBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * const self, Value * const blockIndex) const {
259    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
260}
261
262Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
263    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
264    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
265    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
266    if (bw < 8) {
267        assert (bw  == 1 || bw == 2 || bw == 4);
268        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
269        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
270    } else {
271        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
272    }
273    return iBuilder->CreateGEP(ptr, relativePosition);
274}
275
276// CircularCopybackBuffer Buffer
277void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
278    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
279}
280
281void CircularCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
282    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
283    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
284}
285
286Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
287    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
288}
289
290Value * CircularCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
291    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
292}
293
294// SwizzledCopybackBuffer Buffer
295
296void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
297    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
298}
299
300void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
301    Type * int8PtrTy = iBuilder->getInt8PtrTy();
302    DataLayout DL(iBuilder->getModule());
303    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
304
305    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
306    Function * f = iBuilder->GetInsertBlock()->getParent();
307    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
308    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
309    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
310    const unsigned numStreams = getType()->getArrayNumElements();
311    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
312    const auto elemTy = getType()->getArrayElementType();
313    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
314    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
315    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
316    Value * partialBlockTargetPtr = iBuilder->CreateGEP(targetBlockPtr, blocksToCopy);
317    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
318    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
319
320    iBuilder->SetInsertPoint(wholeBlockCopy);
321    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
322    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
323    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
324    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
325    iBuilder->SetInsertPoint(partialBlockCopy);
326    Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth * swizzleFactor));
327    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
328    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
329        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
330        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
331        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
332    }
333    iBuilder->CreateBr(copyDone);
334
335    iBuilder->SetInsertPoint(copyDone);
336}
337
338void SwizzledCopybackBuffer::createCopyBack(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * overFlowItems) const {
339    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
340    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
341}
342
343Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
344    return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
345}
346
347Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
348    return iBuilder->CreateAdd(getLinearlyAccessibleItems(iBuilder, self, fromPosition), iBuilder->getSize(mOverflowBlocks * iBuilder->getBitBlockWidth()));
349}
350
351Value * SwizzledCopybackBuffer::getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
352    return iBuilder->CreateAdd(getLinearlyAccessibleBlocks(iBuilder, self, fromBlock), iBuilder->getSize(mOverflowBlocks));
353}
354
355// Expandable Buffer
356
357void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
358    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
359    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
360    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
361    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
362    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
363    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
364    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
365    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
366    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
367    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
368    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
369}
370
371std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
372
373    // ENTRY
374    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
375    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
376    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
377    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
378    blockIndex = modByBufferBlocks(iBuilder, blockIndex);
379
380    assert (streamIndex->getType() == capacity->getType());
381    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
382
383    // Are we guaranteed that we can access this stream?
384    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
385        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
386        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
387        return {streamSet, offset};
388    }
389
390    BasicBlock * const entry = iBuilder->GetInsertBlock();
391    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
392    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
393
394    iBuilder->CreateLikelyCondBr(cond, resume, expand);
395
396    // EXPAND
397    iBuilder->SetInsertPoint(expand);
398
399    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
400    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
401
402    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
403    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
404    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
405
406    std::string tmp;
407    raw_string_ostream out(tmp);
408    out << "__expand";
409    elementType->print(out);
410    std::string name = out.str();
411
412    Module * const m = iBuilder->getModule();
413    Function * expandFunction = m->getFunction(name);
414
415    if (expandFunction == nullptr) {
416
417        const auto ip = iBuilder->saveIP();
418
419        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
420        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
421
422        auto args = expandFunction->arg_begin();
423        Value * streamSet = &*args++;
424        Value * capacity = &*args++;
425        Value * newCapacity = &*args;
426
427        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
428        iBuilder->SetInsertPoint(entry);
429
430        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
431        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
432
433        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
434        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
435
436        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
437        for (unsigned i = 0; i < mBufferBlocks; ++i) {
438            ConstantInt * const offset = iBuilder->getSize(i);
439            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
440            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
441            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
442            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
443            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
444            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
445            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
446            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
447        }
448
449        iBuilder->CreateFree(streamSet);
450
451        iBuilder->CreateRet(newStreamSet);
452
453        iBuilder->restoreIP(ip);
454    }
455
456    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
457    iBuilder->CreateStore(newStreamSet, streamSetPtr);
458    iBuilder->CreateStore(newCapacity, capacityPtr);
459
460    iBuilder->CreateBr(resume);
461
462    // RESUME
463    iBuilder->SetInsertPoint(resume);
464
465    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
466    phiStreamSet->addIncoming(streamSet, entry);
467    phiStreamSet->addIncoming(newStreamSet, expand);
468
469    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
470    phiCapacity->addIncoming(capacity, entry);
471    phiCapacity->addIncoming(newCapacity, expand);
472
473    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
474
475    return {phiStreamSet, offset};
476}
477
478Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
479    Value * ptr, * offset;
480    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
481    return iBuilder->CreateGEP(ptr, offset);
482}
483
484Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
485    Value * ptr, * offset;
486    std::tie(ptr, offset) = getInternalStreamBuffer(iBuilder, self, streamIndex, blockIndex, readOnly);
487    return iBuilder->CreateGEP(ptr, {offset, packIndex});
488}
489
490Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
491    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
492}
493
494Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
495    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
496    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
497    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
498    return baseAddr;
499}
500
501void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
502    iBuilder->CreateFree(getBaseAddress(iBuilder, self));
503}
504
505Value * ExpandableBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value *, Value *) const {
506    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
507}
508
509Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value *) const {
510    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
511}
512
513SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
514: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
515    mUniqueID = "B";
516    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
517        mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
518    }
519}
520
521ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
522: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
523    mUniqueID = "E";
524    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
525    mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
526}
527
528CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
529: StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
530    mUniqueID = "C" + std::to_string(bufferBlocks);
531    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
532}
533
534CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
535: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
536
537}
538
539CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
540: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
541, mOverflowBlocks(overflowBlocks) {
542    mUniqueID = "CC" + std::to_string(bufferBlocks);
543    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
544    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
545}
546
547ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
548: StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
549, mInitialCapacity(type->getArrayNumElements()) {
550    mUniqueID = "XP" + std::to_string(bufferBlocks);
551    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
552}
553
554SwizzledCopybackBuffer::SwizzledCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
555: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
556    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
557    if (mOverflowBlocks != 1) {
558        mUniqueID += "_" + std::to_string(mOverflowBlocks);
559    }
560    if (AddressSpace > 0) {
561        mUniqueID += "@" + std::to_string(AddressSpace);
562    }
563}
564
565inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
566: mBufferKind(k)
567, mType(resolvedType)
568, mBufferBlocks(BufferBlocks)
569, mAddressSpace(AddressSpace)
570, mStreamSetBufferPtr(nullptr)
571, mBaseType(baseType)
572, mProducer(nullptr) {
573
574}
575
576StreamSetBuffer::~StreamSetBuffer() { }
577
578// Helper routines
579ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
580    unsigned numElements = 1;
581    if (LLVM_LIKELY(type->isArrayTy())) {
582        numElements = type->getArrayNumElements();
583        type = type->getArrayElementType();
584    }
585    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
586        type = type->getVectorElementType();
587        if (LLVM_LIKELY(type->isIntegerTy())) {
588            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
589            type = b->getBitBlockType();
590            if (fieldWidth != 1) {
591                type = ArrayType::get(type, fieldWidth);
592            }
593            return ArrayType::get(type, numElements);
594        }
595    }
596    std::string tmp;
597    raw_string_ostream out(tmp);
598    type->print(out);
599    out << " is an unvalid stream set buffer type.";
600    report_fatal_error(out.str());
601}
602
603StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
604    if (LLVM_LIKELY(type->isArrayTy())) {
605        type = type->getArrayElementType();
606    }
607    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
608        type = type->getVectorElementType();
609        if (LLVM_LIKELY(type->isIntegerTy())) {
610            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
611            type = b->getBitBlockType();
612            if (fieldWidth != 1) {
613                type = ArrayType::get(type, fieldWidth);
614            }
615            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
616        }
617    }
618    std::string tmp;
619    raw_string_ostream out(tmp);
620    type->print(out);
621    out << " is an unvalid stream set buffer type.";
622    report_fatal_error(out.str());
623}
Note: See TracBrowser for help on using the repository browser.