source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5413

Last change on this file since 5413 was 5411, checked in by nmedfort, 2 years ago

Potential bug fix for 32-bit. Modified MRemap to check for Linux OS support. Added MMapAdvise to CBuilder.

File size: 33.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
8#include <llvm/IR/BasicBlock.h>    // for BasicBlock
9#include <llvm/IR/Constants.h>     // for ConstantInt
10#include <llvm/IR/DataLayout.h>    // for DataLayout
11#include <llvm/IR/DerivedTypes.h>  // for IntegerType (ptr only), PointerType
12#include <llvm/IR/Module.h>        // for Module
13#include <llvm/IR/Value.h>         // for Value
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/IR/CFG.h>
16#include <kernels/kernel.h>
17#include <kernels/toolchain.h>
18
19namespace llvm { class Constant; }
20namespace llvm { class Function; }
21
22using namespace parabix;
23using namespace llvm;
24using namespace IDISA;
25
26ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type);
27
28StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type);
29
30void StreamSetBuffer::allocateBuffer() {
31    Type * const ty = getType();
32    ConstantInt * blocks = iBuilder->getSize(mBufferBlocks);
33    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
34    Constant * width = ConstantExpr::getMul(ConstantExpr::getSizeOf(ty), blocks);
35    iBuilder->CreateMemZero(mStreamSetBufferPtr, width, iBuilder->getCacheAlignment());
36}
37
38Value * StreamSetBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
39    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
40    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex});
41}
42
43Value * StreamSetBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
44    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
45    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
46}
47
48void StreamSetBuffer::setBaseAddress(Value * /* self */, Value * /* addr */) const {
49    report_fatal_error("setBaseAddress is not supported by this buffer type");
50}
51
52Value * StreamSetBuffer::getBufferedSize(Value * /* self */) const {
53    report_fatal_error("getBufferedSize is not supported by this buffer type");
54}
55
56void StreamSetBuffer::setBufferedSize(Value * /* self */, llvm::Value * /* size */) const {
57    report_fatal_error("setBufferedSize is not supported by this buffer type");
58}
59
60inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
61    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
62        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
63            return true;
64        }
65    }
66    return false;
67}
68
69Value * StreamSetBuffer::getStreamSetCount(Value *) const {
70    uint64_t count = 1;
71    if (isa<ArrayType>(mBaseType)) {
72        count = mBaseType->getArrayNumElements();
73    }
74    return iBuilder->getSize(count);
75}
76
77inline Value * StreamSetBuffer::modByBufferBlocks(Value * const offset) const {
78    assert (offset->getType()->isIntegerTy());
79    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
80        return offset;
81    } else if (mBufferBlocks == 1) {
82        return ConstantInt::getNullValue(iBuilder->getSizeTy());
83    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
84        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
85    } else {
86        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
87    }
88}
89
90/**
91 * @brief getRawItemPointer
92 *
93 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
94 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
95 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
96 */
97Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
98    Value * ptr = getBaseAddress(self);
99    if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
100        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
101    }
102    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
103    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
104    if (LLVM_UNLIKELY(ty->getBitWidth() < 8)) {
105        const auto bw = ty->getBitWidth();
106        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
107            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
108        } else {
109            absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
110            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
111        }
112    }
113    return iBuilder->CreateGEP(ptr, absolutePosition);
114}
115
116Value * StreamSetBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
117    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
118        Constant * stride = iBuilder->getSize(iBuilder->getStride());
119        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
120    } else {
121        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
122        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
123    }
124}
125
126Value * StreamSetBuffer::getLinearlyAccessibleBlocks(Value * self, Value * fromBlock) const {
127    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
128    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
129}
130
131void StreamSetBuffer::reserveBytes(Value * self, llvm::Value *requested) const {
132    report_fatal_error("reserve() can only be used with ExtensibleBuffers");
133}
134
135Value * StreamSetBuffer::getBaseAddress(Value * self) const {
136    return self;
137}
138
139void StreamSetBuffer::releaseBuffer(Value * /* self */) const {
140    /* do nothing: memory is stack allocated */
141}
142
143// Single Block Buffer
144
145// For a single block buffer, the block pointer is always the buffer base pointer.
146Value * SingleBlockBuffer::getStreamSetBlockPtr(Value * self, Value *) const {
147    return self;
148}
149
150// External File Buffer
151void ExternalFileBuffer::setStreamSetBuffer(Value * ptr) {
152    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
153}
154
155void ExternalFileBuffer::allocateBuffer() {
156    report_fatal_error("External buffers cannot be allocated.");
157}
158
159Value * ExternalFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
160    return iBuilder->CreateGEP(self, blockIndex);
161}
162
163Value * ExternalFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
164    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
165}
166
167// Source File Buffer
168Value * SourceFileBuffer::getBufferedSize(Value * self) const {
169    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
170    return iBuilder->CreateLoad(ptr);
171}
172
173void SourceFileBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
174    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
175    iBuilder->CreateStore(size, ptr);
176}
177
178void SourceFileBuffer::setBaseAddress(Value * self, Value * addr) const {
179    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
180    iBuilder->CreateStore(addr, ptr);
181}
182
183Value * SourceFileBuffer::getBaseAddress(Value * const self) const {
184    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
185    return iBuilder->CreateLoad(ptr);
186}
187
188Value * SourceFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
189    return iBuilder->CreateGEP(self, blockIndex);
190}
191
192Value * SourceFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
193    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
194}
195
196// ExtensibleBuffer
197Value * ExtensibleBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
198    Value * capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
199    Value * capacity = iBuilder->CreateLoad(capacityPtr);
200    return iBuilder->CreateSub(capacity, fromPosition);
201}
202
203Value * ExtensibleBuffer::roundUpToPageSize(Value * const value) const {
204    const auto pageSize = getpagesize();
205    assert ((pageSize & (pageSize - 1)) == 0);
206    Constant * const pageMask = ConstantInt::get(value->getType(), pageSize - 1);
207    return iBuilder->CreateAnd(iBuilder->CreateAdd(value, pageMask), iBuilder->CreateNot(pageMask));
208}
209
210void ExtensibleBuffer::allocateBuffer() {
211    Type * ty = getType();
212    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
213    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
214
215    Type * const elementType = ty->getStructElementType(1)->getPointerElementType();
216    Constant * size = ConstantExpr::getSizeOf(elementType);
217    size = ConstantExpr::getMul(size, iBuilder->getSize(mBufferBlocks));
218    size = ConstantExpr::getIntegerCast(size, iBuilder->getSizeTy(), false);
219    Value * const initialSize = roundUpToPageSize(size);
220
221    iBuilder->CreateStore(initialSize, capacityPtr);
222    Value * addr = iBuilder->CreateAnonymousMMap(size);
223    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
224    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
225    iBuilder->CreateStore(addr, addrPtr);
226    Value * const bufferSizePtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
227    iBuilder->CreateStore(ConstantInt::getNullValue(bufferSizePtr->getType()->getPointerElementType()), bufferSizePtr);
228    mStreamSetBufferPtr = instance;
229}
230
231Value * ExtensibleBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
232    return iBuilder->CreateGEP(self, blockIndex);
233}
234
235void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const requiredSize) const {
236
237    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
238    Value * const currentSize = iBuilder->CreateLoad(capacityPtr);
239    BasicBlock * const entry = iBuilder->GetInsertBlock();
240    Function * const parent = entry->getParent();
241    IntegerType * const sizeTy = iBuilder->getSizeTy();
242    ConstantInt * const zero = iBuilder->getInt32(0);
243    ConstantInt * const one = iBuilder->getInt32(1);
244
245    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", parent);
246    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", parent);
247
248    Value * noExpansionNeeded = iBuilder->CreateICmpULT(requiredSize, currentSize);
249
250    kernel::KernelBuilder * const kernel = getProducer();
251    auto consumers = kernel->getStreamOutputs();
252    if (consumers.empty()) {
253        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, expand);
254    } else { // we cannot risk expanding this buffer until all of the consumers have finished reading the data
255
256        ConstantInt * const size0 = iBuilder->getSize(0);
257        Value * const segNo = kernel->acquireLogicalSegmentNo();
258        const auto n = consumers.size();
259
260        BasicBlock * load[n + 1];
261        BasicBlock * wait[n];
262        for (unsigned i = 0; i < n; ++i) {
263            load[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Load", parent);
264            wait[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Wait", parent);
265        }
266        load[n] = expand;
267        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, load[0]);
268
269        for (unsigned i = 0; i < n; ++i) {
270
271            iBuilder->SetInsertPoint(load[i]);
272            Value * const outputConsumers = kernel->getConsumerState(consumers[i].name);
273
274            Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
275            Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
276            Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
277            iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
278
279            iBuilder->SetInsertPoint(wait[i]);
280            PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
281            consumerPhi->addIncoming(size0, load[i]);
282
283            Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
284            Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
285            Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
286            assert (ready->getType() == iBuilder->getInt1Ty());
287            Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
288            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
289            Value * const next = iBuilder->CreateICmpEQ(nextConsumerIdx, consumerCount);
290            iBuilder->CreateCondBr(next, load[i + 1], wait[i]);
291
292        }
293        expand->moveAfter(wait[n - 1]);
294        resume->moveAfter(expand);
295    }
296    iBuilder->SetInsertPoint(expand);
297    Value * const reservedSize = roundUpToPageSize(iBuilder->CreateShl(requiredSize, 1));
298    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
299
300    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
301    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
302    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
303    iBuilder->CreateStore(newAddr, baseAddrPtr);
304    iBuilder->CreateStore(reservedSize, capacityPtr);
305    iBuilder->CreateBr(resume);
306    iBuilder->SetInsertPoint(resume);
307}
308
309Value * ExtensibleBuffer::getBufferedSize(Value * self) const {
310    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
311    return iBuilder->CreateLoad(ptr);
312}
313
314void ExtensibleBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
315    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
316    iBuilder->CreateStore(size, ptr);
317}
318
319Value * ExtensibleBuffer::getBaseAddress(Value * const self) const {
320    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
321    return iBuilder->CreateLoad(ptr);
322}
323
324void ExtensibleBuffer::releaseBuffer(Value * self) const {
325    Value * const sizePtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
326    Value * size = iBuilder->CreateLoad(sizePtr);
327    iBuilder->CreateMUnmap(getBaseAddress(self), size);
328}
329
330// Circular Buffer
331
332Value * CircularBuffer::getStreamSetBlockPtr(Value * const self, Value * const blockIndex) const {
333    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
334}
335
336// CircularCopybackBuffer Buffer
337
338void CircularCopybackBuffer::allocateBuffer() {
339    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
340}
341
342void CircularCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
343    Type * size_ty = iBuilder->getSizeTy();
344    Type * i8ptr = iBuilder->getInt8PtrTy();
345    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
346    Function * f = iBuilder->GetInsertBlock()->getParent();
347    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
348    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
349    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
350    unsigned numStreams = getType()->getArrayNumElements();
351    auto elemTy = getType()->getArrayElementType();
352    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
353    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
354    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
355    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
356    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
357    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
358    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
359    iBuilder->SetInsertPoint(wholeBlockCopy);
360    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
361    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
362    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
363    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
364    iBuilder->SetInsertPoint(partialBlockCopy);
365    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth));
366    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
367    for (unsigned strm = 0; strm < numStreams; strm++) {
368        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
369        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
370        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
371    }
372    iBuilder->CreateBr(copyBackDone);
373    iBuilder->SetInsertPoint(copyBackDone);
374}
375
376Value * CircularCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
377    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
378}
379
380// SwizzledCopybackBuffer Buffer
381
382void SwizzledCopybackBuffer::allocateBuffer() {
383    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
384}
385
386void SwizzledCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
387    Type * size_ty = iBuilder->getSizeTy();
388    Type * i8ptr = iBuilder->getInt8PtrTy();
389    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
390    Function * f = iBuilder->GetInsertBlock()->getParent();
391    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
392    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
393    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
394    unsigned numStreams = getType()->getArrayNumElements();
395    unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
396    auto elemTy = getType()->getArrayElementType();
397    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
398    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
399    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
400    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
401    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
402    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
403    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
404    iBuilder->SetInsertPoint(wholeBlockCopy);
405    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
406    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
407    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
408    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
409    iBuilder->SetInsertPoint(partialBlockCopy);
410    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth * swizzleFactor));
411    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
412    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
413        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
414        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
415        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
416    }
417    iBuilder->CreateBr(copyBackDone);
418    iBuilder->SetInsertPoint(copyBackDone);
419}
420
421Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
422    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
423}
424
425SwizzledCopybackBuffer::SwizzledCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
426: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
427    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
428    if (mOverflowBlocks != 1) {
429        mUniqueID += "_" + std::to_string(mOverflowBlocks);
430    }
431    if (AddressSpace > 0) {
432        mUniqueID += "@" + std::to_string(AddressSpace);
433    }
434}
435
436// Expandable Buffer
437
438void ExpandableBuffer::allocateBuffer() {
439    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
440    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
441    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
442    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
443    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
444    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
445    Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
446    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
447    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
448    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
449}
450
451std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
452
453    // ENTRY
454    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
455    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
456    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
457    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
458    blockIndex = modByBufferBlocks(blockIndex);
459
460    assert (streamIndex->getType() == capacity->getType());
461    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
462
463    // Are we guaranteed that we can access this stream?
464    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
465        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
466        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
467        return {streamSet, offset};
468    }
469
470    BasicBlock * const entry = iBuilder->GetInsertBlock();
471    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
472    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
473
474    iBuilder->CreateLikelyCondBr(cond, resume, expand);
475
476    // EXPAND
477    iBuilder->SetInsertPoint(expand);
478
479    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
480    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
481
482    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
483    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
484    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
485
486    std::string tmp;
487    raw_string_ostream out(tmp);
488    out << "__expand";
489    elementType->print(out);
490    std::string name = out.str();
491
492    Module * const m = iBuilder->getModule();
493    Function * expandFunction = m->getFunction(name);
494
495    if (expandFunction == nullptr) {
496
497        const auto ip = iBuilder->saveIP();
498
499        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
500        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
501
502        auto args = expandFunction->arg_begin();
503        Value * streamSet = &*args++;
504        Value * capacity = &*args++;
505        Value * newCapacity = &*args;
506
507        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
508        iBuilder->SetInsertPoint(entry);
509
510        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
511        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
512        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
513
514        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
515        for (unsigned i = 0; i < mBufferBlocks; ++i) {
516            ConstantInt * const offset = iBuilder->getSize(i);
517            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
518            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
519            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
520            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
521            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
522            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
523            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
524            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
525        }
526
527        iBuilder->CreateAlignedFree(streamSet);
528
529        iBuilder->CreateRet(newStreamSet);
530
531        iBuilder->restoreIP(ip);
532    }
533
534    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
535    iBuilder->CreateStore(newStreamSet, streamSetPtr);
536    iBuilder->CreateStore(newCapacity, capacityPtr);
537
538    iBuilder->CreateBr(resume);
539
540    // RESUME
541    iBuilder->SetInsertPoint(resume);
542
543    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
544    phiStreamSet->addIncoming(streamSet, entry);
545    phiStreamSet->addIncoming(newStreamSet, expand);
546
547    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
548    phiCapacity->addIncoming(capacity, entry);
549    phiCapacity->addIncoming(newCapacity, expand);
550
551    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
552
553    return {phiStreamSet, offset};
554}
555
556Value * ExpandableBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
557    Value * ptr, * offset;
558    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
559    return iBuilder->CreateGEP(ptr, offset);
560}
561
562Value * ExpandableBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
563    Value * ptr, * offset;
564    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
565    return iBuilder->CreateGEP(ptr, {offset, packIndex});
566}
567
568Value * ExpandableBuffer::getStreamSetCount(Value * self) const {
569    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
570}
571
572Value * ExpandableBuffer::getBaseAddress(Value * self) const {
573    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
574}
575
576void ExpandableBuffer::releaseBuffer(Value * self) const {
577    iBuilder->CreateAlignedFree(getBaseAddress(self));
578}
579
580Value * ExpandableBuffer::getStreamSetBlockPtr(Value *, Value *) const {
581    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
582}
583
584Value * ExpandableBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
585    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
586}
587
588// Constructors
589SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, Type * type)
590: StreamSetBuffer(BufferKind::BlockBuffer, b, type, resolveStreamSetType(b, type), 1, 0) {
591    mUniqueID = "S";
592
593}
594
595ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
596: StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, resolveStreamSetType(b, type), 0, AddressSpace) {
597    mUniqueID = "E";
598    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
599}
600
601SourceFileBuffer::SourceFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
602: StreamSetBuffer(BufferKind::SourceFileBuffer, b, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), 0, AddressSpace) {
603
604}
605
606ExtensibleBuffer::ExtensibleBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
607: StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), bufferBlocks, AddressSpace) {
608    mUniqueID = "XT" + std::to_string(bufferBlocks);
609    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
610}
611
612CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
613: StreamSetBuffer(BufferKind::CircularBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
614    mUniqueID = "C" + std::to_string(bufferBlocks);
615    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
616
617}
618
619CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
620: StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
621    mUniqueID = "CC" + std::to_string(bufferBlocks);
622    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
623    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
624}
625
626ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
627: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
628, mInitialCapacity(type->getArrayNumElements()) {
629    mUniqueID = "XP" + std::to_string(bufferBlocks);
630    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
631}
632
633inline StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
634: mBufferKind(k)
635, iBuilder(b)
636, mType(resolvedType)
637, mBufferBlocks(blocks)
638, mAddressSpace(AddressSpace)
639, mStreamSetBufferPtr(nullptr)
640, mBaseType(baseType)
641, mProducer(nullptr) {
642
643}
644
645StreamSetBuffer::~StreamSetBuffer() { }
646
647// Helper routines
648ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type) {
649    unsigned numElements = 1;
650    if (LLVM_LIKELY(type->isArrayTy())) {
651        numElements = type->getArrayNumElements();
652        type = type->getArrayElementType();
653    }
654    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
655        type = type->getVectorElementType();
656        if (LLVM_LIKELY(type->isIntegerTy())) {
657            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
658            type = b->getBitBlockType();
659            if (fieldWidth != 1) {
660                type = ArrayType::get(type, fieldWidth);
661            }
662            return ArrayType::get(type, numElements);
663        }
664    }
665    std::string tmp;
666    raw_string_ostream out(tmp);
667    type->print(out);
668    out << " is an unvalid stream set buffer type.";
669    report_fatal_error(out.str());
670}
671
672StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type) {
673    if (LLVM_LIKELY(type->isArrayTy())) {
674        type = type->getArrayElementType();
675    }
676    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
677        type = type->getVectorElementType();
678        if (LLVM_LIKELY(type->isIntegerTy())) {
679            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
680            type = b->getBitBlockType();
681            if (fieldWidth != 1) {
682                type = ArrayType::get(type, fieldWidth);
683            }
684            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
685        }
686    }
687    std::string tmp;
688    raw_string_ostream out(tmp);
689    type->print(out);
690    out << " is an unvalid stream set buffer type.";
691    report_fatal_error(out.str());
692}
Note: See TracBrowser for help on using the repository browser.