source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp @ 5403

Last change on this file since 5403 was 5398, checked in by nmedfort, 2 years ago

Continued work on processing stdin input. Partial integration of ParabixDriver methods into icgrep and editd. Object cache does not currently work for recursive REs.

File size: 31.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
8#include <llvm/IR/BasicBlock.h>    // for BasicBlock
9#include <llvm/IR/Constants.h>     // for ConstantInt
10#include <llvm/IR/DataLayout.h>    // for DataLayout
11#include <llvm/IR/DerivedTypes.h>  // for IntegerType (ptr only), PointerType
12#include <llvm/IR/Module.h>        // for Module
13#include <llvm/IR/Value.h>         // for Value
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/IR/CFG.h>
16
17namespace llvm { class Constant; }
18namespace llvm { class Function; }
19
20using namespace parabix;
21using namespace llvm;
22using namespace IDISA;
23
24ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type);
25
26StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type);
27
28void StreamSetBuffer::allocateBuffer() {
29    Type * const ty = getType();
30    ConstantInt * blocks = iBuilder->getSize(mBufferBlocks);
31    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(ty, iBuilder->getSize(mBufferBlocks));
32    Constant * width = ConstantExpr::getMul(ConstantExpr::getSizeOf(ty), blocks);
33    iBuilder->CreateMemZero(mStreamSetBufferPtr, width, iBuilder->getCacheAlignment());
34}
35
36Value * StreamSetBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
37    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
38    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex});
39}
40
41Value * StreamSetBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
42    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
43    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
44}
45
46void StreamSetBuffer::setBaseAddress(Value * /* self */, Value * /* addr */) const {
47    report_fatal_error("setBaseAddress is not supported by this buffer type");
48}
49
50Value * StreamSetBuffer::getBufferedSize(Value * /* self */) const {
51    report_fatal_error("getBufferedSize is not supported by this buffer type");
52}
53
54void StreamSetBuffer::setBufferedSize(Value * /* self */, llvm::Value * /* size */) const {
55    report_fatal_error("setBufferedSize is not supported by this buffer type");
56}
57
58inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
59    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
60        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
61            return true;
62        }
63    }
64    return false;
65}
66
67Value * StreamSetBuffer::getStreamSetCount(Value *) const {
68    uint64_t count = 1;
69    if (isa<ArrayType>(mBaseType)) {
70        count = mBaseType->getArrayNumElements();
71    }
72    return iBuilder->getSize(count);
73}
74
75inline Value * StreamSetBuffer::modByBufferBlocks(Value * const offset) const {
76    assert (offset->getType()->isIntegerTy());
77    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
78        return offset;
79    } else if (mBufferBlocks == 1) {
80        return ConstantInt::getNullValue(iBuilder->getSizeTy());
81    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
82        return iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
83    } else {
84        return iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
85    }
86}
87
88/**
89 * @brief getRawItemPointer
90 *
91 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
92 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
93 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
94 */
95Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
96    Value * ptr = getBaseAddress(self);
97    if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
98        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
99    }
100    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
101    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
102    if (LLVM_UNLIKELY(ty->getBitWidth() < 8)) {
103        const auto bw = ty->getBitWidth();
104        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
105            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
106        } else {
107            absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
108            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
109        }
110    }
111    return iBuilder->CreateGEP(ptr, absolutePosition);
112}
113
114Value * StreamSetBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
115    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
116        Constant * stride = iBuilder->getSize(iBuilder->getStride());
117        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
118    } else {
119        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
120        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
121    }
122}
123
124Value * StreamSetBuffer::getLinearlyAccessibleBlocks(Value * self, Value * fromBlock) const {
125    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
126    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
127}
128
129void StreamSetBuffer::reserveBytes(Value * self, llvm::Value *requested) const {
130    report_fatal_error("reserve() can only be used with ExtensibleBuffers");
131}
132
133Value * StreamSetBuffer::getBaseAddress(Value * self) const {
134    return self;
135}
136
137void StreamSetBuffer::releaseBuffer(Value * /* self */) const {
138    /* do nothing: memory is stack allocated */
139}
140
141// Single Block Buffer
142
143// For a single block buffer, the block pointer is always the buffer base pointer.
144Value * SingleBlockBuffer::getStreamSetBlockPtr(Value * self, Value *) const {
145    return self;
146}
147
148// External File Buffer
149void ExternalFileBuffer::setStreamSetBuffer(Value * ptr) {
150    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
151}
152
153void ExternalFileBuffer::allocateBuffer() {
154    report_fatal_error("External buffers cannot be allocated.");
155}
156
157Value * ExternalFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
158    return iBuilder->CreateGEP(self, blockIndex);
159}
160
161Value * ExternalFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
162    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
163}
164
165// Source File Buffer
166Value * SourceFileBuffer::getBufferedSize(Value * self) const {
167    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
168    return iBuilder->CreateLoad(ptr);
169}
170
171void SourceFileBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
172    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
173    iBuilder->CreateStore(size, ptr);
174}
175
176void SourceFileBuffer::setBaseAddress(Value * self, Value * addr) const {
177    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
178    iBuilder->CreateStore(addr, ptr);
179}
180
181Value * SourceFileBuffer::getBaseAddress(Value * const self) const {
182    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
183    return iBuilder->CreateLoad(ptr);
184}
185
186Value * SourceFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
187    return iBuilder->CreateGEP(self, blockIndex);
188}
189
190Value * SourceFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
191    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
192}
193
194// ExtensibleBuffer
195Value * ExtensibleBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
196    Value * capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
197    Value * capacity = iBuilder->CreateLoad(capacityPtr);
198    return iBuilder->CreateSub(capacity, fromPosition);
199}
200
201void ExtensibleBuffer::allocateBuffer() {
202    Type * ty = getType();
203    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
204    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
205    Constant * initialSize = ConstantExpr::getSizeOf(ty->getStructElementType(1)->getPointerElementType());
206    initialSize = ConstantExpr::getMul(initialSize, iBuilder->getSize(mBufferBlocks));
207    initialSize = ConstantExpr::getIntegerCast(initialSize, iBuilder->getSizeTy(), false);
208    iBuilder->CreateStore(initialSize, capacityPtr);
209    Value * addr = iBuilder->CreateAnonymousMMap(initialSize);
210    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
211    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
212    iBuilder->CreateStore(addr, addrPtr);
213    Value * const bufferSizePtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
214    iBuilder->CreateStore(ConstantInt::getNullValue(bufferSizePtr->getType()->getPointerElementType()), bufferSizePtr);
215    mStreamSetBufferPtr = instance;
216}
217
218Value * ExtensibleBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
219    return iBuilder->CreateGEP(self, blockIndex);
220}
221
222void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const requiredSize) const {
223    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
224    Value * const currentSize = iBuilder->CreateLoad(capacityPtr);
225    BasicBlock * const entry = iBuilder->GetInsertBlock();
226    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
227    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
228    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(requiredSize, currentSize), resume, expand);
229    iBuilder->SetInsertPoint(expand);
230    Value * const reservedSize = iBuilder->CreateShl(requiredSize, 1);
231#ifdef __APPLE__
232    Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
233    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
234    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
235    iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
236    iBuilder->CreateAlignedFree(baseAddr);
237    Value * const remainingSize = iBuilder->CreateSub(reservedSize, currentSize);
238    iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), remainingSize, iBuilder->getBitBlockWidth() / 8);
239    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
240#else
241    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
242    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
243    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
244    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
245#endif
246    iBuilder->CreateStore(reservedSize, capacityPtr);
247    iBuilder->CreateStore(newAddr, baseAddrPtr);
248    iBuilder->CreateBr(resume);
249    iBuilder->SetInsertPoint(resume);
250}
251
252Value * ExtensibleBuffer::getBufferedSize(Value * self) const {
253    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
254    return iBuilder->CreateLoad(ptr);
255}
256
257void ExtensibleBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
258    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
259    iBuilder->CreateStore(size, ptr);
260}
261
262Value * ExtensibleBuffer::getBaseAddress(Value * const self) const {
263    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
264}
265
266void ExtensibleBuffer::releaseBuffer(Value * self) const {
267    Value * const sizePtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
268    Value * size = iBuilder->CreateLoad(sizePtr);
269    iBuilder->CreateMUnmap(getBaseAddress(self), size);
270}
271
272// Circular Buffer
273
274Value * CircularBuffer::getStreamSetBlockPtr(Value * const self, Value * const blockIndex) const {
275    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
276}
277
278// CircularCopybackBuffer Buffer
279
280void CircularCopybackBuffer::allocateBuffer() {
281    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
282}
283
284void CircularCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
285    Type * size_ty = iBuilder->getSizeTy();
286    Type * i8ptr = iBuilder->getInt8PtrTy();
287    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
288    Function * f = iBuilder->GetInsertBlock()->getParent();
289    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
290    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
291    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
292    unsigned numStreams = getType()->getArrayNumElements();
293    auto elemTy = getType()->getArrayElementType();
294    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
295    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
296    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
297    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
298    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
299    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
300    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
301    iBuilder->SetInsertPoint(wholeBlockCopy);
302    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
303    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
304    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
305    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
306    iBuilder->SetInsertPoint(partialBlockCopy);
307    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth));
308    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
309    for (unsigned strm = 0; strm < numStreams; strm++) {
310        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
311        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
312        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
313    }
314    iBuilder->CreateBr(copyBackDone);
315    iBuilder->SetInsertPoint(copyBackDone);
316}
317
318Value * CircularCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
319    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
320}
321
322// SwizzledCopybackBuffer Buffer
323
324void SwizzledCopybackBuffer::allocateBuffer() {
325    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks + mOverflowBlocks));
326}
327
328void SwizzledCopybackBuffer::createCopyBack(Value * self, Value * overFlowItems) const {
329    Type * size_ty = iBuilder->getSizeTy();
330    Type * i8ptr = iBuilder->getInt8PtrTy();
331    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
332    Function * f = iBuilder->GetInsertBlock()->getParent();
333    BasicBlock * wholeBlockCopy = BasicBlock::Create(iBuilder->getContext(), "wholeBlockCopy", f, 0);
334    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
335    BasicBlock * copyBackDone = BasicBlock::Create(iBuilder->getContext(), "copyBackDone", f, 0);
336    unsigned numStreams = getType()->getArrayNumElements();
337    unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
338    auto elemTy = getType()->getArrayElementType();
339    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
340    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
341    Value * overFlowBlocks = iBuilder->CreateUDiv(overFlowItems, blockSize);
342    Value * partialItems = iBuilder->CreateURem(overFlowItems, blockSize);
343    Value * partialBlockTargetPtr = iBuilder->CreateGEP(self, overFlowBlocks);
344    Value * partialBlockSourcePtr = iBuilder->CreateGEP(overFlowAreaPtr, overFlowBlocks);
345    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(overFlowBlocks, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
346    iBuilder->SetInsertPoint(wholeBlockCopy);
347    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
348    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(self, size_ty));
349    iBuilder->CreateMemMove(iBuilder->CreateBitCast(self, i8ptr), iBuilder->CreateBitCast(overFlowAreaPtr, i8ptr), copyLength, alignment);
350    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyBackDone);
351    iBuilder->SetInsertPoint(partialBlockCopy);
352    Value * copyBits = iBuilder->CreateMul(overFlowItems, iBuilder->getSize(fieldWidth * swizzleFactor));
353    Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
354    for (unsigned strm = 0; strm < numStreams; strm += swizzleFactor) {
355        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
356        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
357        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
358    }
359    iBuilder->CreateBr(copyBackDone);
360    iBuilder->SetInsertPoint(copyBackDone);
361}
362
363Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
364    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
365}
366
367SwizzledCopybackBuffer::SwizzledCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
368: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
369    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
370    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
371    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
372
373}
374
375// Expandable Buffer
376
377void ExpandableBuffer::allocateBuffer() {
378    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
379    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
380    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
381    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
382    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
383    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
384    Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
385    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
386    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
387    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
388}
389
390std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
391
392    // ENTRY
393    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
394    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
395    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
396    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
397    blockIndex = modByBufferBlocks(blockIndex);
398
399    assert (streamIndex->getType() == capacity->getType());
400    Value * const cond = iBuilder->CreateICmpULT(streamIndex, capacity);
401
402    // Are we guaranteed that we can access this stream?
403    if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
404        iBuilder->CreateAssert(cond, "ExpandableBuffer: out-of-bounds stream access");
405        Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, capacity), streamIndex);
406        return {streamSet, offset};
407    }
408
409    BasicBlock * const entry = iBuilder->GetInsertBlock();
410    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
411    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
412
413    iBuilder->CreateLikelyCondBr(cond, resume, expand);
414
415    // EXPAND
416    iBuilder->SetInsertPoint(expand);
417
418    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
419    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
420
421    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
422    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
423    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
424
425    std::string tmp;
426    raw_string_ostream out(tmp);
427    out << "__expand";
428    elementType->print(out);
429    std::string name = out.str();
430
431    Module * const m = iBuilder->getModule();
432    Function * expandFunction = m->getFunction(name);
433
434    if (expandFunction == nullptr) {
435
436        const auto ip = iBuilder->saveIP();
437
438        FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), iBuilder->getSizeTy(), iBuilder->getSizeTy()}, false);
439        expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
440
441        auto args = expandFunction->arg_begin();
442        Value * streamSet = &*args++;
443        Value * capacity = &*args++;
444        Value * newCapacity = &*args;
445
446        BasicBlock * entry = BasicBlock::Create(iBuilder->getContext(), "entry", expandFunction);
447        iBuilder->SetInsertPoint(entry);
448
449        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
450        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
451        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
452
453        const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
454        for (unsigned i = 0; i < mBufferBlocks; ++i) {
455            ConstantInt * const offset = iBuilder->getSize(i);
456            Value * srcOffset = iBuilder->CreateMul(capacity, offset);
457            Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
458            Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
459            Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
460            iBuilder->CreateMemCpy(destPtr, srcPtr, iBuilder->CreateMul(capacity, vectorWidth), alignment);
461            Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
462            Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
463            iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
464        }
465
466        iBuilder->CreateAlignedFree(streamSet);
467
468        iBuilder->CreateRet(newStreamSet);
469
470        iBuilder->restoreIP(ip);
471    }
472
473    Value * newStreamSet = iBuilder->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
474    iBuilder->CreateStore(newStreamSet, streamSetPtr);
475    iBuilder->CreateStore(newCapacity, capacityPtr);
476
477    iBuilder->CreateBr(resume);
478
479    // RESUME
480    iBuilder->SetInsertPoint(resume);
481
482    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
483    phiStreamSet->addIncoming(streamSet, entry);
484    phiStreamSet->addIncoming(newStreamSet, expand);
485
486    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
487    phiCapacity->addIncoming(capacity, entry);
488    phiCapacity->addIncoming(newCapacity, expand);
489
490    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
491
492    return {phiStreamSet, offset};
493}
494
495Value * ExpandableBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
496    Value * ptr, * offset;
497    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
498    return iBuilder->CreateGEP(ptr, offset);
499}
500
501Value * ExpandableBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
502    Value * ptr, * offset;
503    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
504    return iBuilder->CreateGEP(ptr, {offset, packIndex});
505}
506
507Value * ExpandableBuffer::getStreamSetCount(Value * self) const {
508    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
509}
510
511Value * ExpandableBuffer::getBaseAddress(Value * self) const {
512    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
513}
514
515void ExpandableBuffer::releaseBuffer(Value * self) const {
516    iBuilder->CreateAlignedFree(getBaseAddress(self));
517}
518
519Value * ExpandableBuffer::getStreamSetBlockPtr(Value *, Value *) const {
520    report_fatal_error("Expandable buffers: getStreamSetBlockPtr is not supported.");
521}
522
523Value * ExpandableBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
524    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
525}
526
527// Constructors
528SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, Type * type)
529: StreamSetBuffer(BufferKind::BlockBuffer, b, type, resolveStreamSetType(b, type), 1, 0) {
530    mUniqueID = "S";
531
532}
533
534ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
535: StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, resolveStreamSetType(b, type), 0, AddressSpace) {
536    mUniqueID = "E";
537    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
538}
539
540SourceFileBuffer::SourceFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
541: StreamSetBuffer(BufferKind::SourceFileBuffer, b, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), 0, AddressSpace) {
542
543}
544
545ExtensibleBuffer::ExtensibleBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
546: StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), bufferBlocks, AddressSpace) {
547    mUniqueID = "XT" + std::to_string(bufferBlocks);
548    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
549}
550
551CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
552: StreamSetBuffer(BufferKind::CircularBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
553    mUniqueID = "C" + std::to_string(bufferBlocks);
554    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
555
556}
557
558CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
559: StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
560    mUniqueID = "CC" + std::to_string(bufferBlocks);
561    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
562    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
563}
564
565ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
566: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
567, mInitialCapacity(type->getArrayNumElements()) {
568    mUniqueID = "XP" + std::to_string(bufferBlocks);
569    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
570}
571
572inline StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
573: mBufferKind(k)
574, iBuilder(b)
575, mType(resolvedType)
576, mBufferBlocks(blocks)
577, mAddressSpace(AddressSpace)
578, mStreamSetBufferPtr(nullptr)
579, mBaseType(baseType) {
580
581}
582
583StreamSetBuffer::~StreamSetBuffer() { }
584
585// Helper routines
586ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type) {
587    unsigned numElements = 1;
588    if (LLVM_LIKELY(type->isArrayTy())) {
589        numElements = type->getArrayNumElements();
590        type = type->getArrayElementType();
591    }
592    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
593        type = type->getVectorElementType();
594        if (LLVM_LIKELY(type->isIntegerTy())) {
595            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
596            type = b->getBitBlockType();
597            if (fieldWidth != 1) {
598                type = ArrayType::get(type, fieldWidth);
599            }
600            return ArrayType::get(type, numElements);
601        }
602    }
603    std::string tmp;
604    raw_string_ostream out(tmp);
605    type->print(out);
606    out << " is an unvalid stream set buffer type.";
607    report_fatal_error(out.str());
608}
609
610StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type) {
611    if (LLVM_LIKELY(type->isArrayTy())) {
612        type = type->getArrayElementType();
613    }
614    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
615        type = type->getVectorElementType();
616        if (LLVM_LIKELY(type->isIntegerTy())) {
617            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
618            type = b->getBitBlockType();
619            if (fieldWidth != 1) {
620                type = ArrayType::get(type, fieldWidth);
621            }
622            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
623        }
624    }
625    std::string tmp;
626    raw_string_ostream out(tmp);
627    type->print(out);
628    out << " is an unvalid stream set buffer type.";
629    report_fatal_error(out.str());
630}
Note: See TracBrowser for help on using the repository browser.