source: icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

Last change on this file was 6261, checked in by nmedfort, 7 months ago

Work on OptimizationBranch; revisited pipeline termination

File size: 32.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "streamset.h"
7#include <llvm/IR/Module.h>
8#include <llvm/Support/raw_ostream.h>
9#include <kernels/kernel.h>
10#include <kernels/kernel_builder.h>
11#include <toolchain/toolchain.h>
12#include <llvm/Support/Debug.h>
13#include <llvm/Support/Format.h>
14#include <array>
15
16namespace llvm { class Constant; }
17namespace llvm { class Function; }
18
19using namespace llvm;
20using IDISA::IDISA_Builder;
21
/** Returns true iff n is non-zero and has exactly one bit set (i.e., n is a power of two). */
inline static bool is_power_2(const uint64_t n) {
    if (n == 0) {
        return false;
    }
    // clearing the lowest set bit leaves zero only when a single bit was set
    return (n & (n - 1)) == 0;
}
25
26namespace kernel {
27
28inline Value * StreamSetBuffer::getHandle(IDISA_Builder * const /* b */) const {
29    return mHandle;
30}
31
32void StreamSetBuffer::setHandle(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const handle) {
33    assert ("handle cannot be null!" && handle);
34    assert ("handle is not of the correct type" && handle->getType() == getHandlePointerType(b));
35    #ifndef NDEBUG
36    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
37    const Function * const builderFunction = b->GetInsertBlock()->getParent();
38    assert ("handle is not from the current function." && (handleFunction == builderFunction));
39    #endif
40    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
41        b->CreateAssert(handle, "handle cannot be null!");
42    }
43    mHandle = handle;
44}
45
46inline void StreamSetBuffer::assertValidStreamIndex(IDISA_Builder * const b, Value * streamIndex) const {
47    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
48        Value * const count = getStreamSetCount(b);
49        Value * const withinSet = b->CreateICmpULT(b->CreateZExtOrTrunc(streamIndex, count->getType()), count);
50        b->CreateAssert(withinSet, "out-of-bounds stream access");
51    }
52}
53
54Value * StreamSetBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
55    assertValidStreamIndex(b, streamIndex);
56    return b->CreateGEP(getBaseAddress(b), {blockIndex, streamIndex});
57}
58
59Value * StreamSetBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
60    assertValidStreamIndex(b, streamIndex);
61    return b->CreateGEP(getBaseAddress(b), {blockIndex, streamIndex, packIndex});
62}
63
64Value * StreamSetBuffer::getStreamSetCount(IDISA_Builder * const b) const {
65    size_t count = 1;
66    if (isa<ArrayType>(getBaseType())) {
67        count = getBaseType()->getArrayNumElements();
68    }
69    return b->getSize(count);
70}
71
72/**
73 * @brief getRawItemPointer
74 *
75 * get a raw pointer the iN field at position absoluteItemPosition of the stream number streamIndex of the stream set.
76 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
77 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
78 */
79Value * StreamSetBuffer::getRawItemPointer(IDISA_Builder * const b, Value * absolutePosition) const {
80    Value * ptr = getBaseAddress(b);
81    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
82    const auto bw = elemTy->getPrimitiveSizeInBits();
83    assert (is_power_2(bw));
84    if (bw < 8) {
85        Constant * const fw = ConstantInt::get(absolutePosition->getType(), 8 / bw);
86        if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
87            b->CreateAssertZero(b->CreateURem(absolutePosition, fw), "absolutePosition must be byte aligned");
88        }
89        absolutePosition = b->CreateUDiv(absolutePosition, fw);
90        ptr = b->CreatePointerCast(ptr, b->getInt8PtrTy());
91    } else {
92        ptr = b->CreatePointerCast(ptr, elemTy->getPointerTo());
93    }
94    return b->CreateGEP(ptr, absolutePosition);
95}
96
97Value * StreamSetBuffer::addOverflow(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const bufferCapacity, Value * const overflowItems, Value * const consumedOffset) const {
98    if (overflowItems) {
99        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
100            Value * const overflowCapacity = b->getSize(getOverflowCapacity(b));
101            Value * const valid = b->CreateICmpULE(overflowItems, overflowCapacity);
102            b->CreateAssert(valid, "overflow items exceeds overflow capacity");
103        }
104        // limit the overflow so that we do not overwrite our unconsumed data during a copyback
105        Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
106        return b->CreateAdd(bufferCapacity, effectiveOverflow);
107    } else { // no overflow
108        return bufferCapacity;
109    }
110}
111
112// External File Buffer
113Type * ExternalBuffer::getHandleType(const std::unique_ptr<kernel::KernelBuilder> & b) const {
114    PointerType * const ptrTy = getPointerType();
115    IntegerType * const sizeTy = b->getSizeTy();
116    return StructType::get(b->getContext(), {ptrTy, sizeTy});
117}
118
119void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
120    report_fatal_error("allocateBuffer is not supported by external buffers");
121}
122
123void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & /* b */) const {
124    // this buffer is not responsible for free-ing th data associated with it
125}
126
127void ExternalBuffer::setBaseAddress(IDISA_Builder * const b, Value * const addr) const {
128    assert (mHandle && "has not been set prior to calling setBaseAddress");
129    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
130        b->CreateAssert(addr, "base address cannot be null");
131    }
132    Value * const p = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(BaseAddress)});
133    b->CreateStore(b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType()), p);
134}
135
136Value * ExternalBuffer::getBaseAddress(IDISA_Builder * const b) const {
137    assert (mHandle && "has not been set prior to calling getBaseAddress");
138    Value * const p = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(BaseAddress)});
139    return b->CreateLoad(p);
140}
141
142size_t ExternalBuffer::getOverflowCapacity(const std::unique_ptr<kernel::KernelBuilder> & b) const {
143    return 0;
144}
145
146Value * ExternalBuffer::getOverflowAddress(IDISA_Builder * const /* b */) const {
147    report_fatal_error("getOverflowAddress is not supported by external buffers");
148}
149
150void ExternalBuffer::setCapacity(IDISA_Builder * const b, Value * const capacity) const {
151    assert (mHandle && "has not been set prior to calling setCapacity");
152    Value *  const p = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(Capacity)});
153    b->CreateStore(b->CreateZExt(capacity, b->getSizeTy()), p);
154}
155
156Value * ExternalBuffer::getCapacity(IDISA_Builder * const b) const {
157    assert (mHandle && "has not been set prior to calling getCapacity");
158    Value * const p = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(Capacity)});
159    return b->CreateLoad(p);
160}
161
162Value * ExternalBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, Value * /* overflowItems */) const {
163    return b->CreateSub(totalItems, fromPosition);
164}
165
166Value * ExternalBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const /* consumed */, Value * /* overflowItems */) const {
167    assert (fromPosition);
168    Value * const capacity = getCapacity(b.get());
169    assert (fromPosition->getType() == capacity->getType());
170    return b->CreateSub(capacity, fromPosition);
171}
172
173inline void ExternalBuffer::assertValidBlockIndex(IDISA_Builder * const b, Value * blockIndex) const {
174    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
175        Value * const blockCount = b->CreateCeilUDiv(getCapacity(b), b->getSize(b->getBitBlockWidth()));
176        blockIndex = b->CreateZExtOrTrunc(blockIndex, blockCount->getType());
177        Value * const withinCapacity = b->CreateICmpULT(blockIndex, blockCount);
178        b->CreateAssert(withinCapacity, "blockIndex exceeds buffer capacity");
179    }
180}
181
182Value * ExternalBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
183    //assertValidBlockIndex(b, blockIndex);
184    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, blockIndex);
185}
186
187Value * ExternalBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
188    //assertValidBlockIndex(b, blockIndex);
189    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, blockIndex, packIndex);
190}
191
192Value * ExternalBuffer::getStreamLogicalBasePtr(IDISA_Builder * const b, Value * const streamIndex, Value * /* blockIndex */) const {
193    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, b->getSize(0));
194}
195
196// Static Buffer
197Type * StaticBuffer::getHandleType(const std::unique_ptr<kernel::KernelBuilder> & /* b */) const {
198    return getPointerType();
199}
200
201void StaticBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
202    assert (mHandle && "has not been set prior to calling allocateBuffer");
203    Value * const buffer = b->CreateCacheAlignedMalloc(getType(), b->getSize(mCapacity + mOverflow), mAddressSpace);
204    b->CreateStore(buffer, mHandle);
205}
206
207LLVM_READNONE inline ConstantPointerNull * nullPointerFor(Value * ptr) {
208    return ConstantPointerNull::get(cast<PointerType>(ptr->getType()));
209}
210
211void StaticBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
212    Value * buffer = b->CreateLoad(mHandle);
213    b->CreateFree(buffer);
214    b->CreateStore(nullPointerFor(buffer), mHandle);
215}
216
217inline bool isCapacityGuaranteed(const Value * const index, const size_t capacity) {
218    return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
219}
220
221Value * StaticBuffer::modByCapacity(IDISA_Builder * const b, Value * const offset) const {
222    assert (offset->getType()->isIntegerTy());
223    if (LLVM_UNLIKELY(isCapacityGuaranteed(offset, mCapacity))) {
224        return offset;
225    } else if (LLVM_UNLIKELY(mCapacity == 1)) {
226        return ConstantInt::getNullValue(offset->getType());
227    } else if (LLVM_LIKELY(is_power_2(mCapacity))) {
228        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mCapacity - 1));
229    } else {
230        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mCapacity));
231    }
232}
233
234Value * StaticBuffer::getCapacity(IDISA_Builder * const b) const {
235    return b->getSize(mCapacity * b->getBitBlockWidth());
236}
237
238void StaticBuffer::setCapacity(IDISA_Builder * const /* b */, Value * /* c */) const {
239    report_fatal_error("setCapacity is not supported by static buffers");
240}
241
242Value * StaticBuffer::getBaseAddress(IDISA_Builder * const b) const {
243    return b->CreateLoad(getHandle(b));
244}
245
246void StaticBuffer::setBaseAddress(IDISA_Builder * const /* b */, Value * /* addr */) const {
247    report_fatal_error("setBaseAddress is not supported by static buffers");
248}
249
250size_t StaticBuffer::getOverflowCapacity(const std::unique_ptr<kernel::KernelBuilder> & b) const {
251    return mOverflow * b->getBitBlockWidth();
252}
253
254Value * StaticBuffer::getOverflowAddress(IDISA_Builder * const b) const {
255    return b->CreateGEP(getBaseAddress(b), b->getSize(mCapacity));
256}
257
258Value * StaticBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
259    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, modByCapacity(b, blockIndex));
260}
261
262Value * StaticBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
263    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, modByCapacity(b, blockIndex), packIndex);
264}
265
266Value * StaticBuffer::getStreamLogicalBasePtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
267    Value * const baseBlockIndex = b->CreateSub(modByCapacity(b, blockIndex), blockIndex);
268    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, baseBlockIndex);
269}
270
271Value * StaticBuffer::getRawItemPointer(IDISA_Builder * const b, Value * const absolutePosition) const {
272    return StreamSetBuffer::getRawItemPointer(b, b->CreateURem(absolutePosition, getCapacity(b)));
273}
274
275Value * StaticBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, Value * overflowItems) const {
276    Value * const capacity = getCapacity(b.get());
277    Value * const availableItems = b->CreateSub(totalItems, fromPosition);
278    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
279    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems);
280    Value * const linearSpace = b->CreateSub(capacityWithOverflow, fromOffset);
281    return b->CreateUMin(availableItems, linearSpace);
282}
283
284Value * StaticBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, Value * overflowItems) const {
285    Value * const capacity = getCapacity(b.get());
286    Value * const unconsumedItems = b->CreateSub(fromPosition, consumedItems);
287    Value * const full = b->CreateICmpUGE(unconsumedItems, capacity);
288    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
289    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
290    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
291    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems, consumedOffset);
292    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
293    Value * const remaining = b->CreateSub(limit, fromOffset);
294    return b->CreateSelect(full, b->getSize(0), remaining);
295}
296
297// Dynamic Buffer
298Type * DynamicBuffer::getHandleType(const std::unique_ptr<kernel::KernelBuilder> & b) const {
299    PointerType * typePtr = getPointerType();
300    IntegerType * sizeTy = b->getSizeTy();
301    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy});
302}
303
304void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
305    assert (mHandle && "has not been set prior to calling allocateBuffer");
306    Constant * const capacity = b->getSize(mInitialCapacity * b->getBitBlockWidth());
307    // note: when adding extensible stream sets, make sure to set the initial count here.
308    Value * const bufferSize = b->CreateRoundUp(getAllocationSize(b.get(), capacity, mOverflow), b->getSize(b->getCacheAlignment()));
309    Value * const baseAddressField = b->CreateGEP(mHandle, {b->getInt32(0), b->getInt32(BaseAddress)});
310    Type * const baseAddressPtrTy = baseAddressField->getType()->getPointerElementType();
311    Value * const baseAddress = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufferSize), baseAddressPtrTy);
312    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
313        b->CallPrintInt("allocated: ", baseAddress);
314        b->CallPrintInt("allocated capacity: ", bufferSize);
315    }
316    b->CreateStore(baseAddress, baseAddressField);
317    Value * const priorAddressField = b->CreateGEP(mHandle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
318    b->CreateStore(ConstantPointerNull::getNullValue(baseAddressPtrTy), priorAddressField);
319    Value * const capacityField = b->CreateGEP(mHandle, {b->getInt32(0), b->getInt32(Capacity)});
320    b->CreateStore(b->getSize(mInitialCapacity), capacityField);
321}
322
323void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
324    /* Free the dynamically allocated buffer(s). */
325    Value * const handle = getHandle(b.get());
326    Value * priorAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
327    Value * priorAddress = b->CreateLoad(priorAddressField);
328    b->CreateFree(priorAddress);
329    b->CreateStore(nullPointerFor(priorAddress), priorAddressField);
330    Value * baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
331    Value * baseAddress = b->CreateLoad(baseAddressField);
332    b->CreateFree(baseAddress);
333    b->CreateStore(nullPointerFor(baseAddress), baseAddressField);
334}
335
336void DynamicBuffer::setBaseAddress(IDISA_Builder * const /* b */, Value * /* addr */) const {
337    report_fatal_error("setBaseAddress is not supported by DynamicBuffers");
338}
339
340Value * DynamicBuffer::getBaseAddress(IDISA_Builder * const b) const {
341    Value * const ptr = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(BaseAddress)});
342    return b->CreateLoad(ptr);
343}
344
345size_t DynamicBuffer::getOverflowCapacity(const std::unique_ptr<kernel::KernelBuilder> & b) const {
346    return mOverflow * b->getBitBlockWidth();
347}
348
349Value * DynamicBuffer::getOverflowAddress(IDISA_Builder * const b) const {
350    Value * const capacityPtr = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(Capacity)});
351    Value * const capacity = b->CreateLoad(capacityPtr);
352    return b->CreateGEP(getBaseAddress(b), capacity);
353}
354
355Value * DynamicBuffer::modByCapacity(IDISA_Builder * const b, Value * const offset) const {
356    assert (offset->getType()->isIntegerTy());
357    if (isCapacityGuaranteed(offset, mInitialCapacity)) {
358        return offset;
359    } else {
360        Value * const capacityPtr = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(Capacity)});
361        Value * const capacity = b->CreateLoad(capacityPtr);
362        return b->CreateURem(b->CreateZExtOrTrunc(offset, capacity->getType()), capacity);
363    }
364}
365
366Value * DynamicBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
367    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, modByCapacity(b, blockIndex));
368}
369
370Value * DynamicBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
371    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, modByCapacity(b, blockIndex), packIndex);
372}
373
374Value * DynamicBuffer::getStreamLogicalBasePtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
375    Value * const baseBlockIndex = b->CreateSub(modByCapacity(b, blockIndex), blockIndex);
376    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, baseBlockIndex);
377}
378
379Value * DynamicBuffer::getRawItemPointer(IDISA_Builder * const b, Value * absolutePosition) const {
380    return StreamSetBuffer::getRawItemPointer(b, b->CreateURem(absolutePosition, getCapacity(b)));
381}
382
383Value * DynamicBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> &b, Value * const fromPosition, Value * const totalItems, Value * overflowItems) const {
384    Value * const capacity = getCapacity(b.get());
385    Value * const availableItems = b->CreateSub(totalItems, fromPosition);
386    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
387    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems);
388    Value * const linearSpace = b->CreateSub(capacityWithOverflow, fromOffset);
389    return b->CreateUMin(availableItems, linearSpace);
390}
391
392Value * DynamicBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, Value * overflowItems) const {
393    Value * const capacity = getCapacity(b.get());
394    Value * const unconsumedItems = b->CreateSub(fromPosition, consumedItems);
395    Value * const full = b->CreateICmpUGE(unconsumedItems, capacity);
396    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
397    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
398    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
399    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems, consumedOffset);
400    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
401    Value * const remaining = b->CreateSub(limit, fromOffset);
402    return b->CreateSelect(full, b->getSize(0), remaining);
403}
404
405Value * DynamicBuffer::getCapacity(IDISA_Builder * const b) const {
406    Value * ptr = b->CreateGEP(getHandle(b), {b->getInt32(0), b->getInt32(Capacity)});
407    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
408}
409
410void DynamicBuffer::setCapacity(IDISA_Builder * const b, Value * required) const {
411
412    std::vector<Value *> indices(2);
413    indices[0] = b->getInt32(0);
414    indices[1] = b->getInt32(Capacity);
415
416    Constant * const LOG_2_BIT_BLOCK_WIDTH = b->getSize(std::log2(b->getBitBlockWidth()));
417    Value * const handle = getHandle(b);
418    Value * const capacityField = b->CreateGEP(handle, indices);
419    Value * const capacity = b->CreateShl(b->CreateLoad(capacityField), LOG_2_BIT_BLOCK_WIDTH);
420    Value * const newCapacity = b->CreateRoundUp(required, b->CreateShl(capacity, 1));
421    Value * const newBufferSize = b->CreateRoundUp(getAllocationSize(b, newCapacity, mOverflow), b->getSize(b->getCacheAlignment()));
422    Value * const newBaseAddress = b->CreateCacheAlignedMalloc(newBufferSize);
423    indices[1] = b->getInt32(BaseAddress);
424    Value * const baseAddressField = b->CreateGEP(handle, indices);
425    Value * const currentBaseAddress = b->CreateLoad(baseAddressField);
426    indices[1] = b->getInt32(PriorBaseAddress);
427    Value * const priorBaseAddressField = b->CreateGEP(handle, indices);
428    Value * const priorBaseAddress = b->CreateLoad(priorBaseAddressField);
429
430    // Copy the data twice to handle the potential of a dynamic circular buffer. E.g., suppose p is the processed
431    // item count of some kernel. Conceptually, all As after p will be processed before any B, despite the fact
432    // that Bs are placed before the As in the buffer. When we double the size of the buffer, we double the modulus
433    // of the circular buffer. By copying the data to both halves,
434
435    //                          p
436    // Current Buffer   |BBBBBBB|AAAAAAAAAAAAAAAAA|
437
438    //                          p
439    // New Buffer       |BBBBBBB|AAAAAAAAAAAAAAAAA|BBBBBBB|AAAAAAAAAAAAAAAAA|
440
441    // TODO: what if this has to be more than doubled? original method just repeated the doubling process.
442
443    Value * const bufferSize = getAllocationSize(b, capacity, 0);
444    b->CreateMemCpy(newBaseAddress, currentBaseAddress, bufferSize, b->getCacheAlignment());
445    Value * const expandedBaseAddress = b->CreateGEP(newBaseAddress, bufferSize);
446    b->CreateMemCpy(expandedBaseAddress, currentBaseAddress, bufferSize, b->getCacheAlignment());
447
448    b->CreateStore(b->CreatePointerCast(newBaseAddress, currentBaseAddress->getType()), baseAddressField);
449    b->CreateStore(b->CreateLShr(newCapacity, LOG_2_BIT_BLOCK_WIDTH), capacityField);
450    b->CreateStore(currentBaseAddress, priorBaseAddressField);
451    b->CreateFree(priorBaseAddress);
452
453}
454
#if 0

// NOTE: everything up to the matching #endif is disabled and is not compiled.

/**
 * @brief expandBuffer
 *
 * Expand the buffer, ensuring that we have at least the required space after the current produced offset and that
 * the total size of the buffer is at least 2x the current capacity.
 */
void DynamicBuffer::expandBuffer(const std::unique_ptr<KernelBuilder> & b, Value * const consumed, Value * const produced, Value * const required) const {

    ConstantInt * const ZERO = b->getSize(0);
    ConstantInt * const LOG_2_BIT_BLOCK_WIDTH = b->getSize(std::log2(b->getBitBlockWidth()));

    Value * const handle = getHandle(b.get());
    Value * const capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
    Value * const blockCapacity = b->CreateLoad(capacityField);
    Value * const currentCapacity = b->CreateShl(blockCapacity, LOG_2_BIT_BLOCK_WIDTH);

    //                             c       p
    // Current Buffer |CCCCCCCCCCCC|PPPPPPP|...|

    // New Buffer       |..........|PPPPPPP|RRRRRRRRRR|...................|

    Value * const stepSize = b->CreateRoundUp(required, b->getSize(b->getBitBlockWidth()));
    Value * const unconsumed = b->CreateSub(produced, consumed);
    Value * const expansion = b->CreateAdd(unconsumed, stepSize);
    Value * const baseNewCapacity = b->CreateRoundUp(expansion, currentCapacity);
    BasicBlock * const entryBlock = b->GetInsertBlock();
    BasicBlock * const calculate = b->CreateBasicBlock("capacityCalculation");
    BasicBlock * const expand = b->CreateBasicBlock("capacityExpansion");
    b->CreateBr(calculate);

    // ENSURE: (consumed % newCapacity) < (produced % newCapacity) && (produced % newCapacity) + required < newCapacity

    // loop: grow the candidate capacity by stepSize until the validity test below passes
    b->SetInsertPoint(calculate);
    PHINode * const newCapacity = b->CreatePHI(baseNewCapacity->getType(), 2);
    newCapacity->addIncoming(baseNewCapacity, entryBlock);
    Value * const consumedOffset = b->CreateURem(consumed, newCapacity);
    Value * const producedOffset = b->CreateURem(produced, newCapacity);
    Value * const dataIsArrangedCorrectly = b->CreateICmpULE(consumedOffset, producedOffset);
    Value * const hasEnoughSpace = b->CreateICmpULE(b->CreateAdd(producedOffset, stepSize), newCapacity);
    // NOTE(review): the ENSURE comment above joins the two conditions with &&, but they are combined
    // with an OR here -- confirm which is intended before re-enabling this code.
    Value * const valid = b->CreateOr(dataIsArrangedCorrectly, hasEnoughSpace);
    Value * const newCapacity2 = b->CreateAdd(newCapacity, stepSize);
    newCapacity->addIncoming(newCapacity2, calculate);
    b->CreateLikelyCondBr(valid, expand, calculate);

    b->SetInsertPoint(expand);
    Value * const baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    Value * const currentBaseAddress = b->CreateLoad(baseAddressField);
    Value * const priorBaseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
    Value * const priorBaseAddress = b->CreateLoad(priorBaseAddressField);

    Value * const allocationSize = getAllocationSize(b.get(), newCapacity, mOverflow);
    Value * const newBufferSize = b->CreateRoundUp(allocationSize, b->getSize(b->getCacheAlignment()));
    Value * const newBaseAddress = b->CreatePointerCast(b->CreateCacheAlignedMalloc(newBufferSize), currentBaseAddress->getType());
    // offsets of the consumed / produced positions within the current (old) buffer
    Value * const sourceConsumedOffset = b->CreateURem(consumed, currentCapacity);
    Value * const sourceProducedOffset = b->CreateURem(produced, currentCapacity);

    BasicBlock * const copyLinear = b->CreateBasicBlock("copyLinear");
    BasicBlock * const copyNonLinear = b->CreateBasicBlock("copyNonLinear");
    BasicBlock * const storeNewBuffer = b->CreateBasicBlock("storeNewBuffer");

    Value * const consumedIndex = b->CreateLShr(consumedOffset, LOG_2_BIT_BLOCK_WIDTH);
    Value * const consumedAddr = b->CreateGEP(newBaseAddress, {consumedIndex, ZERO});

    Value * const sourceConsumedIndex = b->CreateLShr(sourceConsumedOffset, LOG_2_BIT_BLOCK_WIDTH);
    // adding (block width - 1) before the shift rounds the produced offset up to a whole block
    Value * const sourceProducedOffsetCeil = b->CreateAdd(sourceProducedOffset, b->getSize(b->getBitBlockWidth() - 1));
    Value * const sourceProducedIndex = b->CreateLShr(sourceProducedOffsetCeil, LOG_2_BIT_BLOCK_WIDTH);

    DataLayout DL(b->getModule());
    Type * const intPtrTy = DL.getIntPtrType(newBaseAddress->getType());

    Value * const sourceConsumedAddr = b->CreateGEP(currentBaseAddress, {sourceConsumedIndex, ZERO});
    Value * const sourceConsumedAddrInt = b->CreatePtrToInt(sourceConsumedAddr, intPtrTy);

    Value * const sourceProducedAddr = b->CreateGEP(currentBaseAddress, {sourceProducedIndex, ZERO});
    Value * const sourceProducedAddrInt = b->CreatePtrToInt(sourceProducedAddr, intPtrTy);

    Value * const initiallyValid = b->CreateICmpULE(sourceConsumedOffset, sourceProducedOffset);
    b->CreateLikelyCondBr(initiallyValid, copyLinear, copyNonLinear);

    // the unconsumed data is one contiguous range of the old buffer: a single copy suffices
    b->SetInsertPoint(copyLinear); // consumed <= produced
    Value * const copyLength = b->CreateSub(sourceProducedAddrInt, sourceConsumedAddrInt);
    b->CreateMemCpy(consumedAddr, sourceConsumedAddr, copyLength, b->getCacheAlignment());
    b->CreateBr(storeNewBuffer);

    // the unconsumed data wraps around the end of the old buffer: copy the tail, then the head
    b->SetInsertPoint(copyNonLinear); // consumed > produced
    Value * const bufferEnd = b->CreateGEP(currentBaseAddress, {blockCapacity, ZERO});
    Value * const bufferEndInt = b->CreatePtrToInt(bufferEnd, intPtrTy);
    Value * const copyLength1 = b->CreateSub(bufferEndInt, sourceConsumedAddrInt);
    b->CreateMemCpy(consumedAddr, sourceConsumedAddr, copyLength1, b->getCacheAlignment());
    Constant * const elementSize = ConstantExpr::getSizeOf(consumedAddr->getType()->getPointerElementType());
    Value * const continuationIndex = b->CreateAdd(consumedIndex, b->CreateExactUDiv(copyLength1, elementSize));
    Value * const continuationAddr = b->CreateGEP(newBaseAddress, {continuationIndex, ZERO});
    Value * const baseAddressInt = b->CreatePtrToInt(currentBaseAddress, intPtrTy);
    Value * const copyLength2 = b->CreateSub(sourceProducedAddrInt, baseAddressInt);
    b->CreateMemCpy(continuationAddr, currentBaseAddress, copyLength2, b->getCacheAlignment());
    b->CreateBr(storeNewBuffer);

    // publish the new buffer; the replaced buffer becomes the prior buffer and the old prior is freed
    b->SetInsertPoint(storeNewBuffer);
    b->CreateStore(newBaseAddress, baseAddressField);
    b->CreateStore(b->CreateLShr(newCapacity, LOG_2_BIT_BLOCK_WIDTH), capacityField);
    b->CreateStore(currentBaseAddress, priorBaseAddressField);
    b->CreateFree(priorBaseAddress);

}

#endif
563
564Value * DynamicBuffer::getAllocationSize(IDISA_Builder * const b, Value * const requiredItemCapacity, const size_t overflow) const {
565    Value * itemCapacity = requiredItemCapacity;
566    if (overflow) {
567        Constant * const overflowSize =  b->getSize(overflow * b->getBitBlockWidth());
568        itemCapacity = b->CreateAdd(requiredItemCapacity, overflowSize);
569    }
570    Value * const numOfStreams = getStreamSetCount(b);
571    Value * bufferSize = b->CreateMul(itemCapacity, numOfStreams);
572    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
573    if (LLVM_LIKELY(fieldWidth < 8)) {
574        bufferSize = b->CreateCeilUDiv(bufferSize, b->getSize(8 / fieldWidth));
575    } else if (LLVM_UNLIKELY(fieldWidth > 8)) {
576        bufferSize = b->CreateMul(bufferSize, b->getSize(fieldWidth / 8));
577    }
578    return bufferSize;
579}
580
581ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
582                               const unsigned AddressSpace)
583: StreamSetBuffer(BufferKind::ExternalBuffer, b, type, AddressSpace) {
584
585}
586
587StaticBuffer::StaticBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
588                           const size_t capacity, const size_t overflowSize, const unsigned AddressSpace)
589: StreamSetBuffer(BufferKind::StaticBuffer, b, type, AddressSpace)
590, mCapacity(capacity / b->getBitBlockWidth())
591, mOverflow(overflowSize / b->getBitBlockWidth()) {
592    assert ("static buffer cannot have 0 capacity" && capacity);
593    assert ("static buffer capacity must be a multiple of bitblock width" && (capacity % b->getBitBlockWidth()) == 0);
594    assert ("static buffer overflow must be a multiple of bitblock width" && (overflowSize % b->getBitBlockWidth()) == 0);
595}
596
597DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
598                             const size_t initialCapacity, const size_t overflowSize, const unsigned AddressSpace)
599: StreamSetBuffer(BufferKind::DynamicBuffer, b, type, AddressSpace)
600, mInitialCapacity(initialCapacity / b->getBitBlockWidth())
601, mOverflow(overflowSize / b->getBitBlockWidth()) {
602    assert ("dynamic buffer cannot have 0 initial capacity" && initialCapacity);
603    assert ("dynamic buffer capacity must be a multiple of bitblock width" && (initialCapacity % b->getBitBlockWidth()) == 0);
604    assert ("dynamic buffer overflow must be a multiple of bitblock width" && (overflowSize % b->getBitBlockWidth()) == 0);
605}
606
607/** ------------------------------------------------------------------------------------------------------------- *
608 * @brief resolveStreamSetType
609 ** ------------------------------------------------------------------------------------------------------------- */
610Type * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const streamSetType) {
611    unsigned numElements = 1;
612    Type * type = streamSetType;
613    if (LLVM_LIKELY(type->isArrayTy())) {
614        numElements = type->getArrayNumElements();
615        type = type->getArrayElementType();
616    }
617    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
618        type = type->getVectorElementType();
619        if (LLVM_LIKELY(type->isIntegerTy())) {
620            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
621            type = b->getBitBlockType();
622            if (fieldWidth != 1) {
623                type = ArrayType::get(type, fieldWidth);
624            }
625            return ArrayType::get(type, numElements);
626        }
627    }
628    std::string tmp;
629    raw_string_ostream out(tmp);
630    streamSetType->print(out);
631    out << " is an unvalid stream set buffer type.";
632    report_fatal_error(out.str());
633}
634
635StreamSetBuffer::StreamSetBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b,
636                                 Type * const baseType, const unsigned AddressSpace)
637: mBufferKind(k)
638, mHandle(nullptr)
639, mType(resolveStreamSetType(b, baseType))
640, mAddressSpace(AddressSpace)
641, mBaseType(baseType) {
642
643}
644
645StreamSetBuffer::~StreamSetBuffer() { }
646
647}
Note: See TracBrowser for help on using the repository browser.