source: icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp @ 5998

Last change on this file since 5998 was 5998, checked in by nmedfort, 12 months ago

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

File size: 30.5 KB
Line 
1#include "kernel_builder.h"
2#include <toolchain/toolchain.h>
3#include <kernels/kernel.h>
4#include <kernels/streamset.h>
5#include <llvm/Support/raw_ostream.h>
6#include <llvm/IR/Module.h>
7
8using namespace llvm;
9using namespace parabix;
10
/** @brief Returns true iff @p n is non-zero and has exactly one bit set (i.e. is a power of two). */
inline static bool is_power_2(const uint64_t n) {
    // clearing the lowest set bit leaves zero only when at most one bit was set
    const bool atMostOneBit = ((n & (n - 1)) == 0);
    if (!atMostOneBit) {
        return false;
    }
    // zero has no bits set and is not a power of two
    return n != 0;
}
14
15namespace kernel {
16
17using Port = Kernel::Port;
18
19Value * KernelBuilder::getScalarFieldPtr(llvm::Value * const instance, Value * const index) {
20    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
21        CreateAssert(instance, "getScalarFieldPtr: instance cannot be null!");
22    }
23    return CreateGEP(instance, {getInt32(0), index});
24}
25
26Value * KernelBuilder::getScalarFieldPtr(llvm::Value * const handle, const std::string & fieldName) {
27    return getScalarFieldPtr(handle, getInt32(mKernel->getScalarIndex(fieldName)));
28}
29
30llvm::Value * KernelBuilder::getScalarFieldPtr(llvm::Value * const index) {
31    return getScalarFieldPtr(mKernel->getInstance(), index);
32}
33
34llvm::Value *KernelBuilder:: getScalarFieldPtr(const std::string & fieldName) {
35    return getScalarFieldPtr(mKernel->getInstance(), fieldName);
36}
37
38Value * KernelBuilder::getScalarField(const std::string & fieldName) {
39    return CreateLoad(getScalarFieldPtr(fieldName), fieldName);
40}
41
42void KernelBuilder::setScalarField(const std::string & fieldName, Value * value) {
43    CreateStore(value, getScalarFieldPtr(fieldName));
44}
45
46Value * KernelBuilder::getStreamHandle(const std::string & name) {
47    Value * const ptr = getScalarField(name + BUFFER_SUFFIX);
48    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
49        CreateAssert(ptr, name + " handle cannot be null!");
50    }
51    return ptr;
52}
53
54LoadInst * KernelBuilder::acquireLogicalSegmentNo() {
55    return CreateAtomicLoadAcquire(getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
56}
57
58void KernelBuilder::releaseLogicalSegmentNo(Value * const nextSegNo) {
59    CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
60}
61
62Value * KernelBuilder::getCycleCountPtr() {
63    return getScalarFieldPtr(CYCLECOUNT_SCALAR);
64}
65
66Value * KernelBuilder::getNamedItemCount(const std::string & name, const std::string & suffix) {
67    const ProcessingRate & rate = mKernel->getBinding(name).getRate();
68    Value * itemCount = nullptr;
69    if (LLVM_UNLIKELY(rate.isRelative())) {
70        Port port; unsigned index;
71        std::tie(port, index) = mKernel->getStreamPort(rate.getReference());
72        if (port == Port::Input) {
73            itemCount = getProcessedItemCount(rate.getReference());
74        } else {
75            itemCount = getProducedItemCount(rate.getReference());
76        }
77        itemCount = CreateMul2(itemCount, rate.getRate());
78    } else {
79        itemCount = getScalarField(name + suffix);
80    }
81    return itemCount;
82}
83
84void KernelBuilder::setNamedItemCount(const std::string & name, const std::string & suffix, llvm::Value * const value) {
85    const ProcessingRate & rate = mKernel->getBinding(name).getRate();
86    const auto safetyCheck = mKernel->treatUnsafeKernelOperationsAsErrors();
87    if (LLVM_UNLIKELY(rate.isDerived() && safetyCheck)) {
88        report_fatal_error("Cannot set item count: " + name + " is a derived rate stream");
89    }
90    if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
91        CallPrintIntToStderr(mKernel->getName() + ": " + name + suffix, value);
92    }
93    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts) && safetyCheck)) {
94        Value * const current = getScalarField(name + suffix);
95        CreateAssert(CreateICmpUGE(value, current), name + " " + suffix + " must be monotonically non-decreasing");
96    }
97    setScalarField(name + suffix, value);
98}
99
100
101Value * KernelBuilder::getAvailableItemCount(const std::string & name) {
102    const auto & inputs = mKernel->getStreamInputs();
103    for (unsigned i = 0; i < inputs.size(); ++i) {
104        if (inputs[i].getName() == name) {
105            return mKernel->getAvailableItemCount(i);
106        }
107    }
108    return nullptr;
109}
110
111Value * KernelBuilder::getTerminationSignal() {
112    return CreateICmpNE(getScalarField(TERMINATION_SIGNAL), getSize(0));
113}
114
115void KernelBuilder::setTerminationSignal(llvm::Value * const value) {
116    assert (value->getType() == getInt1Ty());
117    if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
118        CallPrintIntToStderr(mKernel->getName() + ": setTerminationSignal", value);
119    }
120    setScalarField(TERMINATION_SIGNAL, CreateZExt(value, getSizeTy()));
121}
122
123Value * KernelBuilder::getLinearlyAccessibleItems(const std::string & name, Value * fromPosition, Value * avail, bool reverse) {
124    const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
125    return buf->getLinearlyAccessibleItems(this, getStreamHandle(name), fromPosition, avail, reverse);
126}
127
128Value * KernelBuilder::getLinearlyWritableItems(const std::string & name, Value * fromPosition, bool reverse) {
129    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
130    return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, getConsumedItemCount(name), reverse);
131}
132
133/** ------------------------------------------------------------------------------------------------------------- *
134 * @brief CreatePrepareOverflow
135 ** ------------------------------------------------------------------------------------------------------------- */
136void KernelBuilder::CreatePrepareOverflow(const std::string & name) {
137    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
138    assert (buf->supportsCopyBack());
139    Constant * const overflowSize = ConstantExpr::getSizeOf(buf->getType());
140    Value * const handle = getStreamHandle(name);
141    // TODO: handle non constant stream set counts
142    assert (isa<Constant>(buf->getStreamSetCount(this, handle)));
143    Value * const base = buf->getBaseAddress(this, handle);
144    Value * const overflow = buf->getOverflowAddress(this, handle);
145    const auto blockSize = getBitBlockWidth() / 8;
146    CreateMemZero(overflow, overflowSize, blockSize);
147    CreateMemZero(base, overflowSize, blockSize);
148}
149
150/** ------------------------------------------------------------------------------------------------------------- *
151 * @brief getItemWidth
152 ** ------------------------------------------------------------------------------------------------------------- */
153inline unsigned LLVM_READNONE getItemWidth(const Type * ty ) {
154    if (LLVM_LIKELY(isa<ArrayType>(ty))) {
155        ty = ty->getArrayElementType();
156    }
157    return cast<IntegerType>(ty->getVectorElementType())->getBitWidth();
158}
159
160/** ------------------------------------------------------------------------------------------------------------- *
161 * @brief CreateNonLinearCopyFromOverflow
162 ** ------------------------------------------------------------------------------------------------------------- */
163void KernelBuilder::CreateNonLinearCopyFromOverflow(const Binding & output, llvm::Value * const itemsToCopy, Value * overflowOffset) {
164
165    Value * const handle = getStreamHandle(output.getName());
166    Type * const bitBlockPtrTy = getBitBlockType()->getPointerTo();
167    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(output.getName());
168    assert (buf->supportsCopyBack());
169    Value * const target = CreatePointerCast(buf->getBaseAddress(this, handle), bitBlockPtrTy);
170    Value * const source = CreatePointerCast(buf->getOverflowAddress(this, handle), bitBlockPtrTy);
171    const auto blockSize = getBitBlockWidth() / 8;
172    Constant * const BLOCK_WIDTH = getSize(getBitBlockWidth());
173    Constant * const ITEM_WIDTH = getSize(getItemWidth(buf->getBaseType()));
174    Value * const streamCount = buf->getStreamSetCount(this, handle);
175
176    // If we have a computed overflow position, the base and overflow regions were not speculatively zeroed out prior
177    // to the kernel writing over them. To handle them, we compute a mask of valid items and exclude any bit not in
178    // them before OR-ing together the streams.
179    if (overflowOffset) {
180
181        overflowOffset = CreateMul(overflowOffset, ITEM_WIDTH);
182        Value * targetMask = bitblock_mask_from(CreateURem(overflowOffset, BLOCK_WIDTH));
183        Value * sourceMask = CreateNot(targetMask);
184        Value * const overflowBlockCount = CreateUDiv(overflowOffset, BLOCK_WIDTH);
185        Value * const blockOffset = CreateMul(overflowBlockCount, streamCount);
186        Value * const fullCopyLength = CreateMul(blockOffset, getSize(blockSize));
187        CreateMemCpy(target, source, fullCopyLength, blockSize);
188
189        BasicBlock * const partialCopyEntry = GetInsertBlock();
190        BasicBlock * const partialCopyLoop = CreateBasicBlock();
191        BasicBlock * const partialCopyExit = CreateBasicBlock();
192
193        Value * const partialBlockCount = CreateAdd(blockOffset, streamCount);
194        CreateBr(partialCopyLoop);
195
196        SetInsertPoint(partialCopyLoop);
197        PHINode * const blockIndex = CreatePHI(getSizeTy(), 2);
198        blockIndex->addIncoming(blockOffset, partialCopyEntry);
199        Value * const sourcePtr = CreateGEP(source, blockIndex);
200        Value * sourceValue = CreateBlockAlignedLoad(sourcePtr);
201        sourceValue = CreateAnd(sourceValue, sourceMask);
202        Value * const targetPtr = CreateGEP(target, blockIndex);
203        Value * targetValue = CreateBlockAlignedLoad(targetPtr);
204        targetValue = CreateAnd(targetValue, targetMask);
205        targetValue = CreateOr(targetValue, sourceValue);
206        CreateBlockAlignedStore(targetValue, targetPtr);
207        Value * const nextBlockIndex = CreateAdd(blockIndex, getSize(1));
208        blockIndex->addIncoming(nextBlockIndex, partialCopyLoop);
209        CreateCondBr(CreateICmpNE(nextBlockIndex, partialBlockCount), partialCopyLoop, partialCopyExit);
210
211        SetInsertPoint(partialCopyExit);
212
213    } else {
214
215        BasicBlock * const mergeCopyEntry = GetInsertBlock();
216        BasicBlock * const mergeCopyLoop = CreateBasicBlock();
217        BasicBlock * const mergeCopyExit = CreateBasicBlock();
218
219        Value * blocksToCopy = CreateCeilUDiv(itemsToCopy, BLOCK_WIDTH);
220        blocksToCopy = CreateMul(blocksToCopy, ITEM_WIDTH);
221        blocksToCopy = CreateMul(blocksToCopy, streamCount);
222
223        CreateBr(mergeCopyLoop);
224
225        SetInsertPoint(mergeCopyLoop);
226        PHINode * const blockIndex = CreatePHI(getSizeTy(), 2);
227        blockIndex->addIncoming(getSize(0), mergeCopyEntry);
228        Value * const sourcePtr = CreateGEP(source, blockIndex);
229        Value * const sourceValue = CreateBlockAlignedLoad(sourcePtr);
230        Value * const targetPtr = CreateGEP(target, blockIndex);
231        Value * targetValue = CreateBlockAlignedLoad(targetPtr);
232        targetValue = CreateOr(targetValue, sourceValue);
233        CreateBlockAlignedStore(targetValue, targetPtr);
234        Value * const nextBlockIndex = CreateAdd(blockIndex, getSize(1));
235        blockIndex->addIncoming(nextBlockIndex, mergeCopyLoop);
236        CreateCondBr(CreateICmpNE(nextBlockIndex, blocksToCopy), mergeCopyLoop, mergeCopyExit);
237
238        SetInsertPoint(mergeCopyExit);
239    }
240
241
242
243}
244
245/** ------------------------------------------------------------------------------------------------------------- *
246 * @brief CreateCopyFromOverflow
247 ** ------------------------------------------------------------------------------------------------------------- */
248void KernelBuilder::CreateCopyFromOverflow(const Binding & output, llvm::Value * const itemsToCopy) {
249
250    Value * const handle = getStreamHandle(output.getName());
251    Type * const bitBlockPtrTy = getBitBlockType()->getPointerTo();
252    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(output.getName());
253    assert (buf->supportsCopyBack());
254    Value * const target = CreatePointerCast(buf->getBaseAddress(this, handle), bitBlockPtrTy);
255    Value * const source = CreatePointerCast(buf->getOverflowAddress(this, handle), bitBlockPtrTy);
256    Constant * const BLOCK_WIDTH = getSize(getBitBlockWidth());
257    Constant * const ITEM_WIDTH = getSize(getItemWidth(buf->getBaseType()));
258    Value * const streamCount = buf->getStreamSetCount(this, handle);
259
260    BasicBlock * const mergeCopyEntry = GetInsertBlock();
261    BasicBlock * const mergeCopyLoop = CreateBasicBlock();
262    BasicBlock * const mergeCopyExit = CreateBasicBlock();
263
264    Value * blocksToCopy = CreateCeilUDiv(itemsToCopy, BLOCK_WIDTH);
265    blocksToCopy = CreateMul(blocksToCopy, ITEM_WIDTH);
266    blocksToCopy = CreateMul(blocksToCopy, streamCount);
267
268    CreateCondBr(CreateICmpEQ(blocksToCopy, getSize(0)), mergeCopyExit, mergeCopyLoop);
269
270    SetInsertPoint(mergeCopyLoop);
271    PHINode * const blockIndex = CreatePHI(getSizeTy(), 2);
272    blockIndex->addIncoming(getSize(0), mergeCopyEntry);
273    Value * const sourcePtr = CreateGEP(source, blockIndex);
274    Value * const sourceValue = CreateBlockAlignedLoad(sourcePtr);
275    Value * const targetPtr = CreateGEP(target, blockIndex);
276    CreateBlockAlignedStore(sourceValue, targetPtr);
277    Value * const nextBlockIndex = CreateAdd(blockIndex, getSize(1));
278    blockIndex->addIncoming(nextBlockIndex, mergeCopyLoop);
279    CreateCondBr(CreateICmpNE(nextBlockIndex, blocksToCopy), mergeCopyLoop, mergeCopyExit);
280
281    SetInsertPoint(mergeCopyExit);
282}
283
284
285/** ------------------------------------------------------------------------------------------------------------- *
286 * @brief CreateCopyToOverflow
287 ** ------------------------------------------------------------------------------------------------------------- */
288void KernelBuilder::CreateCopyToOverflow(const std::string & name) {
289    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
290    assert (buf->supportsCopyBack());
291    Value * const handle = getStreamHandle(name);
292    // TODO: handle non constant stream set counts
293    assert (isa<Constant>(buf->getStreamSetCount(this, handle)));
294    Value * const target = buf->getBaseAddress(this, handle);
295    Value * const source = buf->getOverflowAddress(this, handle);
296    Constant * const overflowSize = ConstantExpr::getSizeOf(buf->getType());
297    CreateMemCpy(target, source, overflowSize, getBitBlockWidth() / 8);
298}
299
300/** ------------------------------------------------------------------------------------------------------------- *
301 * @brief AcquireTemporaryBuffer
302 ** ------------------------------------------------------------------------------------------------------------- */
303std::pair<Value *, Value *> KernelBuilder::AcquireTemporaryBuffer(const std::string & name, Value * offset, Value * itemsToCopy) {
304    const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
305    const auto itemWidth = getItemWidth(buf->getBaseType());
306    const Binding & binding = mKernel->getBinding(name);
307    if (LLVM_UNLIKELY(!binding.getRate().isFixed())) {
308        Constant * const BIT_BLOCK_WIDTH = ConstantInt::get(offset->getType(), getBitBlockWidth());
309        Value * const alignedOffset = CreateAnd(offset, CreateNeg(BIT_BLOCK_WIDTH));
310        itemsToCopy = CreateAdd(itemsToCopy, CreateSub(offset, alignedOffset));
311        offset = alignedOffset;
312    }
313    Value * bytesToCopy = itemsToCopy;
314    if (itemWidth < 8) {
315        bytesToCopy = CreateCeilUDiv(itemsToCopy, getSize(8 / itemWidth));
316    } else if (itemWidth > 8) {
317        bytesToCopy = CreateMul(itemsToCopy, getSize(itemWidth / 8));
318    }
319    Constant * const baseSize = ConstantExpr::getTrunc(ConstantExpr::getSizeOf(buf->getStreamSetBlockType()), getSizeTy());
320    Constant * const itemsConsumedPerIteration = getSize(std::max(ceiling(mKernel->getUpperBound(binding.getRate())), 1U));
321    Constant * const paddedSize =  ConstantExpr::getMul(baseSize, itemsConsumedPerIteration);
322
323    // one is added to bytes to copy to ensure that the stream is "zero-extended" by one block to properly handle any
324    // final block processing.o
325    Value * const size = CreateRoundUp(CreateAdd(bytesToCopy, getSize(1)), paddedSize);
326    Value * const handle = getStreamHandle(name);
327    Value * const base = buf->getBaseAddress(this, handle);
328    Value * const buffer = CreateAlignedMalloc(size, getCacheAlignment());
329    // TODO: handle split copy? currently no SourceBuffers could support it and I'm not sure how useful it'd be to do so.
330    Value * const from = buf->getRawItemPointer(this, handle, offset);
331    CreateMemCpy(buffer, from, bytesToCopy, 1);
332    CreateMemZero(CreateGEP(buffer, bytesToCopy), CreateSub(size, bytesToCopy), 1);
333    // get the difference between our base and from position then compute an offsetted temporary buffer address
334    Value * const diff = CreatePtrDiff(CreatePointerCast(base, from->getType()), from);
335    Value * const offsettedBuffer = CreatePointerCast(CreateGEP(buffer, diff), base->getType());
336    buf->setBaseAddress(this, handle, offsettedBuffer);
337    Value * const tempBuffer = CreatePointerCast(buffer, base->getType());
338    return std::make_pair(base, tempBuffer);
339}
340
341Value * KernelBuilder::getConsumerLock(const std::string & name) {
342    return getScalarField(name + CONSUMER_SUFFIX);
343}
344
345void KernelBuilder::setConsumerLock(const std::string & name, Value * const value) {
346    setScalarField(name + CONSUMER_SUFFIX, value);
347}
348
349Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * const streamIndex, Value * const blockOffset) {
350    const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
351    Value * blockIndex = CreateLShr(getProcessedItemCount(name), std::log2(getBitBlockWidth()));
352    if (blockOffset) {
353        assert (blockOffset->getType() == blockIndex->getType());
354        blockIndex = CreateAdd(blockIndex, blockOffset);
355    }
356    return buf->getStreamBlockPtr(this, getStreamHandle(name), streamIndex, blockIndex, true);
357}
358
359Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * const streamIndex, Value * const packIndex, Value * const blockOffset) {
360    const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
361    Value * blockIndex = CreateLShr(getProcessedItemCount(name), std::log2(getBitBlockWidth()));
362    if (blockOffset) {
363        assert (blockOffset->getType() == blockIndex->getType());
364        blockIndex = CreateAdd(blockIndex, blockOffset);
365    }
366    return buf->getStreamPackPtr(this, getStreamHandle(name), streamIndex, blockIndex, packIndex, true);
367}
368
369Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * const streamIndex, Value * const blockOffset) {
370    return CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex, blockOffset));
371}
372
373Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * const streamIndex, Value * const packIndex, Value * const blockOffset) {
374    return CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex, blockOffset));
375}
376
377Value * KernelBuilder::getInputStreamSetCount(const std::string & name) {
378    const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
379    return buf->getStreamSetCount(this, getStreamHandle(name));
380}
381
382Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex, Value * const blockOffset) {
383    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
384    Value * blockIndex = CreateLShr(getProducedItemCount(name), std::log2(getBitBlockWidth()));
385    if (blockOffset) {
386        assert (blockOffset->getType() == blockIndex->getType());
387        blockIndex = CreateAdd(blockIndex, blockOffset);
388    }
389    return buf->getStreamBlockPtr(this, getStreamHandle(name), streamIndex, blockIndex, false);
390}
391
392Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex, llvm::Value * blockOffset) {
393    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
394    Value * blockIndex = CreateLShr(getProducedItemCount(name), std::log2(getBitBlockWidth()));
395    if (blockOffset) {
396        assert (blockOffset->getType() == blockIndex->getType());
397        blockIndex = CreateAdd(blockIndex, blockOffset);
398    }
399    return buf->getStreamPackPtr(this, getStreamHandle(name), streamIndex, blockIndex, packIndex, false);
400}
401
402
403StoreInst * KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, llvm::Value * blockOffset, Value * toStore) {
404    Value * const ptr = getOutputStreamBlockPtr(name, streamIndex, blockOffset);
405    Type * const storeTy = toStore->getType();
406    Type * const ptrElemTy = ptr->getType()->getPointerElementType();
407    if (LLVM_UNLIKELY(storeTy != ptrElemTy)) {
408        if (LLVM_LIKELY(storeTy->canLosslesslyBitCastTo(ptrElemTy))) {
409            toStore = CreateBitCast(toStore, ptrElemTy);
410        } else {
411            std::string tmp;
412            raw_string_ostream out(tmp);
413            out << "invalid type conversion when calling storeOutputStreamBlock on " <<  name << ": ";
414            ptrElemTy->print(out);
415            out << " vs. ";
416            storeTy->print(out);
417        }
418    }
419    return CreateBlockAlignedStore(toStore, ptr);
420}
421
422StoreInst * KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, llvm::Value * blockOffset, Value * toStore) {
423    Value * const ptr = getOutputStreamPackPtr(name, streamIndex, packIndex, blockOffset);
424    Type * const storeTy = toStore->getType();
425    Type * const ptrElemTy = ptr->getType()->getPointerElementType();
426    if (LLVM_UNLIKELY(storeTy != ptrElemTy)) {
427        if (LLVM_LIKELY(storeTy->canLosslesslyBitCastTo(ptrElemTy))) {
428            toStore = CreateBitCast(toStore, ptrElemTy);
429        } else {
430            std::string tmp;
431            raw_string_ostream out(tmp);
432            out << "invalid type conversion when calling storeOutputStreamPack on " <<  name << ": ";
433            ptrElemTy->print(out);
434            out << " vs. ";
435            storeTy->print(out);
436        }
437    }
438    return CreateBlockAlignedStore(toStore, ptr);
439}
440
441Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) {
442    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
443    return buf->getStreamSetCount(this, getStreamHandle(name));
444}
445
446Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * absolutePosition) {
447    const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
448    return buf->getRawItemPointer(this, getStreamHandle(name), absolutePosition);
449}
450
451Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * absolutePosition) {
452    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
453    return buf->getRawItemPointer(this, getStreamHandle(name), absolutePosition);
454}
455
456Value * KernelBuilder::getBaseAddress(const std::string & name) {
457    return mKernel->getAnyStreamSetBuffer(name)->getBaseAddress(this, getStreamHandle(name));
458}
459
460void KernelBuilder::setBaseAddress(const std::string & name, Value * const addr) {
461    return mKernel->getAnyStreamSetBuffer(name)->setBaseAddress(this, getStreamHandle(name), addr);
462}
463
464Value * KernelBuilder::getBufferedSize(const std::string & name) {
465    return mKernel->getAnyStreamSetBuffer(name)->getBufferedSize(this, getStreamHandle(name));
466}
467
468void KernelBuilder::setBufferedSize(const std::string & name, Value * size) {
469    mKernel->getAnyStreamSetBuffer(name)->setBufferedSize(this, getStreamHandle(name), size);
470}
471
472Value * KernelBuilder::getCapacity(const std::string & name) {
473    return mKernel->getAnyStreamSetBuffer(name)->getCapacity(this, getStreamHandle(name));
474}
475
476void KernelBuilder::setCapacity(const std::string & name, Value * c) {
477    mKernel->getAnyStreamSetBuffer(name)->setCapacity(this, getStreamHandle(name), c);
478}
479
480void KernelBuilder::protectOutputStream(const std::string & name, const bool readOnly) {
481    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
482    Value * const handle = getStreamHandle(name);
483    Value * const base = buf->getBaseAddress(this, handle);
484    Value * sz = ConstantExpr::getSizeOf(buf->getType());
485    sz = CreateMul(sz, getInt64(buf->getBufferBlocks()));
486    sz = CreateMul(sz, CreateZExt(buf->getStreamSetCount(this, handle), getInt64Ty()));
487    CreateMProtect(base, sz, readOnly ? CBuilder::READ : (CBuilder::READ | CBuilder::WRITE));
488}
489   
490CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) {
491    return mKernel->makeDoSegmentCall(*this, args);
492}
493
494Value * KernelBuilder::getAccumulator(const std::string & accumName) {
495    auto results = mKernel->mOutputScalarResult;
496    if (LLVM_UNLIKELY(results == nullptr)) {
497        report_fatal_error("Cannot get accumulator " + accumName + " until " + mKernel->getName() + " has terminated.");
498    }
499    const auto & outputs = mKernel->getScalarOutputs();
500    const auto n = outputs.size();
501    if (LLVM_UNLIKELY(n == 0)) {
502        report_fatal_error(mKernel->getName() + " has no output scalars.");
503    } else {
504        for (unsigned i = 0; i < n; ++i) {
505            const Binding & b = outputs[i];
506            if (b.getName() == accumName) {
507                if (n == 1) {
508                    return results;
509                } else {
510                    return CreateExtractValue(results, {i});
511                }
512            }
513        }
514        report_fatal_error(mKernel->getName() + " has no output scalar named " + accumName);
515    }
516}
517
518void KernelBuilder::doubleCapacity(const std::string & name) {
519    const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
520    return buf->doubleCapacity(this, getStreamHandle(name));
521}
522
523BasicBlock * KernelBuilder::CreateConsumerWait() {
524    const auto consumers = mKernel->getStreamOutputs();
525    BasicBlock * const entry = GetInsertBlock();
526    if (consumers.empty()) {
527        return entry;
528    } else {
529        Function * const parent = entry->getParent();
530        IntegerType * const sizeTy = getSizeTy();
531        ConstantInt * const zero = getInt32(0);
532        ConstantInt * const one = getInt32(1);
533        ConstantInt * const size0 = getSize(0);
534
535        Value * const segNo = acquireLogicalSegmentNo();
536        const auto n = consumers.size();
537        BasicBlock * load[n + 1];
538        BasicBlock * wait[n];
539        for (unsigned i = 0; i < n; ++i) {
540            load[i] = BasicBlock::Create(getContext(), consumers[i].getName() + "Load", parent);
541            wait[i] = BasicBlock::Create(getContext(), consumers[i].getName() + "Wait", parent);
542        }
543        load[n] = BasicBlock::Create(getContext(), "Resume", parent);
544        CreateBr(load[0]);
545        for (unsigned i = 0; i < n; ++i) {
546
547            SetInsertPoint(load[i]);
548            Value * const outputConsumers = getConsumerLock(consumers[i].getName());
549
550            Value * const consumerCount = CreateLoad(CreateGEP(outputConsumers, {zero, zero}));
551            Value * const consumerPtr = CreateLoad(CreateGEP(outputConsumers, {zero, one}));
552            Value * const noConsumers = CreateICmpEQ(consumerCount, size0);
553            CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
554
555            SetInsertPoint(wait[i]);
556            PHINode * const consumerPhi = CreatePHI(sizeTy, 2);
557            consumerPhi->addIncoming(size0, load[i]);
558
559            Value * const conSegPtr = CreateLoad(CreateGEP(consumerPtr, consumerPhi));
560            Value * const processedSegmentCount = CreateAtomicLoadAcquire(conSegPtr);
561            Value * const ready = CreateICmpEQ(segNo, processedSegmentCount);
562            assert (ready->getType() == getInt1Ty());
563            Value * const nextConsumerIdx = CreateAdd(consumerPhi, CreateZExt(ready, sizeTy));
564            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
565            Value * const next = CreateICmpEQ(nextConsumerIdx, consumerCount);
566            CreateCondBr(next, load[i + 1], wait[i]);
567        }
568
569        BasicBlock * const exit = load[n];
570        SetInsertPoint(exit);
571        return exit;
572    }
573}
574
575/** ------------------------------------------------------------------------------------------------------------- *
576 * @brief CreateUDiv2
577 ** ------------------------------------------------------------------------------------------------------------- */
578Value * KernelBuilder::CreateUDiv2(Value * const number, const ProcessingRate::RateValue & divisor, const Twine & Name) {
579    if (divisor.numerator() == 1 && divisor.denominator() == 1) {
580        return number;
581    }
582    Constant * const n = ConstantInt::get(number->getType(), divisor.numerator());
583    if (LLVM_LIKELY(divisor.denominator() == 1)) {
584        return CreateUDiv(number, n, Name);
585    } else {
586        Constant * const d = ConstantInt::get(number->getType(), divisor.denominator());
587        return CreateUDiv(CreateMul(number, d), n);
588    }
589}
590
591/** ------------------------------------------------------------------------------------------------------------- *
592 * @brief CreateCeilUDiv2
593 ** ------------------------------------------------------------------------------------------------------------- */
594Value * KernelBuilder::CreateCeilUDiv2(Value * const number, const ProcessingRate::RateValue & divisor, const Twine & Name) {
595    if (divisor.numerator() == 1 && divisor.denominator() == 1) {
596        return number;
597    }
598    Constant * const n = ConstantInt::get(number->getType(), divisor.numerator());
599    if (LLVM_LIKELY(divisor.denominator() == 1)) {
600        return CreateCeilUDiv(number, n, Name);
601    } else {
602        //   âŒŠ(num + ratio - 1) / ratio⌋
603        // = ⌊(num - 1) / (n/d)⌋ + (ratio/ratio)
604        // = ⌊(d * (num - 1)) / n⌋ + 1
605        Constant * const ONE = ConstantInt::get(number->getType(), 1);
606        Constant * const d = ConstantInt::get(number->getType(), divisor.denominator());
607        return CreateAdd(CreateUDiv(CreateMul(CreateSub(number, ONE), d), n), ONE, Name);
608    }
609}
610
611/** ------------------------------------------------------------------------------------------------------------- *
612 * @brief CreateMul2
613 ** ------------------------------------------------------------------------------------------------------------- */
614Value * KernelBuilder::CreateMul2(Value * const number, const ProcessingRate::RateValue & factor, const Twine & Name) {
615    if (factor.numerator() == 1 && factor.denominator() == 1) {
616        return number;
617    }
618    Constant * const n = ConstantInt::get(number->getType(), factor.numerator());
619    if (LLVM_LIKELY(factor.denominator() == 1)) {
620        return CreateMul(number, n, Name);
621    } else {
622        Constant * const d = ConstantInt::get(number->getType(), factor.denominator());
623        return CreateUDiv(CreateMul(number, n), d, Name);
624    }
625}
626
627/** ------------------------------------------------------------------------------------------------------------- *
628 * @brief CreateMulCeil2
629 ** ------------------------------------------------------------------------------------------------------------- */
630Value * KernelBuilder::CreateCeilUMul2(Value * const number, const ProcessingRate::RateValue & factor, const Twine & Name) {
631    if (factor.denominator() == 1) {
632        return CreateMul2(number, factor, Name);
633    }
634    Constant * const n = ConstantInt::get(number->getType(), factor.numerator());
635    Constant * const d = ConstantInt::get(number->getType(), factor.denominator());
636    return CreateCeilUDiv(CreateMul(number, n), d, Name);
637}
638
639}
Note: See TracBrowser for help on using the repository browser.