Ignore:
Timestamp:
Apr 25, 2018, 2:57:33 PM (13 months ago)
Author:
cameron
Message:

AbortOnNull functionality - initial check-in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5985 r5989  
    1111#include <re/re_toolchain.h>
    1212#include <re/re_reverse.h>
     13#include <grep/grep_engine.h>
    1314#include <pablo/codegenstate.h>
    1415#include <pablo/pablo_toolchain.h>
     
    2829#include <re/re_compiler.h>
    2930#include <UCD/ucd_compiler.hpp>
     31#include <llvm/IR/Module.h>
    3032#include <llvm/Support/raw_ostream.h>
    3133
     
    447449
    448450}
     451
     452
     453void AbortOnNull::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
     454    Module * const m = b->getModule();
     455    DataLayout DL(m);
     456    IntegerType * const intPtrTy = DL.getIntPtrType(m->getContext());
     457    Type * voidPtrTy = b->getVoidPtrTy();
     458    const auto blocksPerStride = getStride() / b->getBitBlockWidth();
     459    Constant * const BLOCKS_PER_STRIDE = b->getSize(blocksPerStride);
     460    BasicBlock * const entry = b->GetInsertBlock();
     461    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
     462    BasicBlock * const stridesDone = b->CreateBasicBlock("stridesDone");
     463    BasicBlock * const nullByteDetection = b->CreateBasicBlock("nullByteDetection");
     464    BasicBlock * const nullByteFound = b->CreateBasicBlock("nullByteFound");
     465    BasicBlock * const finalStride = b->CreateBasicBlock("finalStride");
     466    BasicBlock * const segmentDone = b->CreateBasicBlock("segmentDone");
     467
     468    Value * const numOfBlocks = b->CreateMul(numOfStrides, BLOCKS_PER_STRIDE);
     469    Value * availItems = b->getAvailableItemCount("bytedata");
     470    //
     471    // Fast loop to prove that there are no null bytes in a multiblock region.
     472    // We repeatedly combine byte packs using a SIMD unsigned min operation
     473    // (implemented as a Select/ICmpULT combination).
     474    //
     475    Value * byteStreamBasePtr = b->getInputStreamBlockPtr("bytedata", b->getSize(0), b->getSize(0));
     476    Value * outputStreamBasePtr = b->getOutputStreamBlockPtr("untilNull", b->getSize(0), b->getSize(0));
     477
     478    //
     479    // We set up a a set of eight accumulators to accumulate the minimum byte
     480    // values seen at each position in a block.   The initial min value at
     481    // each position is 0xFF (all ones).
     482    Value * blockMin[8];
     483    for (unsigned i = 0; i < 8; i++) {
     484        blockMin[i] = b->fwCast(8, b->allOnes());
     485    }
     486    // If we're in the final block bypass the fast loop.
     487    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
     488   
     489    b->SetInsertPoint(strideLoop);
     490    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
     491    baseBlockIndex->addIncoming(ConstantInt::get(baseBlockIndex->getType(), 0), entry);
     492    PHINode * const blocksRemaining = b->CreatePHI(b->getSizeTy(), 2);
     493    blocksRemaining->addIncoming(numOfBlocks, entry);
     494    for (unsigned i = 0; i < 8; i++) {
     495        Value * next = b->CreateBlockAlignedLoad(b->CreateGEP(byteStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
     496        b->CreateBlockAlignedStore(next, b->CreateGEP(outputStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
     497        next = b->fwCast(8, next);
     498        blockMin[i] = b->CreateSelect(b->CreateICmpULT(next, blockMin[i]), next, blockMin[i]);
     499    }
     500    Value * nextBlockIndex = b->CreateAdd(baseBlockIndex, ConstantInt::get(baseBlockIndex->getType(), 1));
     501    Value * nextRemaining = b->CreateSub(blocksRemaining, ConstantInt::get(blocksRemaining->getType(), 1));
     502    baseBlockIndex->addIncoming(nextBlockIndex, strideLoop);
     503    blocksRemaining->addIncoming(nextRemaining, strideLoop);
     504    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
     505   
     506    b->SetInsertPoint(stridesDone);
     507    // Combine the 8 blockMin values.
     508    for (unsigned i = 0; i < 4; i++) {
     509        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+4]);
     510    }
     511    for (unsigned i = 0; i < 2; i++) {
     512        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+2]);
     513    }
     514    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
     515    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
     516   
     517    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
     518   
     519   
     520    b->SetInsertPoint(finalStride);
     521    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), availItems, 1);
     522    b->CreateBr(nullByteDetection);
     523   
     524    b->SetInsertPoint(nullByteDetection);
     525    //  Find the exact location using memchr, which should be fast enough.
     526    //
     527    Value * ptrToNull = b->CreateMemChr(b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), b->getInt32(0), availItems);
     528    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
     529    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
     530   
     531    // A null byte has been located; set the termination code and call the signal handler.
     532    b->SetInsertPoint(nullByteFound);
     533    Value * nullPosn = b->CreateSub(b->CreatePtrToInt(ptrToNull, intPtrTy), b->CreatePtrToInt(byteStreamBasePtr, intPtrTy));
     534    b->setTerminationSignal();
     535    Function * const dispatcher = m->getFunction("signal_dispatcher"); assert (dispatcher);
     536    Value * handler = b->getScalarField("handler_address");
     537    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
     538    b->CreateBr(segmentDone);
     539   
     540    b->SetInsertPoint(segmentDone);
     541    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
     542    produced->addIncoming(nullPosn, nullByteFound);
     543    produced->addIncoming(availItems, stridesDone);
     544    produced->addIncoming(availItems, nullByteDetection);
     545    Value * producedCount = b->getProducedItemCount("untilNull");
     546    producedCount = b->CreateAdd(producedCount, produced);
     547    b->setProducedItemCount("untilNull", producedCount);
     548}
     549
     550AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> & b)
     551: MultiBlockKernel("AbortOnNull",
     552                   // inputs
     553{Binding{b->getStreamSetTy(1, 8), "bytedata"}},
     554                   // outputs
     555{Binding{b->getStreamSetTy(1, 8), "untilNull", FixedRate(), Deferred()}},
     556                   // input scalars
     557{Binding{b->getIntAddrTy(), "handler_address"}},
     558{}, {}) {
     559    addAttribute(CanTerminateEarly());
     560}
Note: See TracChangeset for help on using the changeset viewer.