Ignore:
Timestamp:
Sep 4, 2016, 8:47:47 PM (3 years ago)
Author:
cameron
Message:

pablo.Lookahead support updated for stream set access/kernel system

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5124 r5141  
    2424namespace pablo {
    2525
    26 #define DSSLI_FIELDWIDTH 64
    27 
    2826PabloCompiler::PabloCompiler(IDISA::IDISA_Builder * b, PabloKernel * k, PabloFunction * const function)
    2927: mMod(b->getModule())
     
    4240
    4341
    44 Type * PabloCompiler::initializeCarryData() {
     42Type * PabloCompiler::initializeKernelData() {
     43    Examine(mPabloFunction);
     44   
    4545    mCarryManager = make_unique<CarryManager>(iBuilder);
    4646    Type * carryDataType = mCarryManager->initializeCarryData(mPabloFunction);
     
    5656    #endif
    5757
    58     Examine(mPabloFunction);
    59    
    6058    //Generate Kernel//
    6159    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     
    104102    mIfDepth = 0;
    105103    mMaxWhileDepth = 0;
    106     LookaheadOffsetMap offsetMap;
    107     Examine(function->getEntryBlock(), offsetMap);
    108     mInputStreamOffset.clear();
    109     for (const auto & oi : offsetMap) {
    110         for (const auto offset : oi.second) {
    111             mInputStreamOffset.insert(offset / iBuilder->getBitBlockWidth());
    112         }
    113     }
    114 }
    115 
    116 void PabloCompiler::Examine(const PabloBlock * const block, LookaheadOffsetMap & offsetMap) {
     104    Examine(function->getEntryBlock());
     105}
     106
     107void PabloCompiler::Examine(const PabloBlock * const block) {
     108    unsigned maxOffset = 0;
    117109    for (const Statement * stmt : *block) {
    118110         boost::container::flat_set<unsigned> offsets;
     
    120112            const Lookahead * const la = cast<Lookahead>(stmt);
    121113            assert (isa<Var>(la->getExpr()));
    122             offsets.insert(la->getAmount());
    123             offsets.insert(la->getAmount() + iBuilder->getBitBlockWidth() - 1);
     114            if (la->getAmount() > maxOffset) maxOffset = la->getAmount();
    124115        } else {
    125             for (unsigned i = 0; i < stmt->getNumOperands(); ++i) {
    126                 const PabloAST * expr = stmt->getOperand(i);
    127                 if (isa<Var>(expr)) {
    128                     offsets.insert(0);
    129                 } else if (LLVM_LIKELY(isa<Statement>(expr) && !isa<Assign>(expr) && !isa<Next>(expr))) {
    130                     const auto f = offsetMap.find(expr);
    131                     assert (f != offsetMap.end());
    132                     const auto & o = f->second;
    133                     offsets.insert(o.begin(), o.end());
    134                 }
    135             }
    136116            if (LLVM_UNLIKELY(isa<If>(stmt))) {
    137                 Examine(cast<If>(stmt)->getBody(), offsetMap);
     117                Examine(cast<If>(stmt)->getBody());
    138118            } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    139119                mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
    140                 Examine(cast<While>(stmt)->getBody(), offsetMap);
     120                Examine(cast<While>(stmt)->getBody());
    141121                --mWhileDepth;
    142122            }
    143123        }
    144         offsetMap.emplace(stmt, offsets);
     124        mKernelBuilder->setLookAhead(maxOffset);
    145125    }
    146126}
     
    375355        PabloAST * const var = l->getExpr();
    376356        if (LLVM_UNLIKELY(!isa<Var>(var))) {
    377             throw std::runtime_error("Lookahead input type must be a Var object");
     357            throw std::runtime_error("Lookahead operations may only be applied to input streams");
    378358        }
    379359        unsigned index = 0;
     
    386366            throw std::runtime_error("Lookahead has an illegal Var operand");
    387367        }
    388         const unsigned offset0 = (l->getAmount() / iBuilder->getBitBlockWidth());
    389         const unsigned offset1 = ((l->getAmount() + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth());
    390         const unsigned shift = (l->getAmount() % iBuilder->getBitBlockWidth());
    391         Value * const v0 = nullptr;//iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset0));
    392         Value * const v1 = nullptr;//iBuilder->CreateBlockAlignedLoad(mKernelBuilder->getInputStream(index, offset1));
    393         if (LLVM_UNLIKELY((shift % 8) == 0)) { // Use a single whole-byte shift, if possible.
    394             expr = iBuilder->mvmd_dslli(8, v1, v0, (shift / 8));
    395         } else if (LLVM_LIKELY(shift < DSSLI_FIELDWIDTH)) {
    396             Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, v1, v0, 1);
    397             ahead = iBuilder->simd_slli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH - shift);
    398             Value * value = iBuilder->simd_srli(DSSLI_FIELDWIDTH, v0, shift);
    399             expr = iBuilder->simd_or(value, ahead);
    400         } else {
    401             Type  * const streamType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    402             Value * b0 = iBuilder->CreateBitCast(v0, streamType);
    403             Value * b1 = iBuilder->CreateBitCast(v1, streamType);
    404             Value * result = iBuilder->CreateOr(iBuilder->CreateShl(b1, iBuilder->getBitBlockWidth() - shift), iBuilder->CreateLShr(b0, shift));
    405             expr = iBuilder->CreateBitCast(result, mBitBlockType);
     368        const unsigned bit_shift = (l->getAmount() % iBuilder->getBitBlockWidth());
     369        const unsigned block_shift = (l->getAmount() / iBuilder->getBitBlockWidth());
     370        std::string inputName = mKernelBuilder->mStreamSetInputs[0].ssName;
     371        Value * blockNo = mKernelBuilder->getScalarField(mSelf, blockNoScalar);
     372        Value * lookAhead_blockPtr  = mKernelBuilder->getStreamSetBlockPtr(mSelf, inputName, iBuilder->CreateAdd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), block_shift)));
     373        Value * lookAhead_inputPtr = iBuilder->CreateGEP(lookAhead_blockPtr, {iBuilder->getInt32(0), iBuilder->getInt32(index)});
     374        Value * lookAhead = iBuilder->CreateBlockAlignedLoad(lookAhead_inputPtr);
     375        if (bit_shift == 0) {  // Simple case with no intra-block shifting.
     376            expr = lookAhead; 
     377        }
     378        else { // Need to form shift result from two adjacent blocks.
     379            Value * lookAhead_blockPtr1  = mKernelBuilder->getStreamSetBlockPtr(mSelf, inputName, iBuilder->CreateAdd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), block_shift + 1)));
     380            Value * lookAhead_inputPtr1 = iBuilder->CreateGEP(lookAhead_blockPtr1, {iBuilder->getInt32(0), iBuilder->getInt32(index)});
     381            Value * lookAhead1 = iBuilder->CreateBlockAlignedLoad(lookAhead_inputPtr1);
     382            if (LLVM_UNLIKELY((bit_shift % 8) == 0)) { // Use a single whole-byte shift, if possible.
     383                expr = iBuilder->mvmd_dslli(8, lookAhead1, lookAhead, (bit_shift / 8));
     384            }
     385            else {
     386                Type  * const streamType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     387                Value * b1 = iBuilder->CreateBitCast(lookAhead1, streamType);
     388                Value * b0 = iBuilder->CreateBitCast(lookAhead, streamType);
     389                Value * result = iBuilder->CreateOr(iBuilder->CreateShl(b1, iBuilder->getBitBlockWidth() - bit_shift), iBuilder->CreateLShr(b0, bit_shift));
     390                expr = iBuilder->CreateBitCast(result, mBitBlockType);
     391            }
    406392        }
    407393    } else {
Note: See TracChangeset for help on using the changeset viewer.