Ignore:
Timestamp:
Oct 28, 2017, 5:03:07 PM (2 years ago)
Author:
cameron
Message:

Progress on arbitrary long indexed advances.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5713 r5715  
    113113    }
    114114    for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
    115         kernel->addScalar(b->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
     115        kernel->addScalar(b->getSizeTy(), "IndexedAdvancePosition" + std::to_string(i));
    116116    }
    117117}
     
    635635        return result;
    636636    } else {
     637        unsigned summaryFrame = mCurrentFrameIndex;
     638        if (mIfDepth > 0) {
     639            // Skip over summary frame to perform the long indexed advance.
     640            mCurrentFrameIndex++;
     641        }
     642        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
     643        Constant * blockWidth = b->getSize(b->getBitBlockWidth());
     644        Constant * blockWidth_1 = b->getSize(b->getBitBlockWidth() - 1);
     645        Value * carryPosition = b->getScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex));
     646        Value * carryBlockEndPos = b->CreateAdd(carryPosition, blockWidth_1);
     647        unsigned carry_blocks = nearest_pow2(20+ceil_udiv(shiftAmount, b->getBitBlockWidth()));
     648        Constant * carryQueueBlocks = b->getSize(carry_blocks);
     649        Value * carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
     650        Value * carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryBlockEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
     651        Value * lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
     652        Value * hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
     653        Value * c_lo = b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock);
     654        Value * c_hi = b->CreateBitCast(b->CreateBlockAlignedLoad(hi_GEP), iBitBlock);
     655        Value * lo_shift = b->CreateZExt(b->CreateURem(carryPosition, blockWidth), iBitBlock);
     656        Value * hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryBlockEndPos, blockWidth)), iBitBlock);
     657        Value * carryIn = b->CreateOr(b->CreateLShr(c_lo, lo_shift), b->CreateShl(c_hi, hi_shift));
     658        Value * carryOut, * result;
     659        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
     660        carryOut = b->CreateBitCast(carryOut, iBitBlock);
     661        Value * adv = b->mvmd_extract(sizeof(size_t) * 8, b->simd_popcount(b->getBitBlockWidth(), index_strm), 0);
     662        b->setScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex), b->CreateAdd(carryPosition, adv));
     663        Value * carryOutPosition = b->CreateAdd(carryPosition, b->getSize(shiftAmount));
     664        Value * carryOutEndPos = b->CreateAdd(carryOutPosition, blockWidth_1);
     665        carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
     666        carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
     667        lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
     668        hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
     669        lo_shift = b->CreateZExt(b->CreateURem(carryOutPosition, blockWidth), iBitBlock);
     670        hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryOutEndPos, blockWidth)), iBitBlock);
     671        c_lo = b->CreateOr(b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock), b->CreateShl(carryOut, lo_shift));
     672        c_hi = b->CreateLShr(carryOut, hi_shift);
     673        b->CreateBlockAlignedStore(b->CreateBitCast(c_lo, b->getBitBlockType()), lo_GEP);
     674        b->CreateBlockAlignedStore(b->CreateBitCast(c_hi, b->getBitBlockType()), hi_GEP);
    637675        mIndexedLongAdvanceIndex++;
    638         llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported.");
     676        mCurrentFrameIndex++;
     677        // Now handle the summary.
     678        if (mIfDepth > 0) {
     679            const auto summaryBlocks = ceil_udiv(shiftAmount, b->getBitBlockWidth());
     680            const auto summarySize = ceil_udiv(summaryBlocks, b->getBitBlockWidth());
     681            for (unsigned i = 0; i < summarySize; i++) {
     682                // All ones summary for now.
     683                b->CreateBlockAlignedStore(b->allOnes(), b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(summaryFrame), b->getInt32(i)}));
     684            }
     685        }
     686        return result;
    639687    }
    640688}
     
    940988                const auto blockWidth = b->getBitBlockWidth();
    941989                const auto blocks = ceil_udiv(amount, blockWidth);
    942                 type = ArrayType::get(blockTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
     990                type = ArrayType::get(blockTy, nearest_pow2(blocks + (isa<IndexedAdvance>(stmt) ? 20:0) + ((loopDepth != 0) ? 1 : 0)));
    943991                if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) {
    944992                    const auto summarySize = ceil_udiv(blocks, blockWidth);
Note: See TracChangeset for help on using the changeset viewer.