Changeset 4549 for icGREP


Ignore:
Timestamp:
May 11, 2015, 11:02:12 AM (4 years ago)
Author:
cameron
Message:

Long Advance optimization using circular buffer

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4548 r4549  
    365365}
    366366   
     367uint64_t log2ceil (uint64_t v) {
     368    unsigned ceil = 1;
     369    while (ceil < v) ceil *= 2;
     370    return ceil;
     371}
     372
     373unsigned const LongAdvanceBase = BLOCK_SIZE;
     374   
     375
     376   
    367377// CarryDataNumbering
    368378//
     
    394404        if (Advance * adv = dyn_cast<Advance>(stmt)) {
    395405            adv->setLocalAdvanceIndex(localAdvances);
    396             localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
     406            unsigned shift_amount = adv->getAdvanceAmount();
     407            if (shift_amount >= LongAdvanceBase) {
     408                int advEntries = (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
     409                int advCeil = log2ceil(advEntries);
     410                localAdvances += advCeil;
     411            }
     412            else {
     413                localAdvances += (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
     414            }
    397415        }
    398416        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
     
    969987}
    970988
    971 unsigned const LongAdvanceBase = 64;
    972    
    973989Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
    974990    if (shift_amount >= LongAdvanceBase) {
     
    10221038Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
    10231039    IRBuilder<> b(mBasicBlock);
    1024     int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
    1025     //int advCeil = log2ceil(advEntries);
    1026     //Value * indexMask = b.getInt64(advCeil - 1);
    1027     int block_shift = shift_amount % BLOCK_SIZE;
    1028     const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
    1029     //Value * blockIndex = b.CreateAnd(mBlockNo, indexMask);
    1030     //Value * idx2 = b.CreateAnd(b.CreateAdd(mBlockNo - b.getInt64(advEntries - 1)), indexMask);
    1031     const auto storeIdx = advanceIndex;
    1032     const auto loadIdx = advanceIndex + advEntries - 1;
     1040    const unsigned advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
     1041    const unsigned advanceEntries = (shift_amount + BLOCK_SIZE - 1) / BLOCK_SIZE;
     1042    const unsigned block_shift = shift_amount % BLOCK_SIZE;
     1043    const unsigned advanceCeil = log2ceil(advanceEntries);
     1044    Value * indexMask = b.getInt64(advanceCeil - 1);
     1045    Value * advBaseIndex = b.getInt64(advanceIndex);
     1046    Value * storeIndex = b.CreateAdd(b.CreateAnd(mBlockNo, indexMask), advBaseIndex);
     1047    Value * loadIndex = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries)), indexMask), advBaseIndex);
     1048    Value * storePtr = b.CreateGEP(mCarryDataPtr, storeIndex);
     1049    Value * loadPtr = b.CreateGEP(mCarryDataPtr, loadIndex);
    10331050    Value* result_value;
    10341051
    10351052    if (block_shift == 0) {
    1036         result_value = genCarryDataLoad(loadIdx);
    1037     }
    1038     else if (advEntries == 1) {
    1039         Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
     1053        result_value = b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
     1054    }
     1055    else if (advanceEntries == 1) {
     1056        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
    10401057        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
    10411058        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
     
    10431060    }
    10441061    else {
    1045         // The advance is based on the two oldest bit blocks in the advance queue.
    1046         Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
    1047         Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
     1062        // The advance is based on the two oldest bit blocks in the advance buffer.
     1063        // The buffer is maintained as a circular buffer of size advanceCeil.
     1064        // Indexes within the buffer are computed by bitwise and with the indexMask.
     1065        Value * loadIndex2 = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries-1)), indexMask), advBaseIndex);
     1066        Value * loadPtr2 = b.CreateGEP(mCarryDataPtr, loadIndex2);
     1067        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
     1068        //genPrintRegister("advanceq_longint", b.CreateBitCast(advanceq_longint, mBitBlockType));
     1069        Value* strm_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
     1070        //genPrintRegister("strm_longint", b.CreateBitCast(strm_longint, mBitBlockType));
    10481071        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
    10491072        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    10501073    }
    1051     // copy entries from previous blocks forward
    1052     for (int i = loadIdx; i > storeIdx; i--) {
    1053         genCarryDataStore(genCarryDataLoad(i-1), i);
    1054     }
    1055     genCarryDataStore(strm_value, storeIdx);
     1074    b.CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
    10561075    return result_value;
    10571076}
Note: See TracChangeset for help on using the changeset viewer.