Changeset 4547


Ignore:
Timestamp:
May 9, 2015, 7:39:24 AM (3 years ago)
Author:
cameron
Message:

Separate out the LongAdvance case

Location:
icGREP/icgrep-devel/icgrep/pablo
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4546 r4547  
    958958    return b.CreateXor(expr, mOneInitializer, "not");
    959959}
     960
     961unsigned const LongAdvanceBase = 64;
     962   
    960963Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
     964    if (shift_amount >= LongAdvanceBase) {
     965        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
     966    }
     967    IRBuilder<> b(mBasicBlock);
     968    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
     969    Value* result_value;
     970   
     971    if (shift_amount == 0) {
     972        result_value = genCarryDataLoad(advanceIndex);
     973        //b.CreateCall(mFunc_print_register, result_value);
     974    }
     975#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
     976    if (shift_amount == 1) {
     977        Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
     978        Value* srli_1_value = b.CreateLShr(strm_value, 63);
     979        Value* packed_shuffle;
     980        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
     981        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
     982        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
     983       
     984        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
     985        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
     986       
     987        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
     988        result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
     989    }
     990    else { //if (block_shift < BLOCK_SIZE) {
     991        // This is the preferred logic, but is too slow for the general case.
     992        // We need to speed up our custom LLVM for this code.
     993        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
     994        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
     995        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
     996        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
     997    }
     998#else
     999    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
     1000    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
     1001    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
     1002    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
     1003   
     1004#endif
     1005    genCarryDataStore(strm_value, advanceIndex);
     1006    return result_value;
     1007}
     1008
     1009//
     1010// Generate code for long advances >= LongAdvanceBase
     1011//
     1012Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
    9611013    IRBuilder<> b(mBasicBlock);
    9621014    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
     
    9661018    const auto loadIdx = advanceIndex + advEntries - 1;
    9671019    Value* result_value;
    968    
    969     if (advEntries == 1) {
    970         if (block_shift == 0) { 
    971             result_value = genCarryDataLoad(loadIdx);
    972             //b.CreateCall(mFunc_print_register, result_value);
    973         }
    974 #if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
    975         if (block_shift == 1) {
    976             Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(loadIdx));
    977             Value* srli_1_value = b.CreateLShr(strm_value, 63);
    978             Value* packed_shuffle;
    979             Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    980             Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    981             packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
    982 
    983             Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
    984             Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
    985 
    986             Value* shl_value = b.CreateShl(strm_value, const_packed_2);
    987             result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
    988         }
    989         else { //if (block_shift < BLOCK_SIZE) {
    990             // This is the preferred logic, but is too slow for the general case.
    991             // We need to speed up our custom LLVM for this code.
    992             Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
    993             Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
    994             Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
    995             result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    996         }
    997 #else
     1020
     1021    if (block_shift == 0) {
     1022        result_value = genCarryDataLoad(loadIdx);
     1023    }
     1024    else if (advEntries == 1) {
    9981025        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
    9991026        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
    10001027        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
    10011028        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    1002 
    1003 #endif
    10041029    }
    10051030    else {
    1006         if (block_shift == 0) {
    1007             result_value = genCarryDataLoad(loadIdx);
    1008         }
    1009         else {
    1010             // The advance is based on the two oldest bit blocks in the advance queue.
    1011             Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
    1012             Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
    1013             Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
    1014             result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    1015             //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx));
    1016             //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx-1));
    1017             //b.CreateCall(mFunc_print_register, result_value);
    1018         }
    1019         // copy entries from previous blocks forward
    1020         for (int i = loadIdx; i > storeIdx; i--) {
    1021             genCarryDataStore(genCarryDataLoad(i-1), i);
    1022         }
     1031        // The advance is based on the two oldest bit blocks in the advance queue.
     1032        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
     1033        Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
     1034        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
     1035        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
     1036    }
     1037    // copy entries from previous blocks forward
     1038    for (int i = loadIdx; i > storeIdx; i--) {
     1039        genCarryDataStore(genCarryDataLoad(i-1), i);
    10231040    }
    10241041    genCarryDataStore(strm_value, storeIdx);
    10251042    return result_value;
    10261043}
    1027 
     1044   
    10281045void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
    10291046    IRBuilder<> b(mBasicBlock);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4545 r4547  
    114114    Value* genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk);
    115115    Value* genAdvanceWithCarry(Value* e1, int shift_amount, unsigned localIndex, const PabloBlock * blk);
     116    Value* genLongAdvanceWithCarry(Value* e1, int shift_amount, unsigned localIndex, const PabloBlock * blk);
    116117    Value* genBitBlockAny(Value* test);
    117118    Value* genShiftHighbitToLow(unsigned FieldWidth, Value * op);
Note: See TracChangeset for help on using the changeset viewer.