Changeset 4289


Ignore:
Timestamp:
Nov 3, 2014, 5:14:00 PM (4 years ago)
Author:
linmengl
Message:

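Add the USE_LONG_INTEGER_SHIFT macro, which selects i128 (long-integer) shifting for Advance when BLOCK_SIZE == 128; the macro is added commented out, so it is disabled by default.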

Location:
icGREP/icgrep-devel/icgrep/pablo
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4288 r4289  
    227227        mMarkerMap.insert(std::make_pair(name, basisBit));
    228228    }
    229    
     229
    230230    //Generate the IR instructions for the function.
    231231    compileStatements(pb.statements());
     
    298298    //A pointer to the BitBlock vector.
    299299    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
    300         // Advance q array
     300    // Advance q array
    301301    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
    302302
     
    346346                                              /*isVarArg=*/false);
    347347
    348     mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" + 
     348    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
    349349                                              std::to_string(BLOCK_SIZE));
    350350    if (!mFunctionUaddOverflow) {
     
    649649                genAdvanceInLoad(baseAdvanceQueueIdx + i);
    650650            }
    651         }       
     651        }
    652652
    653653        SmallVector<const Next*, 4> nextNodes;
     
    661661        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
    662662        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
    663         // will refer to the previous value.       
     663        // will refer to the previous value.
    664664
    665665        ++mNestingDepth;
    666666
    667667        compileStatements(whileStatement->getBody());
    668        
     668
    669669        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
    670670        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
     
    685685        IRBuilder<> bCond(whileCondBlock);
    686686        // generate phi nodes for any carry propagating instruction
    687         int whileCarryCount = whileStatement->getInclusiveCarryCount();
    688         int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
     687        int whileCarryCount = whileStatement->getInclusiveCarryCount();
     688        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
    689689        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
    690690        unsigned index = 0;
     
    743743
    744744        // EXIT BLOCK
    745         mBasicBlock = whileEndBlock;   
     745        mBasicBlock = whileEndBlock;
    746746        if (--mNestingDepth == 0) {
    747747            for (index = 0; index != whileCarryCount; ++index) {
    748748                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
    749             }
     749            }
    750750            for (index = 0; index != whileAdvanceCount; ++index) {
    751751                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
     
    891891}
    892892
    893 Value* PabloCompiler::genCarryInLoad(const unsigned index) {   
     893Value* PabloCompiler::genCarryInLoad(const unsigned index) {
    894894    assert (index < mCarryQueueVector.size());
    895895    if (mNestingDepth == 0) {
     
    903903    assert (carryOut);
    904904    assert (index < mCarryQueueVector.size());
    905     if (mNestingDepth == 0) {       
     905    if (mNestingDepth == 0) {
    906906        IRBuilder<> b(mBasicBlock);
    907907        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
     
    910910}
    911911
    912 Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {   
     912Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
    913913    assert (index < mAdvanceQueueVector.size());
    914914    if (mNestingDepth == 0) {
     
    922922    assert (advanceOut);
    923923    assert (index < mAdvanceQueueVector.size());
    924     if (mNestingDepth == 0) {       
     924    if (mNestingDepth == 0) {
    925925        IRBuilder<> b(mBasicBlock);
    926926        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
     
    957957#if (BLOCK_SIZE == 128)
    958958    const auto advanceIdx = mAdvanceQueueIdx++;
     959#ifdef USE_LONG_INTEGER_SHIFT
     960    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     961    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     962    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), advanceq_longint, "advance");
     963    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
     964    Value* advance_out = b.CreateBitCast(b.CreateLShr(strm_longint, BLOCK_SIZE - shift_amount, "advance_out"), mBitBlockType);
     965    genAdvanceOutStore(advance_out, advanceIdx);
     966
     967    return result_value;
     968#else
    959969    if (shift_amount == 1) {
    960         Value* advanceq_value = genAdvanceInLoad(advanceIdx);
    961         Value* srli_1_value = b.CreateLShr(strm_value, 63);
    962         Value* packed_shuffle;
    963         Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    964         Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    965         packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
    966 
    967         Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
    968         Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
    969 
    970         Value* shl_value = b.CreateShl(strm_value, const_packed_2);
    971         Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
    972 
    973         Value* advance_out = genShiftHighbitToLow(strm_value, "advance_out");
    974         //CarryQ - carry out:
    975         genAdvanceOutStore(advance_out, advanceIdx);
    976            
    977         return result_value;
     970        Value* advanceq_value = genAdvanceInLoad(advanceIdx);
     971        Value* srli_1_value = b.CreateLShr(strm_value, 63);
     972        Value* packed_shuffle;
     973        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
     974        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
     975        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
     976
     977        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
     978        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
     979
     980        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
     981        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
     982
     983        Value* advance_out = genShiftHighbitToLow(strm_value, "advance_out");
     984        //CarryQ - carry out:
     985        genAdvanceOutStore(advance_out, advanceIdx);
     986
     987        return result_value;
    978988    }
    979989    else if (shift_amount < 64) {
    980         // This is the preferred logic, but is too slow for the general case.   
     990        // This is the preferred logic, but is too slow for the general case.
    981991        // We need to speed up our custom LLVM for this code.
    982         Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    983         Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    984         Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), advanceq_longint, "advance");
     992        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     993        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     994        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), advanceq_longint, "advance");
    985995        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    986996        Value* advance_out = b.CreateBitCast(b.CreateLShr(strm_longint, BLOCK_SIZE - shift_amount, "advance_out"), mBitBlockType);
    987         genAdvanceOutStore(advance_out, advanceIdx);
    988            
    989         return result_value;
     997        genAdvanceOutStore(advance_out, advanceIdx);
     998
     999        return result_value;
    9901000    }
    9911001    else {//if (shift_amount >= 64) {
    992         throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
    993     }
    994 #endif
     1002        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
     1003    }
     1004#endif //else USE_LONG_INTEGER_SHIFT
     1005#endif //BLOCK_SIZE == 128
    9951006
    9961007#if (BLOCK_SIZE == 256)
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4288 r4289  
    1010//indicates that we use llvm.uadd.with.overflow.carryin for genAddWithCarry
    1111//#define USE_UADD_OVERFLOW
     12//#define USE_LONG_INTEGER_SHIFT
    1213
    1314//Pablo Expressions
Note: See TracChangeset for help on using the changeset viewer.