Changeset 4881 for icGREP


Ignore:
Timestamp:
Nov 25, 2015, 11:36:18 AM (4 years ago)
Author:
cameron
Message:

Parallel long addition within icgrep improves performance on AVX2

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.cpp

    r4879 r4881  
    186186}
    187187
     // Compile-time switch: when the build uses 256-bit blocks, define SIGNMASK_AVX2
     // so that hsimd_signmask takes its x86 AVX movmsk intrinsic path for 64- and
     // 32-bit field widths instead of the generic compare-and-bitcast sequence.
     188#if (BLOCK_SIZE==256)
     189#define SIGNMASK_AVX2
     190#endif
     191
    188192Value * IDISA_Builder::hsimd_signmask(unsigned fw, Value * a) {
     193#ifdef SIGNMASK_AVX2
     194    if (fw == 64) {
     195        Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     196        Type * bitBlock_f64type = VectorType::get(mLLVMBuilder->getDoubleTy(), mBitBlockWidth/64);
     197        Value * a_as_pd = mLLVMBuilder->CreateBitCast(a, bitBlock_f64type);
     198        Value * mask = mLLVMBuilder->CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
     199        return mask;
     200    }
     201    else if (fw == 32) {
     202        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     203        Type * bitBlock_f32type = VectorType::get(mLLVMBuilder->getFloatTy(), mBitBlockWidth/32);
     204        Value * a_as_ps = mLLVMBuilder->CreateBitCast(a, bitBlock_f32type);
     205        Value * mask = mLLVMBuilder->CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
     206        return mask;
     207    }
     208#endif
    189209    Value * mask = mLLVMBuilder->CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
    190210    return mLLVMBuilder->CreateBitCast(mask, mLLVMBuilder->getIntNTy(mBitBlockWidth/fw));
     
    194214    Value * aVec = fwCast(fw, a);
    195215    return mLLVMBuilder->CreateExtractElement(aVec, mLLVMBuilder->getInt32(fieldIndex));
     216}
     217
     // Emit IR that writes the scalar `elt` into position `fieldIndex` of `blk`.
     //   fw         - field width passed to fwCast (presumably the bit width of each
     //                field the block is viewed as; confirm against fwCast).
     //   blk        - block value to insert into; it is first passed through fwCast.
     //   elt        - value placed into the selected field.
     //   fieldIndex - index of the field, emitted as an i32 constant.
     // Returns the value produced by CreateInsertElement.
     // NOTE(review): the result is returned as-is (no cast back via bitCast), so it
     // keeps whatever type fwCast produced; confirm callers expect that type.
     218Value * IDISA_Builder::mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex) {
     219    Value * vec = fwCast(fw, blk);
     220    return mLLVMBuilder->CreateInsertElement(vec, elt, mLLVMBuilder->getInt32(fieldIndex));
    196221}
    197222
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.h

    r4879 r4881  
    3939    Value * bitCast(Value * a) {return a->getType() == mBitBlockType ? a : mLLVMBuilder->CreateBitCast(a, mBitBlockType);}
    4040    int getBitBlockWidth() { return mBitBlockWidth;}
     41    Module * getModule() {return mMod;}
    4142    void genPrintRegister(std::string regName, Value * bitblockValue);
    4243   
     
    7576   
    7677    Value * mvmd_extract(unsigned fw, Value * a, unsigned fieldIndex);
     78    Value * mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex);
    7779    Value * mvmd_dslli(unsigned fw, Value * a, Value * b, unsigned shift);
    7880   
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r4870 r4881  
    1818#include <llvm/IR/Function.h>
    1919
    20 
    2120static cl::opt<CarryManagerStrategy> Strategy(cl::desc("Choose carry management strategy:"),
    2221                                              cl::values(
     
    352351}
    353352
     // Compile-time switch: when the build uses 256-bit blocks, define LONGADD.
     // The `#ifndef LONGADD` / `#else` sections later in this file use it to choose
     // between the original carry handling and the long-addition variant built on
     // 64-bit field sums combined with sign-mask arithmetic.
     353#if (BLOCK_SIZE==256)
     354#define LONGADD 1
     355#endif
     356
    354357   
    355358void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) {
     
    359362    }
    360363    else {
     364#ifndef LONGADD
    361365        Value * carry_bit = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_out_strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH-1);
    362366        mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType);
     367#else
     368        mCarryOutPack[posn] = carry_out_strm;
     369#endif
    363370        if (mCarryInfo->getWhileDepth() == 0) {
    364371            storeCarryPack(posn);
     
    383390    }
    384391    else {
     392#ifndef LONGADD
    385393        Value * carryq_value = getCarryOpCarryIn(localIndex);
    386394        Value* carrygen = iBuilder->simd_and(e1, e2);
     
    390398        setCarryOpCarryOut(localIndex, carry_out_strm);
    391399        return sum;
     400#else
     401        Value * carryq_value = getCarryOpCarryIn(localIndex);
     402        Value * carryin = iBuilder->mvmd_extract(32, carryq_value, 0);
     403        Value * carrygen = iBuilder->simd_and(e1, e2);
     404        Value * carryprop = iBuilder->simd_or(e1, e2);
     405        Value * digitsum = iBuilder->simd_add(64, e1, e2);
     406        Value * digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(digitsum)));
     407        Value * carryMask = iBuilder->hsimd_signmask(64, digitcarry);
     408        Value * carryMask2 = mBuilder->CreateOr(mBuilder->CreateAdd(carryMask, carryMask), carryin);
     409        Value * bubble = iBuilder->simd_eq(64, digitsum, iBuilder->allOnes());
     410        Value * bubbleMask = iBuilder->hsimd_signmask(64, bubble);
     411        Value * incrementMask = mBuilder->CreateXor(mBuilder->CreateAdd(bubbleMask, carryMask2), bubbleMask);
     412        Value * increments = iBuilder->esimd_bitspread(64,incrementMask);
     413        Value * sum = iBuilder->simd_add(64, digitsum, increments);
     414        Value * carry_out_strm = iBuilder->mvmd_insert(32, iBuilder->allZeroes(), mBuilder->CreateLShr(incrementMask, iBuilder->getBitBlockWidth()/64), 0);
     415        setCarryOpCarryOut(localIndex, carry_out_strm);
     416        return sum;
     417#endif
    392418    }
    393419}
Note: See TracChangeset for help on using the changeset viewer.