Ignore:
Timestamp:
Nov 25, 2015, 11:36:18 AM (4 years ago)
Author:
cameron
Message:

Add parallel long addition to icgrep's carry manager (enabled via LONGADD when BLOCK_SIZE == 256); improves performance on AVX2

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r4870 r4881  
    1818#include <llvm/IR/Function.h>
    1919
    20 
    2120static cl::opt<CarryManagerStrategy> Strategy(cl::desc("Choose carry management strategy:"),
    2221                                              cl::values(
     
    352351}
    353352
     353#if (BLOCK_SIZE==256)
     354#define LONGADD 1
     355#endif
     356
    354357   
    355358void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) {
     
    359362    }
    360363    else {
     364#ifndef LONGADD
    361365        Value * carry_bit = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_out_strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH-1);
    362366        mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType);
     367#else
     368        mCarryOutPack[posn] = carry_out_strm;
     369#endif
    363370        if (mCarryInfo->getWhileDepth() == 0) {
    364371            storeCarryPack(posn);
     
    383390    }
    384391    else {
     392#ifndef LONGADD
    385393        Value * carryq_value = getCarryOpCarryIn(localIndex);
    386394        Value* carrygen = iBuilder->simd_and(e1, e2);
     
    390398        setCarryOpCarryOut(localIndex, carry_out_strm);
    391399        return sum;
     400#else
     401        Value * carryq_value = getCarryOpCarryIn(localIndex);
     402        Value * carryin = iBuilder->mvmd_extract(32, carryq_value, 0);
     403        Value * carrygen = iBuilder->simd_and(e1, e2);
     404        Value * carryprop = iBuilder->simd_or(e1, e2);
     405        Value * digitsum = iBuilder->simd_add(64, e1, e2);
     406        Value * digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(digitsum)));
     407        Value * carryMask = iBuilder->hsimd_signmask(64, digitcarry);
     408        Value * carryMask2 = mBuilder->CreateOr(mBuilder->CreateAdd(carryMask, carryMask), carryin);
     409        Value * bubble = iBuilder->simd_eq(64, digitsum, iBuilder->allOnes());
     410        Value * bubbleMask = iBuilder->hsimd_signmask(64, bubble);
     411        Value * incrementMask = mBuilder->CreateXor(mBuilder->CreateAdd(bubbleMask, carryMask2), bubbleMask);
     412        Value * increments = iBuilder->esimd_bitspread(64,incrementMask);
     413        Value * sum = iBuilder->simd_add(64, digitsum, increments);
     414        Value * carry_out_strm = iBuilder->mvmd_insert(32, iBuilder->allZeroes(), mBuilder->CreateLShr(incrementMask, iBuilder->getBitBlockWidth()/64), 0);
     415        setCarryOpCarryOut(localIndex, carry_out_strm);
     416        return sum;
     417#endif
    392418    }
    393419}
Note: See TracChangeset for help on using the changeset viewer.