Changeset 5931 for icGREP/icgrep-devel


Ignore:
Timestamp:
Mar 23, 2018, 10:34:52 AM (12 months ago)
Author:
cameron
Message:

AVX-512BW builder with packh/packl from Cole, Avery and Oscar

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5884 r5931  
    1111
    1212namespace IDISA {
    13    
     13
    1414std::string IDISA_AVX_Builder::getBuilderUniqueName() {
    1515    return mBitBlockWidth != 256 ? "AVX_" + std::to_string(mBitBlockWidth) : "AVX";
     
    5252    return mBitBlockWidth != 256 ? "AVX2_" + std::to_string(mBitBlockWidth) : "AVX2";
    5353}
    54    
     54
    5555Value * IDISA_AVX2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    5656    if ((fw > 8) && (fw <= 64)) {
     
    9696    return IDISA_SSE_Builder::hsimd_packl(fw, a, b);
    9797}
    98    
     98
    9999Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    100100#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(6, 0, 0)
     
    140140    return IDISA_SSE_Builder::hsimd_packh_in_lanes(lanes, fw, a, b);
    141141}
    142    
     142
    143143std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_add_with_carry(Value * e1, Value * e2, Value * carryin) {
    144144    // using LONG_ADD
     
    164164    return std::pair<Value *, Value *>{carry_out, bitCast(sum)};
    165165}
    166    
     166
    167167std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    168168    Value * const popcount = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     
    241241    }
    242242}
    243    
     243
    244244Value * IDISA_AVX2_Builder::hsimd_signmask(unsigned fw, Value * a) {
    245245    // AVX2 special cases
     
    260260}
    261261
    262 
    263 }
     262llvm::Value * IDISA_AVX512BW_Builder::hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) {
     263    if ((mBitBlockWidth == 512) && (fw == 16)) {
     264
     265        const unsigned int field_count = 64;
     266        Constant * Idxs[field_count];
     267
     268        for (unsigned int i = 0; i < field_count; i++) {
     269            Idxs[i] = getInt32(i);
     270        }
     271
     272        llvm::Value * pmovfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pmov_wb_512);
     273        llvm::Value * mask = getInt32(-1);
     274        llvm::Constant * shuffleMask = ConstantVector::get({Idxs, 64});
     275
     276        a = fwCast(fw, a);
     277        a = IDISA_Builder::simd_srai(fw, a, fw/2);
     278        a = CreateCall(pmovfunc, {a, a, mask});
     279        b = fwCast(fw, b);
     280        b = IDISA_Builder::simd_srai(fw, b, fw/2);
     281        b = CreateCall(pmovfunc, {b, b, mask});
     282
     283        llvm::Value * c = CreateShuffleVector(a, b, shuffleMask);
     284        c = bitCast(c);
     285        return c;
     286    }
     287return IDISA_Builder::hsimd_packh(fw, a, b);
     288}
     289
     290llvm::Value * IDISA_AVX512BW_Builder::hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) {
     291    if ((mBitBlockWidth == 512) && (fw == 16)) {
     292
     293        const unsigned int field_count = 64;
     294        Constant * Idxs[field_count];
     295        for (unsigned int i = 0; i < field_count; i++) {
     296            Idxs[i] = getInt32(i);
     297        }
     298
     299        llvm::Value * pmovfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pmov_wb_512);
     300        llvm::Value * mask = getInt32(-1);
     301        llvm::Constant * shuffleMask = ConstantVector::get({Idxs, 64});
     302
     303        a = fwCast(fw, a);
     304        a = CreateCall(pmovfunc, {a, a, mask});
     305        b = fwCast(fw, b);
     306        b = CreateCall(pmovfunc, {b, b, mask});
     307
     308        llvm::Value * c = CreateShuffleVector(a, b, shuffleMask);
     309        c = bitCast(c);
     310        return c;
     311    }
     312return IDISA_Builder::hsimd_packl(fw, a, b);
     313}
     314
     315
     316}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r5884 r5931  
    1313class IDISA_AVX_Builder : public IDISA_SSE2_Builder {
    1414public:
    15    
     15
    1616    IDISA_AVX_Builder(llvm::LLVMContext & C, unsigned vectorWidth, unsigned stride)
    1717    : IDISA_Builder(C, vectorWidth, stride)
     
    3131class IDISA_AVX2_Builder : public IDISA_AVX_Builder {
    3232public:
    33    
     33
    3434    IDISA_AVX2_Builder(llvm::LLVMContext & C, unsigned vectorWidth, unsigned stride)
    3535    : IDISA_Builder(C, vectorWidth, stride)
     
    5151    ~IDISA_AVX2_Builder() {}
    5252};
    53  
    54 class IDISA_AVX512BW_Builder : public virtual IDISA_Builder {
     53
     54class IDISA_AVX512BW_Builder : public IDISA_AVX2_Builder {
    5555public:
     56
     57    IDISA_AVX512BW_Builder(llvm::LLVMContext & C, unsigned vectorWidth, unsigned stride)
     58    : IDISA_Builder(C, vectorWidth, stride)
     59    , IDISA_AVX2_Builder(C, vectorWidth, stride) {
     60    }
     61
     62    virtual std::string getBuilderUniqueName() override;
     63    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     64    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    5665   
    57     IDISA_AVX512BW_Builder(llvm::LLVMContext & C, unsigned vectorWidth, unsigned stride)
    58     : IDISA_Builder(C, vectorWidth, stride) {
    59     }
    60    
    61     virtual std::string getBuilderUniqueName() override;
    62 
     66    ~IDISA_AVX512BW_Builder() {}
    6367};
    6468
    65    
     69
    6670}
    6771#endif // IDISA_AVX_BUILDER_H
Note: See TracChangeset for help on using the changeset viewer.