Ignore:
Timestamp:
Mar 23, 2018, 10:34:52 AM (13 months ago)
Author:
cameron
Message:

AVX-512BW builder with packh/packl from Cole, Avery and Oscar

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5884 r5931  
    1111
    1212namespace IDISA {
    13    
     13
    1414std::string IDISA_AVX_Builder::getBuilderUniqueName() {
    1515    return mBitBlockWidth != 256 ? "AVX_" + std::to_string(mBitBlockWidth) : "AVX";
     
    5252    return mBitBlockWidth != 256 ? "AVX2_" + std::to_string(mBitBlockWidth) : "AVX2";
    5353}
    54    
     54
    5555Value * IDISA_AVX2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    5656    if ((fw > 8) && (fw <= 64)) {
     
    9696    return IDISA_SSE_Builder::hsimd_packl(fw, a, b);
    9797}
    98    
     98
    9999Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    100100#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(6, 0, 0)
     
    140140    return IDISA_SSE_Builder::hsimd_packh_in_lanes(lanes, fw, a, b);
    141141}
    142    
     142
    143143std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_add_with_carry(Value * e1, Value * e2, Value * carryin) {
    144144    // using LONG_ADD
     
    164164    return std::pair<Value *, Value *>{carry_out, bitCast(sum)};
    165165}
    166    
     166
    167167std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    168168    Value * const popcount = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     
    241241    }
    242242}
    243    
     243
    244244Value * IDISA_AVX2_Builder::hsimd_signmask(unsigned fw, Value * a) {
    245245    // AVX2 special cases
     
    260260}
    261261
    262 
    263 }
     262llvm::Value * IDISA_AVX512BW_Builder::hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) {
     263    if ((mBitBlockWidth == 512) && (fw == 16)) {
     264
     265        const unsigned int field_count = 64;
     266        Constant * Idxs[field_count];
     267
     268        for (unsigned int i = 0; i < field_count; i++) {
     269            Idxs[i] = getInt32(i);
     270        }
     271
     272        llvm::Value * pmovfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pmov_wb_512);
     273        llvm::Value * mask = getInt32(-1);
     274        llvm::Constant * shuffleMask = ConstantVector::get({Idxs, 64});
     275
     276        a = fwCast(fw, a);
     277        a = IDISA_Builder::simd_srai(fw, a, fw/2);
     278        a = CreateCall(pmovfunc, {a, a, mask});
     279        b = fwCast(fw, b);
     280        b = IDISA_Builder::simd_srai(fw, b, fw/2);
     281        b = CreateCall(pmovfunc, {b, b, mask});
     282
     283        llvm::Value * c = CreateShuffleVector(a, b, shuffleMask);
     284        c = bitCast(c);
     285        return c;
     286    }
     287return IDISA_Builder::hsimd_packh(fw, a, b);
     288}
     289
     290llvm::Value * IDISA_AVX512BW_Builder::hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) {
     291    if ((mBitBlockWidth == 512) && (fw == 16)) {
     292
     293        const unsigned int field_count = 64;
     294        Constant * Idxs[field_count];
     295        for (unsigned int i = 0; i < field_count; i++) {
     296            Idxs[i] = getInt32(i);
     297        }
     298
     299        llvm::Value * pmovfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pmov_wb_512);
     300        llvm::Value * mask = getInt32(-1);
     301        llvm::Constant * shuffleMask = ConstantVector::get({Idxs, 64});
     302
     303        a = fwCast(fw, a);
     304        a = CreateCall(pmovfunc, {a, a, mask});
     305        b = fwCast(fw, b);
     306        b = CreateCall(pmovfunc, {b, b, mask});
     307
     308        llvm::Value * c = CreateShuffleVector(a, b, shuffleMask);
     309        c = bitCast(c);
     310        return c;
     311    }
     312return IDISA_Builder::hsimd_packl(fw, a, b);
     313}
     314
     315
     316}
Note: See TracChangeset for help on using the changeset viewer.