Ignore:
Timestamp:
Apr 19, 2018, 1:17:43 PM (15 months ago)
Author:
cameron
Message:

mvmd_slli override for AVX-512 in support of u8u16

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5978 r5979  
    339339    return IDISA_Builder::esimd_bitspread(fw, bitmask);
    340340}
     341
     342Value * IDISA_AVX512F_Builder:: mvmd_slli(unsigned fw, llvm::Value * a, unsigned shift) {
     343    if (shift == 0) return a;
     344    if (fw > 32) {
     345        return mvmd_slli(32, a, shift * (fw/32));
     346    } else if (((shift % 2) == 0) && (fw < 32)) {
     347        return mvmd_slli(2 * fw, a, shift / 2);
     348    }
     349    const unsigned field_count = mBitBlockWidth/fw;
     350    if ((fw == 32) || (hostCPUFeatures.hasAVX512BW && (fw == 16)))   {
     351        // Mask with 1 bit per field indicating which fields are not zeroed out.
     352        Type * fwTy = getIntNTy(fw);
     353        Constant * fieldMask = ConstantInt::get(getIntNTy(field_count), (1 << field_count) - (1 << shift));
     354        Value * permute_func = nullptr;
     355        if (fw == 32) permute_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_d_512);
     356        else permute_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
     357        Constant * indices[field_count];
     358        for (unsigned i = 0; i < field_count; i++) {
     359            indices[i] = i < shift ? UndefValue::get(fwTy) : ConstantInt::get(fwTy, i - shift);
     360        }
     361        Value * args[4] = {ConstantVector::get({indices, field_count}), fwCast(fw, a), UndefValue::get(fwVectorType(fw)), fieldMask};
     362        return bitCast(CreateCall(permute_func, args));
     363    } else {
     364        unsigned field32_shift = (shift * fw) / 32;
     365        unsigned bit_shift = (shift * fw) % 32;
     366        return simd_or(simd_slli(32, mvmd_slli(32, a, field32_shift), bit_shift),
     367                       simd_srli(32, mvmd_slli(32, a, field32_shift + 1), 32-bit_shift));
     368    }
     369}
    341370llvm::Value * IDISA_AVX512F_Builder::simd_popcount(unsigned fw, llvm::Value * a) {
    342371     if (fw == 512) {
Note: See TracChangeset for help on using the changeset viewer.