Changeset 5979


Ignore:
Timestamp:
Apr 19, 2018, 1:17:43 PM (12 months ago)
Author:
cameron
Message:

mvmd_slli override for AVX-512 in support of u8u16

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5978 r5979  
    339339    return IDISA_Builder::esimd_bitspread(fw, bitmask);
    340340}
     341
     342Value * IDISA_AVX512F_Builder:: mvmd_slli(unsigned fw, llvm::Value * a, unsigned shift) {
     343    if (shift == 0) return a;
     344    if (fw > 32) {
     345        return mvmd_slli(32, a, shift * (fw/32));
     346    } else if (((shift % 2) == 0) && (fw < 32)) {
     347        return mvmd_slli(2 * fw, a, shift / 2);
     348    }
     349    const unsigned field_count = mBitBlockWidth/fw;
     350    if ((fw == 32) || (hostCPUFeatures.hasAVX512BW && (fw == 16)))   {
     351        // Mask with 1 bit per field indicating which fields are not zeroed out.
     352        Type * fwTy = getIntNTy(fw);
     353        Constant * fieldMask = ConstantInt::get(getIntNTy(field_count), (1 << field_count) - (1 << shift));
     354        Value * permute_func = nullptr;
     355        if (fw == 32) permute_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_d_512);
     356        else permute_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
     357        Constant * indices[field_count];
     358        for (unsigned i = 0; i < field_count; i++) {
     359            indices[i] = i < shift ? UndefValue::get(fwTy) : ConstantInt::get(fwTy, i - shift);
     360        }
     361        Value * args[4] = {ConstantVector::get({indices, field_count}), fwCast(fw, a), UndefValue::get(fwVectorType(fw)), fieldMask};
     362        return bitCast(CreateCall(permute_func, args));
     363    } else {
     364        unsigned field32_shift = (shift * fw) / 32;
     365        unsigned bit_shift = (shift * fw) % 32;
     366        return simd_or(simd_slli(32, mvmd_slli(32, a, field32_shift), bit_shift),
     367                       simd_srli(32, mvmd_slli(32, a, field32_shift + 1), 32-bit_shift));
     368    }
     369}
    341370llvm::Value * IDISA_AVX512F_Builder::simd_popcount(unsigned fw, llvm::Value * a) {
    342371     if (fw == 512) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r5978 r5979  
    6767    llvm::Value * esimd_bitspread(unsigned fw, llvm::Value * bitmask) override;
    6868    llvm::Value * simd_popcount(unsigned fw, llvm::Value * a) override;
     69    llvm::Value * mvmd_slli(unsigned fw, llvm::Value * a, unsigned shift) override;
    6970    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    7071
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5977 r5979  
    // IDISA_Builder::mvmd_slli (generic implementation): rejects field widths below
    // 8 bits with a fatal error, then delegates to mvmd_dslli with an all-zero vector
    // as the second vector operand.
    // NOTE: this span is a diff hunk. Lines carrying only one gutter number belong to
    // just one of the two revisions named in the hunk header (r5977 / r5979).
    482482Value * IDISA_Builder::mvmd_slli(unsigned fw, Value * a, unsigned shift) {
    483483    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_slli " + std::to_string(fw));
    // Old-revision lines (left gutter only): mvmd_dslli was given field_count - shift.
    484     const auto field_count = mBitBlockWidth / fw;
    485     return mvmd_dslli(fw, a, Constant::getNullValue(fwVectorType(fw)), field_count - shift);
    // New-revision lines (right gutter only): shift is passed to mvmd_dslli unchanged.
     484    Value * shifted = mvmd_dslli(fw, a, Constant::getNullValue(fwVectorType(fw)), shift);
     485    return shifted;
    486486}
    487487
    // IDISA_Builder::mvmd_srli (generic implementation): rejects field widths below
    // 8 bits with a fatal error, then delegates to mvmd_dslli with an all-zero vector
    // as the first vector operand and `a` as the second.
    // NOTE: this span is a diff hunk. Lines carrying only one gutter number belong to
    // just one of the two revisions named in the hunk header (r5977 / r5979).
    488488Value * IDISA_Builder::mvmd_srli(unsigned fw, Value * a, unsigned shift) {
    489489    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_srli " + std::to_string(fw));
    // Old-revision line (left gutter only): shift was passed to mvmd_dslli unchanged.
    490     return mvmd_dslli(fw, Constant::getNullValue(fwVectorType(fw)), a, shift);
    // New-revision lines (right gutter only): mvmd_dslli is given field_count - shift.
     490    const auto field_count = mBitBlockWidth / fw;
     491    return mvmd_dslli(fw, Constant::getNullValue(fwVectorType(fw)), a, field_count - shift);
    491492}
    492493
     
    496497    Constant * Idxs[field_count];
    497498    for (unsigned i = 0; i < field_count; i++) {
    498         Idxs[i] = getInt32(i + shift);
     499        Idxs[i] = getInt32(i + field_count - shift);
    499500    }
    500501    return CreateShuffleVector(fwCast(fw, b), fwCast(fw, a), ConstantVector::get({Idxs, field_count}));
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.h

    r5865 r5979  
    3838    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    3939    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    40     std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) final;
     40    std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
    4141    ~IDISA_SSE2_Builder() {}
    4242};
Note: See TracChangeset for help on using the changeset viewer.