Changeset 6009


Ignore:
Timestamp:
May 1, 2018, 8:10:44 PM (4 months ago)
Author:
cameron
Message:

AVX2 mvmd_compress

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6007 r6009  
    256256    return IDISA_AVX_Builder::hsimd_signmask(fw, a);
    257257}
     258   
     259llvm::Value * IDISA_AVX2_Builder::mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) {
     260    if (mBitBlockWidth == 256 && fw == 32) {
     261        Type * v1xi32Ty = VectorType::get(getInt32Ty(), 1);
     262        Type * v8xi32Ty = VectorType::get(getInt32Ty(), 8);
     263        Type * v8xi1Ty = VectorType::get(getInt1Ty(), 8);
     264        Value * shuf32Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_permd);
     265        Value * PEXT_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
     266        Value * PDEP_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
     267        Value * const popcount_func = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getInt32Ty());
     268        // First duplicate each mask bit to select 4-bit fields
     269        Value * mask = CreateZExt(select_mask, getInt32Ty());
     270        Value * field_count = CreateCall(popcount_func, mask);
     271        Value * spread = CreateCall(PDEP_func, {mask, getInt32(0x11111111)});
     272        Value * ext_mask = CreateMul(spread, getInt32(0xF));
     273        // Now extract the 4-bit index values for the required fields.
     274        Value * indexes = CreateCall(PEXT_func, {getInt32(0x76543210), ext_mask});
     275        // Broadcast to all fields
     276        Value * bdcst = CreateShuffleVector(CreateBitCast(indexes, v1xi32Ty),
     277                                            UndefValue::get(v1xi32Ty),
     278                                            ConstantVector::getNullValue(v8xi32Ty));
     279        Constant * Shifts[8];
     280        for (unsigned int i = 0; i < 8; i++) {
     281            Shifts[i] = getInt32(i*4);
     282        }
     283        Value * compress = CreateCall(shuf32Func, {a, CreateLShr(bdcst, ConstantVector::get({Shifts, 8}))});
     284        Value * selectf = CreateBitCast(CreateSub(CreateShl(getInt32(1), field_count), getInt32(1)), v8xi1Ty);
     285        return CreateSelect(selectf, ConstantVector::getNullValue(v8xi32Ty), compress);
     286    }
     287    return IDISA_Builder::mvmd_compress(fw, a, select_mask);
     288}
    258289
    259290std::string IDISA_AVX512F_Builder::getBuilderUniqueName() {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r6007 r6009  
    4848    std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift) override;
    4949    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
     50    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    5051
    5152    ~IDISA_AVX2_Builder() {}
     
    6970    llvm::Value * mvmd_slli(unsigned fw, llvm::Value * a, unsigned shift) override;
    7071    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    71     virtual llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
     72    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    7273
    7374    ~IDISA_AVX512F_Builder() {
Note: See TracChangeset for help on using the changeset viewer.