Ignore:
Timestamp:
Oct 27, 2017, 1:21:46 PM (19 months ago)
Author:
cameron
Message:

Moving indexed advance operation into IDISA builder

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5464 r5713  
    160160}
    161161   
    162 }
     162std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
            // Emits IR for an "indexed advance" of one bit block: in each field, the bits of
            // strm at the positions set in index_strm are gathered (PEXT), moved up by
            // shiftAmount positions with carried-in bits filling the low end, and scattered
            // back onto the same index positions (PDEP).
            //
            // strm        - block holding the bits to advance.
            // index_strm  - block whose set bits select the positions that take part.
            // shiftIn     - carry-in; field 0 is read when shiftAmount < bitWidth, otherwise
            //               the whole block is reinterpreted as one wide integer.
            // shiftAmount - compile-time advance distance.
            //
            // Returns {carry-out, advanced block}, both passed through bitCast().
            // Calls llvm::report_fatal_error when no PEXT/PDEP intrinsic is selected below.
     163    Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     164    Value * PEXT_f = nullptr;
     165    Value * PDEP_f = nullptr;
            // Field width processed per PEXT/PDEP call; derived from size_t of the build
            // compiling this file.
     166    unsigned bitWidth = sizeof(size_t) * 8;
     167    if (bitWidth == 64) {
     168        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_64);
     169        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64);
     170    }
     171    else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     172        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
     173        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
     174    }
     175    else {
                // NOTE(review): this branch is also taken when bitWidth == 32 and
                // shiftAmount >= 32, although the message mentions only the bit width.
     176        llvm::report_fatal_error("indexed_advance unsupported bit width");
     177    }
     178    Type * iBitBlock = getIntNTy(getBitBlockWidth());
     179    Value * shiftVal = getSize(shiftAmount);
     180    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
                // Short shift: the pending carry fits in a single field.
     181        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
     182        Value * result = allZeroes();
     183        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     184            Value * s = mvmd_extract(bitWidth, strm, i);
     185            Value * ix = mvmd_extract(bitWidth, index_strm, i);
     186            Value * ix_popcnt = CreateCall(popcount_f, {ix});
                    // Gather the indexed bits of this field into the low-order bits.
     187            Value * bits = CreateCall(PEXT_f, {s, ix});
                    // Shift the gathered bits up and place the incoming carry beneath them.
     188            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry);
     189            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
     190            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
                    // Small popcount: part of the old carry is still unconsumed, so the new
                    // carry is the remaining old carry with all of this field's bits above it.
     191            Value * carry_if_popcount_small =
     192                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
     193                            CreateLShr(carry, ix_popcnt));
                    // Large popcount: the old carry is fully consumed; the new carry is the
                    // top shiftAmount gathered bits of this field.
     194            Value * carry_if_popcount_large = CreateLShr(bits, CreateSub(ix_popcnt, shiftVal));
                    // Both candidates are emitted; the select keeps the applicable one.
     195            carry = CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
                    // Scatter the advanced bits back onto the index positions of field i.
     196            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {adv, ix}), i);
     197        }
                // The carry-out is returned in field 0 of an otherwise zero block.
     198        Value * carryOut = mvmd_insert(bitWidth, allZeroes(), carry, 0);
     199        return std::pair<Value *, Value *>{bitCast(carryOut), bitCast(result)};
     200    }
     201    else {
     202        // The shift amount is never smaller than the popcount of the individual
     203        // elements that we deal with (shiftAmount >= bitWidth >= popcount).   This simplifies some of the logic.
                // Here the carry is kept as one block-wide integer rather than a single field.
     204       Value * carry = CreateBitCast(shiftIn, iBitBlock);
     205       Value * result = allZeroes();
     206        for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     207            Value * s = mvmd_extract(bitWidth, strm, i);
     208            Value * ix = mvmd_extract(bitWidth, index_strm, i);
     209            Value * ix_popcnt = CreateCall(popcount_f, {ix});
     210            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
                    // Every output bit of this field comes from the low field of the carry.
     211            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
     212            carry = CreateLShr(carry, CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
                    // Append this field's gathered bits above the carry bits that remain.
     213            carry = CreateOr(carry, CreateShl(CreateZExt(bits, iBitBlock), CreateZExt(CreateSub(shiftVal, ix_popcnt), iBitBlock)));
     214        }
     215        return std::pair<Value *, Value *>{bitCast(carry), bitCast(result)};
     216    }
     217}
     218
     219}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r5489 r5713  
    4646    llvm::Value * hsimd_packl_in_lanes(unsigned lanes, unsigned fw, llvm::Value * a, llvm::Value * b) override;
    4747    std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
     48    std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift) override;
    4849
    4950    ~IDISA_AVX2_Builder() {}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5493 r5713  
    346346}
    347347
     348// full shift producing {shiftout, shifted}
     349std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * a, Value * index, Value * shiftin, unsigned shift) {
            // Base-class fallback: no generic implementation is provided, so this always
            // aborts via report_fatal_error and none of the parameters are used.
            // IDISA_AVX2_Builder overrides this method with a PEXT/PDEP-based version.
     350    llvm::report_fatal_error("bitblock_indexed_advance unimplemented for this architecture");
     351}
     352
    348353Value * IDISA_Builder::bitblock_mask_from(Value * pos) {
    349354    Type * bitBlockInt = getIntNTy(getBitBlockWidth());
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5493 r5713  
    126126    // full shift producing {shiftout, shifted}
    127127    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift);
     128    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_indexed_advance(llvm::Value * a, llvm::Value * index_strm, llvm::Value * shiftin, unsigned shift);
    128129    virtual llvm::Value * bitblock_mask_from(llvm::Value * pos);
    129130    virtual llvm::Value * bitblock_set_bit(llvm::Value * pos);
     131   
    130132
    131133    virtual void CreateBaseFunctions() {}
Note: See TracChangeset for help on using the changeset viewer.