Ignore:
Timestamp:
Jan 15, 2018, 3:42:27 PM (18 months ago)
Author:
nmedfort
Message:

Bug fix for UntilN

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5828 r5832  
    170170   
    171171std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    172     Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     172    Value * const popcount = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
    173173    Value * PEXT_f = nullptr;
    174174    Value * PDEP_f = nullptr;
    175     unsigned bitWidth = sizeof(size_t) * 8;
     175    const unsigned bitWidth = getSizeTy()->getBitWidth();
    176176    if (bitWidth == 64) {
    177177        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_64);
    178178        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64);
    179     }
    180     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     179    } else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    181180        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
    182181        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
    183     }
    184     else {
     182    } else {
    185183        llvm::report_fatal_error("indexed_advance unsupported bit width");
    186184    }
    187185    Type * iBitBlock = getIntNTy(getBitBlockWidth());
    188186    Value * shiftVal = getSize(shiftAmount);
     187    const auto n = getBitBlockWidth() / bitWidth;
     188    VectorType * const vecTy = VectorType::get(getSizeTy(), n);
    189189    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    190190        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
    191         Value * result = allZeroes();
    192         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     191        Value * result = UndefValue::get(vecTy);
     192        for (unsigned i = 0; i < n; i++) {
    193193            Value * s = mvmd_extract(bitWidth, strm, i);
    194194            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    195             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     195            Value * ix_popcnt = CreateCall(popcount, {ix});
    196196            Value * bits = CreateCall(PEXT_f, {s, ix});
    197197            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry);
    198198            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
    199199            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
    200             Value * carry_if_popcount_small = 
     200            Value * carry_if_popcount_small =
    201201                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
    202202                            CreateLShr(carry, ix_popcnt));
     
    212212        // elements that we deal with.   This simplifies some of the logic.
    213213        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    214         Value * result = allZeroes();
    215         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     214        Value * result = UndefValue::get(vecTy);
     215        for (unsigned i = 0; i < n; i++) {
    216216            Value * s = mvmd_extract(bitWidth, strm, i);
    217217            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    218             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     218            Value * ix_popcnt = CreateCall(popcount, {ix});
    219219            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    220220            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
     
    229229        // elements that we deal with.   This simplifies some of the logic.
    230230        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    231         Value * result = allZeroes();
    232         Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     231        Value * result = UndefValue::get(vecTy);
     232        Value * carryOut = ConstantInt::getNullValue(iBitBlock);
    233233        Value * generated = getSize(0);
    234         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     234        for (unsigned i = 0; i < n; i++) {
    235235            Value * s = mvmd_extract(bitWidth, strm, i);
    236236            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    237             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     237            Value * ix_popcnt = CreateCall(popcount, {ix});
    238238            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    239239            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
Note: See TracChangeset for help on using the changeset viewer.