Changeset 6046


Ignore:
Timestamp:
May 16, 2018, 10:02:54 PM (5 days ago)
Author:
cameron
Message:

Some fixes

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6045 r6046  
    355355        }
    356356        Constant * splat01 = ConstantVector::get({Idxs, field_count});
    357         Value * half_shuffle_table = simd_add(fw, simd_add(fw, shuffle_table, shuffle_table), splat01);
    358         return mvmd_shuffle(half_fw, a, half_shuffle_table);
     357        Value * half_shuffle_table = simd_or(shuffle_table, mvmd_slli(half_fw, shuffle_table, 1));
     358        half_shuffle_table = simd_add(fw, simd_add(fw, half_shuffle_table, half_shuffle_table), splat01);
     359        Value * rslt = mvmd_shuffle(half_fw, a, half_shuffle_table);
     360        return rslt;
    359361    }
    360362    if (mBitBlockWidth == 256 && fw == 32) {
     
    570572        return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
    571573    }
    572     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
     574    return IDISA_Builder::mvmd_shuffle2(fw, a, b, shuffle_table);
    573575}
    574576
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6045 r6046  
    191191        Idxs[i] = ConstantInt::get(fwTy, i + field_count);
    192192    }
    193     Value * shuffle = simd_add(fw, simd_fill(fw, shift), ConstantVector::get({Idxs, field_count}));
    194     return mvmd_shuffle2(fw, fwCast(fw, b), fwCast(fw, a), shuffle);
     193    Value * shuffle = simd_sub(fw, ConstantVector::get({Idxs, field_count}), simd_fill(fw, shift));
     194    Value * rslt = mvmd_shuffle2(fw, fwCast(fw, a), fwCast(fw, b), shuffle);
     195    return rslt;
    195196}
    196197
     
    534535    //  Use two shuffles, with selection by the bit value within the shuffle_table.
    535536    const auto field_count = mBitBlockWidth/fw;
    536     Constant * selectorSplat = ConstantVector::getSplat(field_count, ConstantInt::get(getIntNTy(fw), 1<<field_count));
     537    Constant * selectorSplat = ConstantVector::getSplat(field_count, ConstantInt::get(getIntNTy(fw), field_count));
    537538    Value * selectMask = simd_eq(fw, simd_and(shuffle_table, selectorSplat), selectorSplat);
    538     Value * negSelect = simd_not(selectMask);
    539     Value * tbl = simd_and(shuffle_table, negSelect);
    540     return simd_or(simd_and(mvmd_shuffle(fw, a, tbl), negSelect), simd_and(mvmd_shuffle(fw, b, tbl), selectMask));
     539    Value * tbl = simd_and(shuffle_table, simd_not(selectorSplat));
     540    Value * rslt= simd_or(simd_and(mvmd_shuffle(fw, a, tbl), simd_not(selectMask)), simd_and(mvmd_shuffle(fw, b, tbl), selectMask));
     541    return rslt;
    541542}
    542543   
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6045 r6046  
    210210    // Calculate the field values and offsets we need for assembling
    211211    // a full block of source bits.  Assembly will use the following operations.
    212     // A = b->simd_srli(fw, b->mvmd_dslli(fw, source, pending, field_offset_lo), bit_offset);
    213     // B = b->simd_slli(fw, b->mvmd_dslli(fw, source, pending, field_offset_hi), shift_fwd);
     212    // A = b->simd_srl(fw, b->mvmd_dsll(fw, source, pending, field_offset_lo), bit_offset);
     213    // B = b->simd_sll(fw, b->mvmd_dsll(fw, source, pending, field_offset_hi), shift_fwd);
    214214    // all_source_bits = simd_or(A, B);
    215215    Value * pendingOffset = b->CreateURem(pendingBlockEnd, bwConst);
    216     Value * field_offset_lo =  b->simd_fill(fw, b->CreateUDiv(pendingOffset, fwConst));
     216    Value * field_offset_lo =  b->CreateUDiv(pendingOffset, fwConst);
    217217    Value * bit_offset = b->simd_fill(fw, b->CreateURem(pendingOffset, fwConst));
    218218   
    219219    // Carefully avoid a shift by the full fieldwidth (which gives a poison value).
    220220    // field_offset_lo + 1 unless the bit_offset is 0, in which case it is just field_offset_lo.
    221     Value * field_offset_hi =  b->simd_fill(fw, b->CreateUDiv(b->CreateAdd(pendingOffset, fw_sub1Const), fwConst));
     221    Value * field_offset_hi =  b->CreateUDiv(b->CreateAdd(pendingOffset, fw_sub1Const), fwConst);
    222222    // fw - bit_offset, unless bit_offset is 0, in which case, the shift_fwd is 0.
    223223    Value * shift_fwd = b->CreateURem(b->CreateSub(fwSplat, bit_offset), fwSplat);
     
    273273        b->setScalarField("pendingSourceBlock_" + std::to_string(i), b->bitCast(pendingDataPhi[i]));
    274274    }
    275     b->getModule()->dump();
    276275}
    277276
     
    338337    Value * extractionMaskPtr = kb->getInputStreamBlockPtr("depositMask", ZERO, blockOffsetPhi);
    339338    extractionMaskPtr = kb->CreatePointerCast(extractionMaskPtr, fieldPtrTy);
     339    for (unsigned i = 0; i < fieldsPerBlock; i++) {
     340        mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
     341    }
    340342    for (unsigned j = 0; j < mStreamCount; ++j) {
    341343        Value * inputPtr = kb->getInputStreamBlockPtr("inputStreamSet", kb->getInt32(j), blockOffsetPhi);
Note: See TracChangeset for help on using the changeset viewer.