Ignore:
Timestamp:
Jan 15, 2018, 3:42:27 PM (18 months ago)
Author:
nmedfort
Message:

Bug fix for UntilN

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5828 r5832  
    170170   
    171171std::pair<Value *, Value *> IDISA_AVX2_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    172     Value * popcount_f = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
     172    Value * const popcount = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, getSizeTy());
    173173    Value * PEXT_f = nullptr;
    174174    Value * PDEP_f = nullptr;
    175     unsigned bitWidth = sizeof(size_t) * 8;
     175    const unsigned bitWidth = getSizeTy()->getBitWidth();
    176176    if (bitWidth == 64) {
    177177        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_64);
    178178        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_64);
    179     }
    180     else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     179    } else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    181180        PEXT_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
    182181        PDEP_f = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pdep_32);
    183     }
    184     else {
     182    } else {
    185183        llvm::report_fatal_error("indexed_advance unsupported bit width");
    186184    }
    187185    Type * iBitBlock = getIntNTy(getBitBlockWidth());
    188186    Value * shiftVal = getSize(shiftAmount);
     187    const auto n = getBitBlockWidth() / bitWidth;
     188    VectorType * const vecTy = VectorType::get(getSizeTy(), n);
    189189    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    190190        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
    191         Value * result = allZeroes();
    192         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     191        Value * result = UndefValue::get(vecTy);
     192        for (unsigned i = 0; i < n; i++) {
    193193            Value * s = mvmd_extract(bitWidth, strm, i);
    194194            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    195             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     195            Value * ix_popcnt = CreateCall(popcount, {ix});
    196196            Value * bits = CreateCall(PEXT_f, {s, ix});
    197197            Value * adv = CreateOr(CreateShl(bits, shiftAmount), carry);
    198198            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
    199199            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
    200             Value * carry_if_popcount_small = 
     200            Value * carry_if_popcount_small =
    201201                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
    202202                            CreateLShr(carry, ix_popcnt));
     
    212212        // elements that we deal with.   This simplifies some of the logic.
    213213        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    214         Value * result = allZeroes();
    215         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     214        Value * result = UndefValue::get(vecTy);
     215        for (unsigned i = 0; i < n; i++) {
    216216            Value * s = mvmd_extract(bitWidth, strm, i);
    217217            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    218             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     218            Value * ix_popcnt = CreateCall(popcount, {ix});
    219219            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    220220            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
     
    229229        // elements that we deal with.   This simplifies some of the logic.
    230230        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    231         Value * result = allZeroes();
    232         Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     231        Value * result = UndefValue::get(vecTy);
     232        Value * carryOut = ConstantInt::getNullValue(iBitBlock);
    233233        Value * generated = getSize(0);
    234         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     234        for (unsigned i = 0; i < n; i++) {
    235235            Value * s = mvmd_extract(bitWidth, strm, i);
    236236            Value * ix = mvmd_extract(bitWidth, index_strm, i);
    237             Value * ix_popcnt = CreateCall(popcount_f, {ix});
     237            Value * ix_popcnt = CreateCall(popcount, {ix});
    238238            Value * bits = CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
    239239            result = mvmd_insert(bitWidth, result, CreateCall(PDEP_f, {mvmd_extract(bitWidth, carry, 0), ix}), i);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5830 r5832  
    476476// full shift producing {shiftout, shifted}
    477477std::pair<Value *, Value *> IDISA_Builder::bitblock_indexed_advance(Value * strm, Value * index_strm, Value * shiftIn, unsigned shiftAmount) {
    478     unsigned bitWidth = sizeof(size_t) * 8;
    479     Type * iBitBlock = getIntNTy(getBitBlockWidth());
    480     Value * shiftVal = getSize(shiftAmount);
     478    const unsigned bitWidth = getSizeTy()->getBitWidth();
     479    Type * const iBitBlock = getIntNTy(getBitBlockWidth());
     480    Value * const shiftVal = getSize(shiftAmount);
    481481    Value * extracted_bits = simd_pext(bitWidth, strm, index_strm);
    482482    Value * ix_popcounts = simd_popcount(bitWidth, index_strm);
    483 
    484    
     483    const auto n = getBitBlockWidth() / bitWidth;
     484    VectorType * const vecTy = VectorType::get(getSizeTy(), n);
    485485    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    486486        Value * carry = mvmd_extract(bitWidth, shiftIn, 0);
    487         Value * result = allZeroes();
    488         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     487        Value * result = UndefValue::get(vecTy);
     488        for (unsigned i = 0; i < n; i++) {
    489489            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    490490            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);
     
    492492            // We have two cases depending on whether the popcount of the index pack is < shiftAmount or not.
    493493            Value * popcount_small = CreateICmpULT(ix_popcnt, shiftVal);
    494             Value * carry_if_popcount_small = 
     494            Value * carry_if_popcount_small =
    495495                CreateOr(CreateShl(bits, CreateSub(shiftVal, ix_popcnt)),
    496496                            CreateLShr(carry, ix_popcnt));
     
    506506        // elements that we deal with.   This simplifies some of the logic.
    507507        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    508         Value * result = allZeroes();
    509         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     508        Value * result = UndefValue::get(vecTy);
     509        for (unsigned i = 0; i < n; i++) {
    510510            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    511511            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carry).
     
    521521        // elements that we deal with.   This simplifies some of the logic.
    522522        Value * carry = CreateBitCast(shiftIn, iBitBlock);
    523         Value * result = allZeroes();
    524         Value * carryOut = CreateBitCast(allZeroes(), iBitBlock);
     523        Value * result = UndefValue::get(vecTy);
     524        Value * carryOut = ConstantInt::getNullValue(iBitBlock);
    525525        Value * generated = getSize(0);
    526         for (unsigned i = 0; i < getBitBlockWidth()/bitWidth; i++) {
     526        for (unsigned i = 0; i < n; i++) {
    527527            Value * ix_popcnt = mvmd_extract(bitWidth, ix_popcounts, i);
    528528            Value * bits = mvmd_extract(bitWidth, extracted_bits, i);  // All these bits are shifted out (appended to carry).
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5828 r5832  
    106106    llvm::Value * simd_popcount(unsigned fw, llvm::Value * a) {
    107107        if (LLVM_UNLIKELY(fw < 8)) {
     108            assert ("field width is less than 8" && false);
    108109            llvm::report_fatal_error("Unsupported field width: popcount " + std::to_string(fw));
    109110        }
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5464 r5832  
    6666}
    6767
    68 Value * IDISA_SSE_Builder::hsimd_signmask(unsigned fw, Value * a) {
     68Value * IDISA_SSE_Builder::hsimd_signmask(const unsigned fw, Value * a) {
    6969    // SSE special cases using Intrinsic::x86_sse_movmsk_ps (fw=32 only)
    7070    if (fw == 32) {
Note: See TracChangeset for help on using the changeset viewer.