Ignore:
Timestamp:
May 8, 2018, 9:26:02 AM (13 months ago)
Author:
cameron
Message:

mvmd_compress for SSE2; StreamCompress bug fix

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6018 r6024  
    198198    const unsigned numFields = b->getBitBlockWidth()/fw;
    199199    Constant * zeroSplat = Constant::getNullValue(b->fwVectorType(fw));
     200    Constant * oneSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, 1));
    200201    Constant * fwSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, fw));
    201202    Constant * numFieldConst = ConstantInt::get(sizeTy, numFields);
     
    282283            Value * fields_fwd = b->mvmd_slli(fw, outputFields[i], j);
    283284            outputFields[i] = b->simd_or(outputFields[i], b->simd_and(select, fields_fwd));
    284         }
     285       }
    285286    }
    286287    // Now compress the data fields, eliminating all but the last field from
    287     // each run of consecutive field having the same field number.
    288     // same field number as a subsequent field.
    289     Value * eqNext = b->simd_eq(fw, fieldNo, b->mvmd_srli(fw, fieldNo, 1));
     288    // each run of consecutive field having the same field number as a subsequent field.
     289    // But it may be that last field number is 0 which will compare equal to a 0 shifted in.
     290    // So we add 1 to field numbers first.
     291    Value * nonZeroFieldNo = b->simd_add(fw, fieldNo, oneSplat);
     292    Value * eqNext = b->simd_eq(fw, nonZeroFieldNo, b->mvmd_srli(fw, nonZeroFieldNo, 1));
    290293    Value * compressMask = b->hsimd_signmask(fw, b->simd_not(eqNext));
    291294    for (unsigned i = 0; i < mStreamCount; i++) {
    292295        outputFields[i] = b->mvmd_compress(fw, outputFields[i], compressMask);
    293    }
     296    }
    294297    //
    295298    // Finally combine the pendingOutput and outputField data.
Note: See TracChangeset for help on using the changeset viewer.