Changeset 6024


Ignore:
Timestamp:
May 8, 2018, 9:26:02 AM (5 months ago)
Author:
cameron
Message:

mvmd_compress for SSE2, StreamCompress bug fix

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5832 r6024  
    153153}
    154154
     155Value * IDISA_SSE_Builder::mvmd_compress(unsigned fw, Value * a, Value * selector) {
     156    if ((mBitBlockWidth == 128) && (fw == 64)) {
     157        Constant * keep[2] = {ConstantInt::get(getInt64Ty(), 1), ConstantInt::get(getInt64Ty(), 3)};
     158        Constant * keep_mask = ConstantVector::get({keep, 2});
     159        Constant * shift[2] = {ConstantInt::get(getInt64Ty(), 2), ConstantInt::get(getInt64Ty(), 0)};
     160        Constant * shifted_mask = ConstantVector::get({shift, 2});
     161        Value * a_srli1 = mvmd_srli(64, a, 1);
     162        Value * bdcst = simd_fill(64, CreateZExt(selector, getInt64Ty()));
     163        Value * kept = simd_and(simd_eq(64, simd_and(keep_mask, bdcst), keep_mask), a);
     164        Value * shifted = simd_and(a_srli1, simd_eq(64, shifted_mask, bdcst));
     165        return simd_or(kept, shifted);
     166    }
     167    return IDISA_Builder::mvmd_compress(fw, a, selector);
    155168}
     169
     170
     171}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.h

    r5979 r6024  
    33
    44/*
    5  *  Copyright (c) 2015 International Characters.
     5 *  Copyright (c) 2018 International Characters.
    66 *  This software is licensed to the public under the Open Software License 3.0.
    77 *  icgrep is a trademark of International Characters.
     
    2222    virtual std::string getBuilderUniqueName() override;
    2323    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
     24    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    2425    ~IDISA_SSE_Builder() {}
    2526};
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6018 r6024  
    198198    const unsigned numFields = b->getBitBlockWidth()/fw;
    199199    Constant * zeroSplat = Constant::getNullValue(b->fwVectorType(fw));
     200    Constant * oneSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, 1));
    200201    Constant * fwSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, fw));
    201202    Constant * numFieldConst = ConstantInt::get(sizeTy, numFields);
     
    282283            Value * fields_fwd = b->mvmd_slli(fw, outputFields[i], j);
    283284            outputFields[i] = b->simd_or(outputFields[i], b->simd_and(select, fields_fwd));
    284         }
     285       }
    285286    }
    286287    // Now compress the data fields, eliminating all but the last field from
    287     // each run of consecutive field having the same field number.
    288     // same field number as a subsequent field.
    289     Value * eqNext = b->simd_eq(fw, fieldNo, b->mvmd_srli(fw, fieldNo, 1));
     288    // each run of consecutive field having the same field number as a subsequent field.
     289    // But it may be that last field number is 0 which will compare equal to a 0 shifted in.
     290    // So we add 1 to field numbers first.
     291    Value * nonZeroFieldNo = b->simd_add(fw, fieldNo, oneSplat);
     292    Value * eqNext = b->simd_eq(fw, nonZeroFieldNo, b->mvmd_srli(fw, nonZeroFieldNo, 1));
    290293    Value * compressMask = b->hsimd_signmask(fw, b->simd_not(eqNext));
    291294    for (unsigned i = 0; i < mStreamCount; i++) {
    292295        outputFields[i] = b->mvmd_compress(fw, outputFields[i], compressMask);
    293    }
     296    }
    294297    //
    295298    // Finally combine the pendingOutput and outputField data.
Note: See TracChangeset for help on using the changeset viewer.