Changeset 4999


Ignore:
Timestamp:
Apr 1, 2016, 1:57:18 PM (18 months ago)
Author:
cameron
Message:

Further hsimd_signmask options using sse2.pmovmskb.128; allow BlockSize=512 with SSE2

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_sse_builder.cpp

    r4998 r4999  
    43 43            return mask;
    44 44        }
     45        if (fw == 8) {
     46            Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
     47            Value * mask = CreateCall(pmovmskb_func, fwCast(8, a));
     48            return mask;
     49        }
     50    }
     51    int fieldCount = mBitBlockWidth/fw;
     52    if ((fieldCount > 4) && (fieldCount <= 16)) {
     53        Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
     54        int fieldBytes = fw/8;
     55        int hiByte = fieldBytes - 1;
     56        std::vector<Constant*> Idxs;
     57        for (unsigned i = 0; i < fieldCount; i++) {
     58            Idxs.push_back(getInt32(fieldBytes*i+hiByte));
     59        }
     60        for (unsigned i = fieldCount; i < 16; i++) {
     61            Idxs.push_back(getInt32(mBitBlockWidth/8));
     62        }
     63        Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get(Idxs));
     64        Value * mask = CreateCall(pmovmskb_func, packh);
     65        return mask;
    45 66    }
    46 67    // Otherwise use default SSE logic.
Note: See TracChangeset for help on using the changeset viewer.