Changeset 4996


Ignore:
Timestamp:
Mar 31, 2016, 10:48:32 AM (20 months ago)
Author:
cameron
Message:

The hsimd_packh/hsimd_packl code duplicated between the AVX and SSE2 builders was buggy; eliminate the duplicate code and fall back to the base-class logic

Location:
icGREP/icgrep-devel/icgrep/IDISA
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.cpp

    r4957 r4996  
    1414
    1515Value * IDISA_AVX_Builder::hsimd_signmask(unsigned fw, Value * a) {
     16    // AVX2 special cases
    1617    if (mBitBlockWidth == 256) {
    1718        if (fw == 64) {
     
    4647        }
    4748    }
    48     Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
    49     return CreateBitCast(mask, getIntNTy(mBitBlockWidth/fw));
     49    // Otherwise use default SSE logic.
     50    return IDISA_SSE_Builder::hsimd_signmask(fw, a);
    5051}
    5152   
     
    7273        return hsimd_packh(mBitBlockWidth/2, shufa, shufb);
    7374    }
    74     else {
    75         std::vector<Constant*> Idxs;
    76         for (unsigned i = 0; i < field_count; i++) {
    77             Idxs.push_back(getInt32(2*i));
    78         }
    79         return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    80     }
     75    // Otherwise use default SSE logic.
     76    return IDISA_SSE_Builder::hsimd_packh(fw, a, b);
    8177}
    8278
     
    10399        return hsimd_packl(mBitBlockWidth/2, shufa, shufb);
    104100    }
    105     else {
    106         std::vector<Constant*> Idxs;
    107         for (unsigned i = 0; i < field_count; i++) {
    108             Idxs.push_back(getInt32(2*i+1));
    109         }
    110         return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    111     }
     101    // Otherwise use default SSE logic.
     102    return IDISA_SSE_Builder::hsimd_packl(fw, a, b);
    112103}
    113104   
     
    117108        return CreateCall3(vperm2i128func, fwCast(64, a), fwCast(64, b), getInt8(0x31));
    118109    }
    119     unsigned field_count = mBitBlockWidth/fw;
    120     Value * aVec = fwCast(fw, a);
    121     Value * bVec = fwCast(fw, b);
    122     std::vector<Constant*> Idxs;
    123     for (unsigned i = field_count/2; i < field_count; i++) {
    124         Idxs.push_back(getInt32(i));    // selects elements from first reg.
    125         Idxs.push_back(getInt32(i + field_count)); // selects elements from second reg.
    126     }
    127     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     110    // Otherwise use default SSE logic.
     111    return IDISA_SSE_Builder::esimd_mergeh(fw, a, b);
    128112}
    129113
     
    133117        return CreateCall3(vperm2i128func, fwCast(64, a), fwCast(64, b), getInt8(0x20));
    134118    }
    135     unsigned field_count = mBitBlockWidth/fw;
    136     Value * aVec = fwCast(fw, a);
    137     Value * bVec = fwCast(fw, b);
    138     std::vector<Constant*> Idxs;
    139     for (unsigned i = 0; i < field_count/2; i++) {
    140         Idxs.push_back(getInt32(i));    // selects elements from first reg.
    141         Idxs.push_back(getInt32(i + field_count)); // selects elements from second reg.
    142     }
    143     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     119    // Otherwise use default SSE logic.
     120    return IDISA_SSE_Builder::esimd_mergel(fw, a, b);
    144121}
    145122
     
    152129        return pack;
    153130    }
    154     unsigned fw_out = fw/2;
    155     unsigned fields_per_lane = mBitBlockWidth/(fw_out * lanes);
    156     unsigned field_offset_for_b = mBitBlockWidth/fw_out;
    157     Value * aVec = fwCast(fw_out, a);
    158     Value * bVec = fwCast(fw_out, b);
    159     std::vector<Constant*> Idxs;
    160     for (unsigned lane = 0; lane < lanes; lane++) {
    161         unsigned first_field_in_lane = lane * fields_per_lane; // every second field
    162         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    163             Idxs.push_back(getInt32(first_field_in_lane + 2*i));
    164         }
    165         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    166             Idxs.push_back(getInt32(field_offset_for_b + first_field_in_lane + 2*i));
    167         }
    168     }
    169     Value * pack = CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    170     return pack;
     131    // Otherwise use default SSE logic.
     132    return IDISA_SSE_Builder::hsimd_packl_in_lanes(lanes, fw, a, b);
    171133}
    172134
     
    179141        return pack;
    180142    }
    181     unsigned fw_out = fw/2;
    182     unsigned fields_per_lane = mBitBlockWidth/(fw_out * lanes);
    183     unsigned field_offset_for_b = mBitBlockWidth/fw_out;
    184     Value * aVec = fwCast(fw_out, a);
    185     Value * bVec = fwCast(fw_out, b);
    186     std::vector<Constant*> Idxs;
    187     for (unsigned lane = 0; lane < lanes; lane++) {
    188         unsigned first_field_in_lane = lane * fields_per_lane; // every second field
    189         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    190             Idxs.push_back(getInt32(first_field_in_lane + 2*i));
    191         }
    192         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    193             Idxs.push_back(getInt32(field_offset_for_b + first_field_in_lane + 2*i));
    194         }
    195     }
    196     Value * pack = CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    197     return pack;
     143    // Otherwise use default SSE logic.
     144    return IDISA_SSE_Builder::hsimd_packh_in_lanes(lanes, fw, a, b);
    198145}
    199146   
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.h

    r4957 r4996  
    88
    99#include <IDISA/idisa_builder.h>
     10#include <IDISA/idisa_sse_builder.h>
    1011
    1112using namespace llvm;
     
    1314namespace IDISA {
    1415
    15 class IDISA_AVX_Builder : public IDISA_Builder {
     16class IDISA_AVX_Builder : public IDISA_SSE_Builder {
    1617public:
    1718   
    18     IDISA_AVX_Builder(Module * m, Type * bitBlockType) : IDISA_Builder(m, bitBlockType) {
     19    IDISA_AVX_Builder(Module * m, Type * bitBlockType) : IDISA_SSE_Builder(m, bitBlockType) {
    1920    }
    2021    Value * hsimd_signmask(unsigned fw, Value * a) override;
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_sse_builder.cpp

    r4901 r4996  
    1515
    1616Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    17     if (fw == 16) {
     17    if ((fw == 16) && (mBitBlockWidth == 128)) {
    1818        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
    1919        return CreateCall(packuswb_func, std::vector<Value *>({simd_srli(16, a, 8), simd_srli(16, b, 8)}));
    2020    }
    21     unsigned field_count = 2 * mBitBlockWidth/fw;
    22     Value * aVec = fwCast(fw/2, a);
    23     Value * bVec = fwCast(fw/2, b);
    24     std::vector<Constant*> Idxs;
    25     for (unsigned i = 0; i < field_count; i++) {
    26         Idxs.push_back(getInt32(2*i));
    27     }
    28     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     21    // Otherwise use default logic.
     22    return IDISA_Builder::hsimd_packh(fw, a, b);
    2923}
    3024
    3125Value * IDISA_SSE2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
    32     if (fw == 16) {
     26    if ((fw == 16) && (mBitBlockWidth == 128)) {
    3327        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
    3428        Value * mask = simd_lomask(16);
    3529        return CreateCall(packuswb_func, std::vector<Value *>({fwCast(16, simd_and(a, mask)), fwCast(16, simd_and(b, mask))}));
    3630    }
    37     unsigned field_count = 2 * mBitBlockWidth/fw;
    38     Value * aVec = fwCast(fw/2, a);
    39     Value * bVec = fwCast(fw/2, b);
    40     std::vector<Constant*> Idxs;
    41     for (unsigned i = 0; i < field_count; i++) {
    42         Idxs.push_back(getInt32(2*i+1));
    43     }
    44     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     31    // Otherwise use default logic.
     32    return IDISA_Builder::hsimd_packl(fw, a, b);
    4533}
    4634
Note: See TracChangeset for help on using the changeset viewer.