Changeset 5309 for icGREP


Ignore:
Timestamp:
Feb 10, 2017, 1:46:17 PM (3 years ago)
Author:
nmedfort
Message:

Replaced short vector construction in IDISA_Builder with stack-allocated arrays.

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5308 r5309  
    99#include <llvm/IR/Constants.h>
    1010#include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
     11//#include <llvm/IR/Function.h>
    1212#include <llvm/IR/TypeBuilder.h>
    1313#include <fcntl.h>  // for
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5260 r5309  
    66
    77#include "idisa_avx_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    12 #include <llvm/IR/Module.h>
    138
    149namespace IDISA {
     
    2116            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    2217            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    23             Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
    24             return mask;
    25         }
    26         else if (fw == 32) {
     18            return CreateCall(signmask_f64func, a_as_pd);
     19        } else if (fw == 32) {
    2720            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    2821            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    2922            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    30             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
    31             return mask;
     23            return CreateCall(signmask_f32func, a_as_ps);
    3224        }
    33     }
    34     else if (mBitBlockWidth == 512) {
     25    } else if (mBitBlockWidth == 512) {
    3526        if (fw == 64) {
    36             Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     27            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth / 32);
    3728            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    38             std::vector<Constant*> Idxs;
     29            Constant * indicies[8];
    3930            for (unsigned i = 0; i < 8; i++) {
    40                 Idxs.push_back(getInt32(2*i+1));
     31                indicies[i] = getInt32(2 * i + 1);
    4132            }
    42             Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     33            Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get({indicies, 8}));
    4334            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    4435            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
    4536            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    46             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
    47             return mask;
     37            return CreateCall(signmask_f32func, pack_as_ps);
    4838        }
    4939    }
     
    5343   
    5444Value * IDISA_AVX2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    55     unsigned field_count = 2 * mBitBlockWidth/fw;
    56     Value * aVec = fwCast(fw/2, a);
    57     Value * bVec = fwCast(fw/2, b);
    58     if (fw <= 64) {
    59         std::vector<Constant*> Idxs;
    60         for (unsigned i = 0; i < field_count/4; i++) {
    61             Idxs.push_back(getInt32(2*i));
     45    if (fw <= 64) {       
     46        Value * aVec = fwCast(fw / 2, a);
     47        Value * bVec = fwCast(fw / 2, b);
     48        const auto field_count = 2 * mBitBlockWidth / fw;
     49        Constant * Idxs[field_count];
     50        const auto H = (field_count / 2);
     51        const auto Q = (field_count / 4);
     52        for (unsigned i = 0; i < Q; i++) {
     53            Idxs[i] = getInt32(2 * i);
     54            Idxs[i + Q] = getInt32((2 * i) + 1);
     55            Idxs[i + H] = getInt32((2 * i) + H);
     56            Idxs[i + H + Q] = getInt32((2 * i) + 1 + H);
    6257        }
    63         for (unsigned i = 0; i < field_count/4; i++) {
    64             Idxs.push_back(getInt32(2*i + 1));
    65         }
    66         for (unsigned i = 0; i < field_count/4; i++) {
    67             Idxs.push_back(getInt32(field_count/2 + 2*i));
    68         }
    69         for (unsigned i = 0; i < field_count/4; i++) {
    70             Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
    71         }
    72         Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
    73         Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
    74         return hsimd_packh(mBitBlockWidth/2, shufa, shufb);
     58        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get({Idxs, field_count}));
     59        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get({Idxs, field_count}));
     60        return hsimd_packh(mBitBlockWidth / 2, shufa, shufb);
    7561    }
    7662    // Otherwise use default SSE logic.
     
    7965
    8066Value * IDISA_AVX2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
    81     unsigned field_count = 2 * mBitBlockWidth/fw;
    82     Value * aVec = fwCast(fw/2, a);
    83     Value * bVec = fwCast(fw/2, b);
    8467    if (fw <= 64) {
    85         std::vector<Constant*> Idxs;
    86         for (unsigned i = 0; i < field_count/4; i++) {
    87             Idxs.push_back(getInt32(2*i));
     68        Value * aVec = fwCast(fw / 2, a);
     69        Value * bVec = fwCast(fw / 2, b);
     70        const auto field_count = 2 * mBitBlockWidth / fw;
     71        Constant * Idxs[field_count];
     72        const auto H = (field_count / 2);
     73        const auto Q = (field_count / 4);
     74        for (unsigned i = 0; i < Q; i++) {
     75            Idxs[i] = getInt32(2 * i);
     76            Idxs[i + Q] = getInt32((2 * i) + 1);
     77            Idxs[i + H] = getInt32((2 * i) + H);
     78            Idxs[i + H + Q] = getInt32((2 * i) + H + 1);
    8879        }
    89         for (unsigned i = 0; i < field_count/4; i++) {
    90             Idxs.push_back(getInt32(2*i + 1));
    91         }
    92         for (unsigned i = 0; i < field_count/4; i++) {
    93             Idxs.push_back(getInt32(field_count/2 + 2*i));
    94         }
    95         for (unsigned i = 0; i < field_count/4; i++) {
    96             Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
    97         }
    98         Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
    99         Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
    100         return hsimd_packl(mBitBlockWidth/2, shufa, shufb);
     80        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get({Idxs, field_count}));
     81        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get({Idxs, field_count}));
     82        return hsimd_packl(mBitBlockWidth / 2, shufa, shufb);
    10183    }
    10284    // Otherwise use default SSE logic.
     
    127109        Value * a_low = fwCast(16, simd_and(a, simd_lomask(fw)));
    128110        Value * b_low = fwCast(16, simd_and(b, simd_lomask(fw)));
    129         Value * pack = CreateCall(vpackuswbfunc, {a_low, b_low});
    130         return pack;
     111        return CreateCall(vpackuswbfunc, {a_low, b_low});
    131112    }
    132113    // Otherwise use default SSE logic.
     
    139120        Value * a_low = simd_srli(fw, a, fw/2);
    140121        Value * b_low = simd_srli(fw, b, fw/2);
    141         Value * pack = CreateCall(vpackuswbfunc, {a_low, b_low});
    142         return pack;
     122        return CreateCall(vpackuswbfunc, {a_low, b_low});
    143123    }
    144124    // Otherwise use default SSE logic.
     
    167147        carry_out = bitCast(CreateZExt(carry_out, getIntNTy(mBitBlockWidth)));
    168148    }
    169     return std::pair<Value *, Value *>(carry_out, bitCast(sum));
     149    return std::pair<Value *, Value *>{carry_out, bitCast(sum)};
    170150}
    171151   
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5307 r5309  
    7575}
    7676
    77 
    7877Constant * IDISA_Builder::simd_himask(unsigned fw) {
    7978    return Constant::getIntegerValue(getIntNTy(mBitBlockWidth), APInt::getSplat(mBitBlockWidth, APInt::getHighBitsSet(fw, fw/2)));
     
    161160Value * IDISA_Builder::simd_cttz(unsigned fw, Value * a) {
    162161    Value * cttzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::cttz, fwVectorType(fw));
    163     Value * rslt = CreateCall(cttzFunc, std::vector<Value *>({fwCast(fw, a), ConstantInt::get(getInt1Ty(), 0)}));
    164     return rslt;
     162    return CreateCall(cttzFunc, {fwCast(fw, a), ConstantInt::get(getInt1Ty(), 0)});
    165163}
    166164
    167165Value * IDISA_Builder::simd_popcount(unsigned fw, Value * a) {
    168166    Value * ctpopFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctpop, fwVectorType(fw));
    169     Value * rslt = CreateCall(ctpopFunc, std::vector<Value *>({fwCast(fw, a)}));
    170     return rslt;
     167    return CreateCall(ctpopFunc, fwCast(fw, a));
    171168}
    172169
     
    177174        Value * c = bitCast(cond);
    178175        return CreateOr(CreateAnd(a1, c), CreateAnd(CreateXor(c, b1), b1));
    179     }
    180     else {
     176    } else {
    181177        Value * aVec = fwCast(fw, a);
    182178        Value * bVec = fwCast(fw, b);
     
    185181}
    186182   
    187 Value * IDISA_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    188     unsigned field_count = mBitBlockWidth/fw;
    189     Value * aVec = fwCast(fw, a);
    190     Value * bVec = fwCast(fw, b);
    191     std::vector<Constant*> Idxs;
    192     for (unsigned i = field_count/2; i < field_count; i++) {
    193         Idxs.push_back(getInt32(i));    // selects elements from first reg.
    194         Idxs.push_back(getInt32(i + field_count)); // selects elements from second reg.
    195     }
    196     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    197 }
    198 
    199 Value * IDISA_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
    200     unsigned field_count = mBitBlockWidth/fw;
    201     Value * aVec = fwCast(fw, a);
    202     Value * bVec = fwCast(fw, b);
    203     std::vector<Constant*> Idxs;
    204     for (unsigned i = 0; i < field_count/2; i++) {
    205         Idxs.push_back(getInt32(i));    // selects elements from first reg.
    206         Idxs.push_back(getInt32(i + field_count)); // selects elements from second reg.
    207     }
    208     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     183Value * IDISA_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {   
     184    const auto field_count = mBitBlockWidth / fw;
     185    Constant * Idxs[field_count];
     186    for (unsigned i = 0; i < field_count / 2; i++) {
     187        Idxs[2 * i] = getInt32(i + field_count / 2); // selects elements from first reg.
     188        Idxs[2 * i + 1] = getInt32(i + field_count / 2 + field_count); // selects elements from second reg.
     189    }
     190    return CreateShuffleVector(fwCast(fw, a), fwCast(fw, b), ConstantVector::get({Idxs, field_count}));
     191}
     192
     193Value * IDISA_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {   
     194    const auto field_count = mBitBlockWidth / fw;
     195    Constant * Idxs[field_count];
     196    for (unsigned i = 0; i < field_count / 2; i++) {
     197        Idxs[2 * i] = getInt32(i); // selects elements from first reg.
     198        Idxs[2 * i + 1] = getInt32(i + field_count); // selects elements from second reg.
     199    }
     200    return CreateShuffleVector(fwCast(fw, a), fwCast(fw, b), ConstantVector::get({Idxs, field_count}));
    209201}
    210202
    211203Value * IDISA_Builder::esimd_bitspread(unsigned fw, Value * bitmask) {
    212     unsigned field_count = mBitBlockWidth/fw;
     204    const auto field_count = mBitBlockWidth / fw;
    213205    Type * field_type = getIntNTy(fw);
    214     if (bitmask->getType()->getIntegerBitWidth() < fw) {
    215         bitmask = CreateZExt(bitmask, field_type);
    216     }
    217     else if (bitmask->getType()->getIntegerBitWidth() > fw) {
    218         bitmask = CreateTrunc(bitmask, field_type);
    219     }
    220     Value * spread_field = CreateBitCast(bitmask, VectorType::get(getIntNTy(fw), 1));
     206    Value * spread_field = CreateBitCast(CreateZExtOrTrunc(bitmask, field_type), VectorType::get(getIntNTy(fw), 1));
    221207    Value * undefVec = UndefValue::get(VectorType::get(getIntNTy(fw), 1));
    222208    Value * broadcast = CreateShuffleVector(spread_field, undefVec, Constant::getNullValue(VectorType::get(getInt32Ty(), field_count)));
    223     std::vector<Constant*> bitSel;
    224     std::vector<Constant*> bitShift;
     209    Constant * bitSel[field_count];
     210    Constant * bitShift[field_count];
    225211    for (unsigned i = 0; i < field_count; i++) {
    226         bitSel.push_back(ConstantInt::get(field_type, 1 << i));
    227         bitShift.push_back(ConstantInt::get(field_type, i));
    228     }
    229     Value * bitSelVec = ConstantVector::get(bitSel);
    230     Value * bitShiftVec = ConstantVector::get(bitShift);
     212        bitSel[i] = ConstantInt::get(field_type, 1 << i);
     213        bitShift[i] = ConstantInt::get(field_type, i);
     214    }
     215    Value * bitSelVec = ConstantVector::get({bitSel, field_count});
     216    Value * bitShiftVec = ConstantVector::get({bitShift, field_count});
    231217    return CreateLShr(CreateAnd(bitSelVec, broadcast), bitShiftVec);
    232218}
    233219
    234220Value * IDISA_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    235     unsigned field_count = 2 * mBitBlockWidth/fw;
    236221    Value * aVec = fwCast(fw/2, a);
    237222    Value * bVec = fwCast(fw/2, b);
    238     std::vector<Constant*> Idxs;
     223    const auto field_count = 2 * mBitBlockWidth / fw;
     224    Constant * Idxs[field_count];
    239225    for (unsigned i = 0; i < field_count; i++) {
    240         Idxs.push_back(getInt32(2*i+1));
    241     }
    242     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     226        Idxs[i] = getInt32(2 * i + 1);
     227    }
     228    return CreateShuffleVector(aVec, bVec, ConstantVector::get({Idxs, field_count}));
    243229}
    244230
    245231Value * IDISA_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
    246     unsigned field_count = 2 * mBitBlockWidth/fw;
    247232    Value * aVec = fwCast(fw/2, a);
    248233    Value * bVec = fwCast(fw/2, b);
    249     std::vector<Constant*> Idxs;
     234    const auto field_count = 2 * mBitBlockWidth / fw;
     235    Constant * Idxs[field_count];
    250236    for (unsigned i = 0; i < field_count; i++) {
    251         Idxs.push_back(getInt32(2*i));
    252     }
    253     return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    254 }
    255 
    256    
     237        Idxs[i] = getInt32(2 * i);
     238    }
     239    return CreateShuffleVector(aVec, bVec, ConstantVector::get({Idxs, field_count}));
     240}
     241
    257242Value * IDISA_Builder::hsimd_packh_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
    258     unsigned fw_out = fw/2;
    259     unsigned fields_per_lane = mBitBlockWidth/(fw_out * lanes);
    260     unsigned field_offset_for_b = mBitBlockWidth/fw_out;
    261     Value * aVec = fwCast(fw_out, a);
    262     Value * bVec = fwCast(fw_out, b);
    263     std::vector<Constant*> Idxs;
    264     for (unsigned lane = 0; lane < lanes; lane++) {
    265         unsigned first_field_in_lane = lane * fields_per_lane; // every second field
    266         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    267             Idxs.push_back(getInt32(first_field_in_lane + 2*i + 1));
    268         }
    269         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    270             Idxs.push_back(getInt32(field_offset_for_b + first_field_in_lane + 2*i + 1));
    271         }
    272     }
    273     Value * pack = CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    274     return pack;
     243    const unsigned fw_out = fw / 2;
     244    const unsigned fields_per_lane = mBitBlockWidth / (fw_out * lanes);
     245    const unsigned field_offset_for_b = mBitBlockWidth / fw_out;
     246    const unsigned field_count = mBitBlockWidth / fw_out;
     247    Constant * Idxs[field_count];
     248    for (unsigned lane = 0, j = 0; lane < lanes; lane++) {
     249        const unsigned first_field_in_lane = lane * fields_per_lane; // every second field
     250        for (unsigned i = 0; i < fields_per_lane / 2; i++) {
     251            Idxs[j++] = getInt32(first_field_in_lane + (2 * i) + 1);
     252        }
     253        for (unsigned i = 0; i < fields_per_lane / 2; i++) {
     254            Idxs[j++] = getInt32(field_offset_for_b + first_field_in_lane + (2 * i) + 1);
     255        }
     256    }
     257    return CreateShuffleVector(fwCast(fw_out, a), fwCast(fw_out, b), ConstantVector::get({Idxs, field_count}));
    275258}
    276259
    277260Value * IDISA_Builder::hsimd_packl_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
    278     unsigned fw_out = fw/2;
    279     unsigned fields_per_lane = mBitBlockWidth/(fw_out * lanes);
    280     unsigned field_offset_for_b = mBitBlockWidth/fw_out;
    281     Value * aVec = fwCast(fw_out, a);
    282     Value * bVec = fwCast(fw_out, b);
    283     std::vector<Constant*> Idxs;
    284     for (unsigned lane = 0; lane < lanes; lane++) {
    285         unsigned first_field_in_lane = lane * fields_per_lane; // every second field
    286         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    287             Idxs.push_back(getInt32(first_field_in_lane + 2*i));
    288         }
    289         for (unsigned i = 0; i < fields_per_lane/2; i++) {
    290             Idxs.push_back(getInt32(field_offset_for_b + first_field_in_lane + 2*i));
    291         }
    292     }
    293     Value * pack = CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
    294     return pack;
    295 }
    296 
    297    
     261    const unsigned fw_out = fw / 2;
     262    const unsigned fields_per_lane = mBitBlockWidth / (fw_out * lanes);
     263    const unsigned field_offset_for_b = mBitBlockWidth / fw_out;
     264    const unsigned field_count = mBitBlockWidth / fw_out;
     265    Constant * Idxs[field_count];
     266    for (unsigned lane = 0, j = 0; lane < lanes; lane++) {
     267        const unsigned first_field_in_lane = lane * fields_per_lane; // every second field
     268        for (unsigned i = 0; i < fields_per_lane / 2; i++) {
     269            Idxs[j++] = getInt32(first_field_in_lane + (2 * i));
     270        }
     271        for (unsigned i = 0; i < fields_per_lane / 2; i++) {
     272            Idxs[j++] = getInt32(field_offset_for_b + first_field_in_lane + (2 * i));
     273        }
     274    }
     275    return CreateShuffleVector(fwCast(fw_out, a), fwCast(fw_out, b), ConstantVector::get({Idxs, field_count}));
     276}
     277
    298278Value * IDISA_Builder::hsimd_signmask(unsigned fw, Value * a) {
    299279    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
     
    306286
    307287Value * IDISA_Builder::mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex) {
    308     Value * vec = fwCast(fw, blk);
    309     return CreateInsertElement(vec, elt, getInt32(fieldIndex));
     288    return CreateInsertElement(fwCast(fw, blk), elt, getInt32(fieldIndex));
    310289}
    311290
    312291Value * IDISA_Builder::mvmd_slli(unsigned fw, Value * a, unsigned shift) {
    313     unsigned field_count = mBitBlockWidth/fw;
     292    const auto field_count = mBitBlockWidth / fw;
    314293    return mvmd_dslli(fw, a, Constant::getNullValue(fwVectorType(fw)), field_count - shift);
    315294}
     
    320299
    321300Value * IDISA_Builder::mvmd_dslli(unsigned fw, Value * a, Value * b, unsigned shift) {
    322     unsigned field_count = mBitBlockWidth/fw;
    323     Value * aVec = fwCast(fw, a);
    324     Value * bVec = fwCast(fw, b);
    325     std::vector<Constant*> Idxs;
     301    const auto field_count = mBitBlockWidth/fw;
     302    Constant * Idxs[field_count];
    326303    for (unsigned i = 0; i < field_count; i++) {
    327         Idxs.push_back(getInt32(i + shift));
    328     }
    329     return CreateShuffleVector(bVec, aVec, ConstantVector::get(Idxs));
     304        Idxs[i] = getInt32(i + shift);
     305    }
     306    return CreateShuffleVector(fwCast(fw, b), fwCast(fw, a), ConstantVector::get({Idxs, field_count}));
    330307}
    331308
    332309Value * IDISA_Builder::bitblock_any(Value * a) {
    333310    Type * iBitBlock = getIntNTy(mBitBlockWidth);
    334     return CreateICmpNE(CreateBitCast(a, iBitBlock),  ConstantInt::get(iBitBlock, 0));
     311    return CreateICmpNE(CreateBitCast(a, iBitBlock),  ConstantInt::getNullValue(iBitBlock));
    335312}
    336313
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_i64_builder.cpp

    r5260 r5309  
    66
    77#include "idisa_i64_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    12 #include <llvm/IR/Module.h>
    138
    149namespace IDISA {
     
    2318        a_ = simd_or(simd_and(a_, himask_odd), simd_slli(mBitBlockWidth, simd_and(a_, himask_even), w/2));
    2419    }
    25     Value * pk = simd_or(b_, simd_srli(mBitBlockWidth, a_, mBitBlockWidth/2));
    26     return pk;
     20    return simd_or(b_, simd_srli(mBitBlockWidth, a_, mBitBlockWidth/2));
    2721}
    28 
    2922
    3023Value * IDISA_I64_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
     
    3730        a_ = simd_or(simd_and(a_, lomask_even), simd_srli(mBitBlockWidth, simd_and(a_, lomask_odd), w/2));
    3831    }
    39     Value * pk = simd_or(simd_slli(mBitBlockWidth, b_, mBitBlockWidth/2), a_);
    40     return pk;
     32    return simd_or(simd_slli(mBitBlockWidth, b_, mBitBlockWidth/2), a_);
    4133}
    4234
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5292 r5309  
    66
    77#include "idisa_nvptx_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    128#include <llvm/IR/InlineAsm.h>
    139#include <llvm/IR/Module.h>
     
    2218    Type * const int32ty = getInt32Ty();
    2319    Function * barrierOrFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
    24     Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::get(mBitBlockType, 0));
     20    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
    2521    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
    2622    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
    27     return CreateICmpNE(anyNonZero,  ConstantInt::get(int32ty, 0));
     23    return CreateICmpNE(anyNonZero,  ConstantInt::getNullValue(int32ty));
    2824}
    2925
     
    10197}
    10298
    103 void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){   
     99void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){
    104100    Type * const voidTy = getVoidTy();
    105101    Type * const int32ty = getInt32Ty();
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5260 r5309  
    66
    77#include "idisa_sse_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    12 #include <llvm/IR/Module.h>
    138
    149namespace IDISA {
    15 
    1610
    1711Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     
    4135            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    4236            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    43             Value * mask = CreateCall(signmask_f64func, a_as_pd);
    44             return mask;
     37            return CreateCall(signmask_f64func, a_as_pd);
    4538        }
    4639        if (fw == 8) {
    4740            Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
    48             Value * mask = CreateCall(pmovmskb_func, fwCast(8, a));
    49             return mask;
     41            return CreateCall(pmovmskb_func, fwCast(8, a));
    5042        }
    5143    }
    52     const unsigned fieldCount = mBitBlockWidth / fw;
     44    const auto fieldCount = mBitBlockWidth / fw;
    5345    if ((fieldCount > 4) && (fieldCount <= 16)) {
    5446        Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
    55         int fieldBytes = fw/8;
     47        int fieldBytes = fw / 8;
    5648        int hiByte = fieldBytes - 1;
    57         std::vector<Constant*> Idxs;
     49        Constant * Idxs[16];
    5850        for (unsigned i = 0; i < fieldCount; i++) {
    59             Idxs.push_back(getInt32(fieldBytes*i+hiByte));
     51            Idxs[i] = getInt32(fieldBytes * i + hiByte);
    6052        }
    6153        for (unsigned i = fieldCount; i < 16; i++) {
    62             Idxs.push_back(getInt32(mBitBlockWidth/8));
     54            Idxs[i] = getInt32(mBitBlockWidth / 8);
    6355        }
    64         Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get(Idxs));
    65         Value * mask = CreateCall(pmovmskb_func, packh);
    66         return mask;
     56        Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get({Idxs, 16}));
     57        return CreateCall(pmovmskb_func, packh);
    6758    }
    6859    // Otherwise use default SSE logic.
     
    7970            return CreateCall(signmask_f32func, a_as_ps);
    8071        }
    81     }
    82     else if ((fw == 64) && (mBitBlockWidth == 256)) {
     72    } else if ((fw == 64) && (mBitBlockWidth == 256)) {
    8373        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    8474        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    85         std::vector<Constant*> Idxs;
    86         for (unsigned i = 0; i < mBitBlockWidth/fw; i++) {
    87             Idxs.push_back(getInt32(2*i+1));
     75        Constant * Idxs[4];
     76        for (unsigned i = 0; i < 4; i++) {
     77            Idxs[i] = getInt32(2 * i + 1);
    8878        }
    89         Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     79        Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get({Idxs, 4}));
    9080        Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    9181        Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
Note: See TracChangeset for help on using the changeset viewer.