Changeset 5729 for icGREP


Ignore:
Timestamp:
Nov 10, 2017, 10:46:51 AM (16 months ago)
Author:
cameron
Message:

IDISA operations, simd_pext, simd_pdep, simd_sllv, simd_srlv, small fw operations

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5713 r5729  
    7474
    7575Value * IDISA_Builder::simd_fill(unsigned fw, Value * a) {
    76     unsigned field_count = mBitBlockWidth/fw;
     76    if (fw < 8) report_fatal_error("Unsupported field width: simd_fill " + std::to_string(fw));
     77   unsigned field_count = mBitBlockWidth/fw;
    7778    Type * singleFieldVecTy = VectorType::get(getIntNTy(fw), 1);
    7879    Value * aVec = CreateBitCast(a, singleFieldVecTy);
     
    8182
    8283Value * IDISA_Builder::simd_add(unsigned fw, Value * a, Value * b) {
     84    if (fw == 1) return simd_xor(a, b);
     85    if (fw < 8) {
     86        Constant * hi_bit_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     87                                                           APInt::getSplat(mBitBlockWidth, APInt::getHighBitsSet(fw, 1)));
     88        Constant * lo_bit_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     89                                                           APInt::getSplat(mBitBlockWidth, APInt::getLowBitsSet(fw, fw-1)));
     90        Value * hi_xor = simd_xor(simd_and(a, hi_bit_mask), simd_and(b, hi_bit_mask));
     91        Value * part_sum = simd_add(32, simd_and(a, lo_bit_mask), simd_and(b, lo_bit_mask));
     92        return simd_xor(part_sum, hi_xor);
     93    }
    8394    return CreateAdd(fwCast(fw, a), fwCast(fw, b));
    8495}
    8596
    8697Value * IDISA_Builder::simd_sub(unsigned fw, Value * a, Value * b) {
     98    if (fw < 8) report_fatal_error("Unsupported field width: sub " + std::to_string(fw));
    8799    return CreateSub(fwCast(fw, a), fwCast(fw, b));
    88100}
    89101
    90102Value * IDISA_Builder::simd_mult(unsigned fw, Value * a, Value * b) {
     103    if (fw < 8) report_fatal_error("Unsupported field width: mult " + std::to_string(fw));
    91104    return CreateMul(fwCast(fw, a), fwCast(fw, b));
    92105}
    93106
    94107Value * IDISA_Builder::simd_eq(unsigned fw, Value * a, Value * b) {
     108    if (fw < 8) {
     109        Value * eq_bits = simd_not(simd_xor(a, b));
     110        if (fw == 1) return eq_bits;
     111        eq_bits = simd_or(simd_and(simd_srli(32, simd_and(simd_himask(2), eq_bits), 1), eq_bits),
     112                          simd_and(simd_slli(32, simd_and(simd_lomask(2), eq_bits), 1), eq_bits));
     113        if (fw == 2) return eq_bits;
     114        eq_bits = simd_or(simd_and(simd_srli(32, simd_and(simd_himask(4), eq_bits), 2), eq_bits),
     115                          simd_and(simd_slli(32, simd_and(simd_lomask(4), eq_bits), 2), eq_bits));
     116        return eq_bits;
     117    }
    95118    return CreateSExt(CreateICmpEQ(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
    96119}
    97120
    98121Value * IDISA_Builder::simd_gt(unsigned fw, Value * a, Value * b) {
     122    if (fw < 8) report_fatal_error("Unsupported field width: gt " + std::to_string(fw));
    99123    return CreateSExt(CreateICmpSGT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
    100124}
    101125
    102126Value * IDISA_Builder::simd_ugt(unsigned fw, Value * a, Value * b) {
     127    if (fw < 8) report_fatal_error("Unsupported field width: ugt " + std::to_string(fw));
    103128    return CreateSExt(CreateICmpUGT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
    104129}
    105130
    106131Value * IDISA_Builder::simd_lt(unsigned fw, Value * a, Value * b) {
     132    if (fw < 8) report_fatal_error("Unsupported field width: lt " + std::to_string(fw));
    107133    return CreateSExt(CreateICmpSLT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
    108134}
    109135
    110136Value * IDISA_Builder::simd_ult(unsigned fw, Value * a, Value * b) {
     137    if (fw < 8) report_fatal_error("Unsupported field width: ult " + std::to_string(fw));
    111138    return CreateSExt(CreateICmpULT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
    112139}
    113140
    114141Value * IDISA_Builder::simd_max(unsigned fw, Value * a, Value * b) {
     142    if (fw < 8) report_fatal_error("Unsupported field width: max " + std::to_string(fw));
    115143    Value * aVec = fwCast(fw, a);
    116144    Value * bVec = fwCast(fw, b);
     
    119147
    120148Value * IDISA_Builder::simd_umax(unsigned fw, Value * a, Value * b) {
     149    if (fw < 8) report_fatal_error("Unsupported field width: umax " + std::to_string(fw));
    121150    Value * aVec = fwCast(fw, a);
    122151    Value * bVec = fwCast(fw, b);
     
    125154
    126155Value * IDISA_Builder::simd_min(unsigned fw, Value * a, Value * b) {
     156    if (fw < 8) report_fatal_error("Unsupported field width: min " + std::to_string(fw));
    127157    Value * aVec = fwCast(fw, a);
    128158    Value * bVec = fwCast(fw, b);
     
    131161
    132162Value * IDISA_Builder::simd_umin(unsigned fw, Value * a, Value * b) {
     163    if (fw < 8) report_fatal_error("Unsupported field width: umin " + std::to_string(fw));
    133164    Value * aVec = fwCast(fw, a);
    134165    Value * bVec = fwCast(fw, b);
     
    137168
    138169Value * IDISA_Builder::simd_slli(unsigned fw, Value * a, unsigned shift) {
     170    if (fw < 16) {
     171        Constant * value_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     172                                                          APInt::getSplat(mBitBlockWidth, APInt::getLowBitsSet(fw, fw-shift)));
     173        return CreateShl(fwCast(32, simd_and(a, value_mask)), shift);
     174    }
    139175    return CreateShl(fwCast(fw, a), shift);
    140176}
    141177
    142178Value * IDISA_Builder::simd_srli(unsigned fw, Value * a, unsigned shift) {
     179    if (fw < 16) {
     180        Constant * value_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     181                                                          APInt::getSplat(mBitBlockWidth, APInt::getHighBitsSet(fw, fw-shift)));
     182        return CreateLShr(fwCast(32, simd_and(a, value_mask)), shift);
     183    }
    143184    return CreateLShr(fwCast(fw, a), shift);
    144185}
    145186
    146187Value * IDISA_Builder::simd_srai(unsigned fw, Value * a, unsigned shift) {
     188    if (fw < 8) report_fatal_error("Unsupported field width: srai " + std::to_string(fw));
    147189    return CreateAShr(fwCast(fw, a), shift);
    148190}
     191   
     192Value * IDISA_Builder::simd_sllv(unsigned fw, Value * v, Value * shifts) {
     193    if (fw >= 8) return CreateShl(fwCast(fw, v), fwCast(fw, shifts));
     194    Value * w = v;
     195    for (unsigned shft_amt = 1; shft_amt < fw; shft_amt *= 2) {
     196        APInt bit_in_field(fw, shft_amt);
     197        // To simulate shift within a fw, we need to mask off the high shft_amt bits of each element.
     198        Constant * value_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     199                                                          APInt::getSplat(mBitBlockWidth, APInt::getLowBitsSet(fw, fw-shft_amt)));
     200        Constant * bit_select = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     201                                                          APInt::getSplat(mBitBlockWidth, bit_in_field));
     202        Value * unshifted_field_mask = simd_eq(fw, simd_and(bit_select, shifts), allZeroes());
     203        Value * fieldsToShift = simd_and(w, simd_and(value_mask, simd_not(unshifted_field_mask)));
     204        w = simd_or(simd_and(w, unshifted_field_mask), simd_slli(32, fieldsToShift, shft_amt));
     205    }
     206    return w;
     207}
     208
     209Value * IDISA_Builder::simd_srlv(unsigned fw, Value * v, Value * shifts) {
     210    if (fw >= 8) return CreateLShr(fwCast(fw, v), fwCast(fw, shifts));
     211    Value * w = v;
     212    for (unsigned shft_amt = 1; shft_amt < fw; shft_amt *= 2) {
     213        APInt bit_in_field(fw, shft_amt);
     214        // To simulate shift within a fw, we need to mask off the low shft_amt bits of each element.
     215        Constant * value_mask = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     216                                                          APInt::getSplat(mBitBlockWidth, APInt::getHighBitsSet(fw, fw-shft_amt)));
     217        Constant * bit_select = Constant::getIntegerValue(getIntNTy(mBitBlockWidth),
     218                                                          APInt::getSplat(mBitBlockWidth, bit_in_field));
     219        Value * unshifted_field_mask = simd_eq(fw, simd_and(bit_select, shifts), allZeroes());
     220        Value * fieldsToShift = simd_and(w, simd_and(value_mask, simd_not(unshifted_field_mask)));
     221        w = simd_or(simd_and(w, unshifted_field_mask), simd_srli(32, fieldsToShift, shft_amt));
     222    }
     223    return w;
     224}
     225
     226Value * IDISA_Builder::simd_pext(unsigned fieldwidth, Value * v, Value * extract_mask) {
     227    Value * delcounts = CreateNot(extract_mask);  // initially deletion counts per 1-bit field
     228    Value * w = simd_and(extract_mask, v);
     229    for (unsigned fw = 2; fw < fieldwidth; fw = fw * 2) {
     230        Value * shift_fwd_field_mask = simd_lomask(fw*2);
     231        Value * shift_back_field_mask = simd_himask(fw*2);
     232        Value * shift_back_count_mask = simd_and(shift_back_field_mask, simd_lomask(fw));
     233        Value * shift_fwd_amts = simd_srli(fw, simd_and(shift_fwd_field_mask, delcounts), fw/2);
     234        Value * shift_back_amts = simd_and(shift_back_count_mask, delcounts);
     235        w = simd_or(simd_sllv(fw, simd_and(w, shift_fwd_field_mask), shift_fwd_amts),
     236                    simd_srlv(fw, simd_and(w, shift_back_field_mask), shift_back_amts));
     237        delcounts = simd_add(fw, simd_and(simd_lomask(fw), delcounts), simd_srli(fw, delcounts, fw/2));
     238    }
     239    // Now shift back all fw fields.
     240    Value * shift_back_amts = simd_and(simd_lomask(fieldwidth), delcounts);
     241    w = simd_srlv(fieldwidth, w, shift_back_amts);
     242    return w;
     243}
     244
     245Value * IDISA_Builder::simd_pdep(unsigned fieldwidth, Value * v, Value * deposit_mask) {
     246    // simd_pdep is implemented by reversing the process of simd_pext.
     247    // First determine the deletion counts necessary for each stage of the process.
     248    std::vector<Value *> delcounts;
     249    delcounts.push_back(simd_not(deposit_mask)); // initially deletion counts per 1-bit field
     250    for (unsigned fw = 2; fw < fieldwidth; fw = fw * 2) {
     251        delcounts.push_back(simd_add(fw, simd_and(simd_lomask(fw), delcounts.back()), simd_srli(fw, delcounts.back(), fw/2)));
     252    }
     253    //
     254    // Now reverse the pext process.  First reverse the final shift_back.
     255    Value * pext_shift_back_amts = simd_and(simd_lomask(fieldwidth), delcounts.back());
     256    Value * w = simd_sllv(fieldwidth, v, pext_shift_back_amts);
     257   
     258    //
     259    // Now work through the smaller field widths.
     260    for (unsigned fw = fieldwidth/2; fw >= 2; fw = fw/2) {
     261        delcounts.pop_back();
     262        Value * pext_shift_fwd_field_mask = simd_lomask(fw*2);
     263        Value * pext_shift_back_field_mask = simd_himask(fw*2);
     264        Value * pext_shift_back_count_mask = simd_and(pext_shift_back_field_mask, simd_lomask(fw));
     265        Value * pext_shift_fwd_amts = simd_srli(fw, simd_and(pext_shift_fwd_field_mask, delcounts.back()), fw/2);
     266        Value * pext_shift_back_amts = simd_and(pext_shift_back_count_mask, delcounts.back());
     267        w = simd_or(simd_srlv(fw, simd_and(w, pext_shift_fwd_field_mask), pext_shift_fwd_amts),
     268                    simd_sllv(fw, simd_and(w, pext_shift_back_field_mask), pext_shift_back_amts));
     269    }
     270    return w;
     271}
    149272
    150273Value * IDISA_Builder::simd_cttz(unsigned fw, Value * a) {
     274    if (fw < 8) report_fatal_error("Unsupported field width: cttz " + std::to_string(fw));
    151275    Value * cttzFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::cttz, fwVectorType(fw));
    152276    return CreateCall(cttzFunc, {fwCast(fw, a), ConstantInt::get(getInt1Ty(), 0)});
     
    154278
    155279Value * IDISA_Builder::simd_popcount(unsigned fw, Value * a) {
     280    if (fw < 8) report_fatal_error("Unsupported field width: popcount " + std::to_string(fw));
    156281    Value * ctpopFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, fwVectorType(fw));
    157282    return CreateCall(ctpopFunc, fwCast(fw, a));
     
    191316        return CreateOr(CreateAnd(a1, c), CreateAnd(CreateXor(c, b1), b1));
    192317    } else {
     318        if (fw < 8) report_fatal_error("Unsupported field width: simd_if " + std::to_string(fw));
    193319        Value * aVec = fwCast(fw, a);
    194320        Value * bVec = fwCast(fw, b);
     
    198324   
    199325Value * IDISA_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {   
     326    if (fw < 8) report_fatal_error("Unsupported field width: mergeh " + std::to_string(fw));
    200327    const auto field_count = mBitBlockWidth / fw;
    201328    Constant * Idxs[field_count];
     
    208335
    209336Value * IDISA_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {   
     337    if (fw < 8) report_fatal_error("Unsupported field width: mergel " + std::to_string(fw));
    210338    const auto field_count = mBitBlockWidth / fw;
    211339    Constant * Idxs[field_count];
     
    218346
    219347Value * IDISA_Builder::esimd_bitspread(unsigned fw, Value * bitmask) {
     348    if (fw < 8) report_fatal_error("Unsupported field width: bitspread " + std::to_string(fw));
    220349    const auto field_count = mBitBlockWidth / fw;
    221350    Type * field_type = getIntNTy(fw);
     
    235364
    236365Value * IDISA_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     366    if (fw < 8) report_fatal_error("Unsupported field width: packh " + std::to_string(fw));
    237367    Value * aVec = fwCast(fw/2, a);
    238368    Value * bVec = fwCast(fw/2, b);
     
    246376
    247377Value * IDISA_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
     378    if (fw < 8) report_fatal_error("Unsupported field width: packl " + std::to_string(fw));
    248379    Value * aVec = fwCast(fw/2, a);
    249380    Value * bVec = fwCast(fw/2, b);
     
    257388
    258389Value * IDISA_Builder::hsimd_packh_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
     390    if (fw < 16) report_fatal_error("Unsupported field width: packh_in_lanes " + std::to_string(fw));
    259391    const unsigned fw_out = fw / 2;
    260392    const unsigned fields_per_lane = mBitBlockWidth / (fw_out * lanes);
     
    275407
    276408Value * IDISA_Builder::hsimd_packl_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
     409    if (fw < 16) report_fatal_error("Unsupported field width: packl_in_lanes " + std::to_string(fw));
    277410    const unsigned fw_out = fw / 2;
    278411    const unsigned fields_per_lane = mBitBlockWidth / (fw_out * lanes);
     
    293426
    294427Value * IDISA_Builder::hsimd_signmask(unsigned fw, Value * a) {
     428    if (fw < 8) report_fatal_error("Unsupported field width: hsimd_signmask " + std::to_string(fw));
    295429    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
    296430    return CreateZExt(CreateBitCast(mask, getIntNTy(mBitBlockWidth/fw)), getInt32Ty());
     
    298432
    299433Value * IDISA_Builder::mvmd_extract(unsigned fw, Value * a, unsigned fieldIndex) {
     434    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_extract " + std::to_string(fw));
    300435    return CreateExtractElement(fwCast(fw, a), getInt32(fieldIndex));
    301436}
    302437
    303438Value * IDISA_Builder::mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex) {
     439    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_insert " + std::to_string(fw));
    304440    return CreateInsertElement(fwCast(fw, blk), elt, getInt32(fieldIndex));
    305441}
    306442
    307443Value * IDISA_Builder::mvmd_slli(unsigned fw, Value * a, unsigned shift) {
     444    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_slli " + std::to_string(fw));
    308445    const auto field_count = mBitBlockWidth / fw;
    309446    return mvmd_dslli(fw, a, Constant::getNullValue(fwVectorType(fw)), field_count - shift);
     
    311448
    312449Value * IDISA_Builder::mvmd_srli(unsigned fw, Value * a, unsigned shift) {
     450    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_srli " + std::to_string(fw));
    313451    return mvmd_dslli(fw, Constant::getNullValue(fwVectorType(fw)), a, shift);
    314452}
    315453
    316454Value * IDISA_Builder::mvmd_dslli(unsigned fw, Value * a, Value * b, unsigned shift) {
     455    if (fw < 8) report_fatal_error("Unsupported field width: mvmd_dslli " + std::to_string(fw));
    317456    const auto field_count = mBitBlockWidth/fw;
    318457    Constant * Idxs[field_count];
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5713 r5729  
    9898    virtual llvm::Value * simd_srli(unsigned fw, llvm::Value * a, unsigned shift);
    9999    virtual llvm::Value * simd_srai(unsigned fw, llvm::Value * a, unsigned shift);
     100    virtual llvm::Value * simd_sllv(unsigned fw, llvm::Value * a, llvm::Value * shifts);
     101    virtual llvm::Value * simd_srlv(unsigned fw, llvm::Value * a, llvm::Value * shifts);
     102   
     103    virtual llvm::Value * simd_pext(unsigned fw, llvm::Value * v, llvm::Value * extract_mask);
     104    virtual llvm::Value * simd_pdep(unsigned fw, llvm::Value * v, llvm::Value * deposit_mask);
    100105   
    101106    virtual llvm::Value * simd_cttz(unsigned fw, llvm::Value * a);
Note: See TracChangeset for help on using the changeset viewer.