Changeset 6110


Ignore:
Timestamp:
Jun 23, 2018, 4:11:04 PM (3 weeks ago)
Author:
cameron
Message:

Prefer simd_select_hi/simd_select_lo in place of simd_himask/simd_lomask for arbitrary vector width

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6109 r6110  
    175175        Value * eq_bits = simd_not(simd_xor(a, b));
    176176        if (fw == 1) return eq_bits;
    177         eq_bits = simd_or(simd_and(simd_srli(32, simd_and(simd_himask(2), eq_bits), 1), eq_bits),
    178                           simd_and(simd_slli(32, simd_and(simd_lomask(2), eq_bits), 1), eq_bits));
     177        eq_bits = simd_or(simd_and(simd_srli(32, simd_select_hi(2, eq_bits), 1), eq_bits),
     178                          simd_and(simd_slli(32, simd_select_lo(2, eq_bits), 1), eq_bits));
    179179        if (fw == 2) return eq_bits;
    180         eq_bits = simd_or(simd_and(simd_srli(32, simd_and(simd_himask(4), eq_bits), 2), eq_bits),
    181                           simd_and(simd_slli(32, simd_and(simd_lomask(4), eq_bits), 2), eq_bits));
     180        eq_bits = simd_or(simd_and(simd_srli(32, simd_select_hi(4, eq_bits), 2), eq_bits),
     181                          simd_and(simd_slli(32, simd_select_lo(4, eq_bits), 2), eq_bits));
    182182        return eq_bits;
    183183    }
     
    215215    if (fw == 1) return simd_or(simd_not(a), b);
    216216    if (fw < 8) {
    217         Value * hi_rslt = simd_and(simd_himask(2*fw), simd_ule(2*fw, simd_and(simd_himask(2*fw), a), b));
    218         Value * lo_rslt = simd_and(simd_lomask(2*fw), simd_ule(2*fw, simd_and(simd_lomask(2*fw), a), simd_and(simd_lomask(2*fw), b)));
     217        Value * hi_rslt = simd_select_hi(2*fw, simd_ule(2*fw, simd_select_hi(2*fw, a), b));
     218        Value * lo_rslt = simd_select_lo(2*fw, simd_ule(2*fw, simd_select_lo(2*fw, a), simd_select_lo(2*fw, b)));
    219219        return simd_or(hi_rslt, lo_rslt);
    220220    }
     
    225225    if (fw == 1) return simd_or(a, simd_not(b));
    226226    if (fw < 8) {
    227         Value * hi_rslt = simd_and(simd_himask(2*fw), simd_uge(2*fw, a, simd_and(simd_himask(2*fw), b)));
    228         Value * lo_rslt = simd_and(simd_lomask(2*fw), simd_uge(2*fw, simd_and(simd_lomask(2*fw), a), simd_and(simd_lomask(2*fw), b)));
     227        Value * hi_rslt = simd_select_hi(2*fw, simd_uge(2*fw, a, simd_select_hi(2*fw, b)));
     228        Value * lo_rslt = simd_select_lo(2*fw, simd_uge(2*fw, simd_select_lo(2*fw, a), simd_select_lo(2*fw, b)));
    229229        return simd_or(hi_rslt, lo_rslt);
    230230    }
     
    243243    if (fw == 1) return simd_or(a, b);
    244244    if (fw < 8) {
    245         Value * hi_rslt = simd_and(simd_himask(2*fw), simd_umax(2*fw, a, b));
    246         Value * lo_rslt = simd_umax(2*fw, simd_and(simd_lomask(2*fw), a), simd_and(simd_lomask(2*fw), b));
     245        Value * hi_rslt = simd_select_hi(2*fw, simd_umax(2*fw, a, b));
     246        Value * lo_rslt = simd_umax(2*fw, simd_select_lo(2*fw, a), simd_select_lo(2*fw, b));
    247247        return simd_or(hi_rslt, lo_rslt);
    248248    }
     
    262262    if (fw == 1) return simd_and(a, b);
    263263    if (fw < 8) {
    264         Value * hi_rslt = simd_and(simd_himask(2*fw), simd_umin(2*fw, a, b));
    265         Value * lo_rslt = simd_umin(2*fw, simd_and(simd_lomask(2*fw), a), simd_and(simd_lomask(2*fw), b));
     264        Value * hi_rslt = simd_select_hi(2*fw, simd_umin(2*fw, a, b));
     265        Value * lo_rslt = simd_umin(2*fw, simd_select_lo(2*fw, a), simd_select_lo(2*fw, b));
    266266        return simd_or(hi_rslt, lo_rslt);
    267267    }
     
    374374    Value * w = simd_and(extract_mask, v);
    375375    for (unsigned fw = 2; fw < fieldwidth; fw = fw * 2) {
    376         Value * shift_fwd_field_mask = simd_lomask(fw*2);
    377         Value * shift_back_field_mask = simd_himask(fw*2);
    378         Value * shift_back_count_mask = simd_and(shift_back_field_mask, simd_lomask(fw));
    379         Value * shift_fwd_amts = simd_srli(fw, simd_and(shift_fwd_field_mask, delcounts), fw/2);
    380         Value * shift_back_amts = simd_and(shift_back_count_mask, delcounts);
    381         w = simd_or(simd_sllv(fw, simd_and(w, shift_fwd_field_mask), shift_fwd_amts),
    382                     simd_srlv(fw, simd_and(w, shift_back_field_mask), shift_back_amts));
    383         delcounts = simd_add(fw, simd_and(simd_lomask(fw), delcounts), simd_srli(fw, delcounts, fw/2));
     376        Value * shift_fwd_amts = simd_srli(fw, simd_select_lo(fw*2, delcounts), fw/2);
     377        Value * shift_back_amts = simd_select_lo(fw, simd_select_hi(fw*2, delcounts));
     378      w = simd_or(simd_sllv(fw, simd_select_lo(fw*2, w), shift_fwd_amts),
     379                    simd_srlv(fw, simd_select_hi(fw*2, w), shift_back_amts));
     380        delcounts = simd_add(fw, simd_select_lo(fw, delcounts), simd_srli(fw, delcounts, fw/2));
    384381    }
    385382    // Now shift back all fw fields.
    386     Value * shift_back_amts = simd_and(simd_lomask(fieldwidth), delcounts);
     383    Value * shift_back_amts = simd_select_lo(fieldwidth, delcounts);
    387384    w = simd_srlv(fieldwidth, w, shift_back_amts);
    388385    return w;
     
    395392    delcounts.push_back(simd_not(deposit_mask)); // initially deletion counts per 1-bit field
    396393    for (unsigned fw = 2; fw < fieldwidth; fw = fw * 2) {
    397         delcounts.push_back(simd_add(fw, simd_and(simd_lomask(fw), delcounts.back()), simd_srli(fw, delcounts.back(), fw/2)));
     394        delcounts.push_back(simd_add(fw, simd_select_lo(fw, delcounts.back()), simd_srli(fw, delcounts.back(), fw/2)));
    398395    }
    399396    //
    400397    // Now reverse the pext process.  First reverse the final shift_back.
    401     Value * pext_shift_back_amts = simd_and(simd_lomask(fieldwidth), delcounts.back());
     398    Value * pext_shift_back_amts = simd_select_lo(fieldwidth, delcounts.back());
    402399    Value * w = simd_sllv(fieldwidth, v, pext_shift_back_amts);
    403400    //
     
    405402    for (unsigned fw = fieldwidth/2; fw >= 2; fw = fw/2) {
    406403        delcounts.pop_back();
    407         Value * pext_shift_fwd_field_mask = simd_lomask(fw*2);
    408         Value * pext_shift_back_field_mask = simd_himask(fw*2);
    409         Value * pext_shift_back_count_mask = simd_and(pext_shift_back_field_mask, simd_lomask(fw));
    410         Value * pext_shift_fwd_amts = simd_srli(fw, simd_and(pext_shift_fwd_field_mask, delcounts.back()), fw/2);
    411         Value * pext_shift_back_amts = simd_and(pext_shift_back_count_mask, delcounts.back());
    412         w = simd_or(simd_srlv(fw, simd_and(w, pext_shift_fwd_field_mask), pext_shift_fwd_amts),
    413                     simd_sllv(fw, simd_and(w, pext_shift_back_field_mask), pext_shift_back_amts));
     404        Value * pext_shift_fwd_amts = simd_srli(fw, simd_select_lo(fw * 2, delcounts.back()), fw/2);
     405        Value * pext_shift_back_amts = simd_select_lo(fw, simd_select_hi(fw*2, delcounts.back()));
     406        w = simd_or(simd_srlv(fw, simd_select_lo(fw * 2, w), pext_shift_fwd_amts),
     407                    simd_sllv(fw, simd_select_hi(fw * 2, w), pext_shift_back_amts));
    414408    }
    415409    return simd_and(w, deposit_mask);
     
    426420        // case 10:  ab - 0a = 10 - 01 = 01 (no borrow)
    427421        // case 11:  ab - 0a = 11 - 01 = 10
    428         return simd_sub(64, a, simd_srli(64, simd_and(simd_himask(2), a), 1));
     422        return simd_sub(64, a, simd_srli(64, simd_select_hi(2, a), 1));
    429423    } else if (fw <= 8) {
    430424        Value * c = simd_popcount(fw/2, a);
    431         c = simd_add(64, simd_and(c, simd_lomask(fw)), simd_srli(fw, c, fw/2));
     425        c = simd_add(64, simd_select_lo(fw, c), simd_srli(fw, c, fw/2));
    432426        return c;
    433427    } else {
     
    458452            a = simd_bitreverse(fw/2, a);
    459453        }
    460         return simd_or(simd_srli(16, simd_and(a, simd_himask(fw)), fw/2), simd_slli(16, simd_and(a, simd_lomask(fw)), fw/2));
     454        return simd_or(simd_srli(16, simd_select_hi(fw, a), fw/2), simd_slli(16, simd_select_lo(fw, a), fw/2));
    461455    }
    462456}
     
    557551        Value * bLo = simd_srli(fw_wkg, b, fw/2);
    558552        return hsimd_packl(fw*2,
    559                            bitCast(simd_or(simd_and(simd_himask(fw), aLo), simd_and(simd_lomask(fw), a))),
    560                            bitCast(simd_or(simd_and(simd_himask(fw), bLo), simd_and(simd_lomask(fw), b))));
     553                           bitCast(simd_or(simd_select_hi(fw, aLo), simd_select_lo(fw, a))),
     554                           bitCast(simd_or(simd_select_hi(fw, bLo), simd_select_lo(fw, b))));
    561555    }
    562556    Value * aVec = fwCast(fw/2, a);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r6108 r6110  
    217217        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
    218218        // For each 16-bit field, interleave the low bits of the two bytes.
    219         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     219        low_bits = simd_or(simd_select_lo(16, low_bits), simd_srli(16, low_bits, 8-fw));
    220220        // For each 16-bit field, interleave the high bits of the two bytes.
    221         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     221        high_bits = simd_or(simd_select_hi(16, high_bits), simd_slli(16, high_bits, 8-fw));
    222222        return simd_or(low_bits, high_bits);
    223223    }
     
    234234        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
    235235        // For each 16-bit field, interleave the low bits of the two bytes.
    236         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     236        low_bits = simd_or(simd_select_lo(16, low_bits), simd_srli(16, low_bits, 8-fw));
    237237        // For each 16-bit field, interleave the high bits of the two bytes.
    238         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     238        high_bits = simd_or(simd_select_hi(16, high_bits), simd_slli(16, high_bits, 8-fw));
    239239        return simd_or(low_bits, high_bits);
    240240    }
Note: See TracChangeset for help on using the changeset viewer.