Ignore:
Timestamp: Nov 27, 2011, 2:38:59 PM (7 years ago)
Author: huah
Message: added support for ARM NEON
File: 1 edited

Legend: Unmodified / Added / Removed
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r1661 r1740  
    106106IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     108IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    108109IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    109 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    110110template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    519519
    520520//The total number of operations is 1
     521IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     522{
     523        return _mm_and_si128(arg1, arg2);
     524}
     525
     526//The total number of operations is 1
    521527IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    522528{
    523529        return _mm_xor_si128(arg1, arg2);
    524 }
    525 
    526 //The total number of operations is 1
    527 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    528 {
    529         return _mm_and_si128(arg1, arg2);
    530530}
    531531
     
    573573}
    574574
    575 //The total number of operations is 65
     575//The total number of operations is 64
    576576template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2)
    577577{
    578         bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2);
    579         bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2);
    580         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    581         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    582         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     578        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2);
    583579}
    584580
     
    708704}
    709705
    710 //The total number of operations is 66
     706//The total number of operations is 61
    711707template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    712708{
     
    825821template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    826822{
    827         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     823        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    828824        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    829825}
     
    838834}
    839835
    840 //The total number of operations is 55
     836//The total number of operations is 50
    841837template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    842838{
     
    887883template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    888884{
    889         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     885        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    890886        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    891887}
    892888
    893 //The total number of operations is 75
     889//The total number of operations is 70
    894890template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    895891{
     
    10421038template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10431039{
    1044         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1040        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10451041        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10461042}
     
    10551051}
    10561052
    1057 //The total number of operations is 51
     1053//The total number of operations is 46
    10581054template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10591055{
     
    14841480}
    14851481
    1486 //The total number of operations is 65
     1482//The total number of operations is 64
    14871483template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2)
    14881484{
    1489         bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2);
    1490         bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2);
    1491         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    1492         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    1493         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     1485        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
    14941486}
    14951487
     
    15701562template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15711563{
    1572         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1564        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    15731565        return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15741566}
     
    17321724template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17331725{
    1734         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1735         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1736 }
    1737 
    1738 //The total number of operations is 21
     1726        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1727}
     1728
     1729//The total number of operations is 16
    17391730template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17401731{
    1741         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1742         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1732        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17431733}
    17441734
     
    18711861template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18721862{
    1873         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1863        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    18741864        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18751865}
     
    20212011}
    20222012
    2023 //The total number of operations is 288
     2013//The total number of operations is 268
    20242014template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20252015{
     
    20492039}
    20502040
    2051 //The total number of operations is 25
     2041//The total number of operations is 8
    20522042template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20532043{
    2054         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2055 }
    2056 
    2057 //The total number of operations is 32
     2044        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2045}
     2046
     2047//The total number of operations is 8
    20582048template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20592049{
    2060         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2061 }
    2062 
    2063 //The total number of operations is 39
     2050        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2051}
     2052
     2053//The total number of operations is 15
    20642054template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20652055{
     
    24572447}
    24582448
    2459 //The total number of operations is 21
     2449//The total number of operations is 16
    24602450template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24612451{
     
    24992489}
    25002490
    2501 //The total number of operations is 25
     2491//The total number of operations is 20
    25022492template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    25032493{
     
    27902780template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    27912781{
    2792         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2782        return simd128<128>::srli<(sh*4)>(arg1);
    27932783}
    27942784
Note: See TracChangeset for help on using the changeset viewer.