Ignore:
Timestamp:
Nov 27, 2011, 2:38:59 PM (7 years ago)
Author:
huah
Message:

added support for ARM NEON

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    706706}
    707707
    708 //The total number of operations is 37
     708//The total number of operations is 32
    709709template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    710710{
     
    823823template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    824824{
    825         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     825        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    826826        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    827827}
     
    830830template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ult(bitblock128_t arg1, bitblock128_t arg2)
    831831{
    832         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     832        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    833833        return simd128<64>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    834834}
    835835
    836 //The total number of operations is 40
     836//The total number of operations is 35
    837837template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    838838{
     
    889889}
    890890
    891 //The total number of operations is 45
     891//The total number of operations is 40
    892892template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    893893{
     
    10401040template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10411041{
    1042         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1042        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10431043        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10441044}
     
    10471047template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10481048{
    1049         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1049        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    10501050        return simd128<64>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10511051}
    10521052
    1053 //The total number of operations is 36
     1053//The total number of operations is 31
    10541054template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10551055{
     
    15711571template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15721572{
    1573         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1573        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    15741574        return simd_xor(simd128<64>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15751575}
     
    17181718template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17191719{
    1720         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1721         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1722 }
    1723 
    1724 //The total number of operations is 21
     1720        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1721}
     1722
     1723//The total number of operations is 16
    17251724template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17261725{
    1727         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1728         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1726        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17291727}
    17301728
     
    18621860template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18631861{
    1864         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1862        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    18651863        return simd_xor(simd128<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18661864}
     
    20032001}
    20042002
    2005 //The total number of operations is 172
     2003//The total number of operations is 152
    20062004template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20072005{
     
    20312029}
    20322030
    2033 //The total number of operations is 7
     2031//The total number of operations is 4
    20342032template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20352033{
    2036         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2037 }
    2038 
    2039 //The total number of operations is 10
     2034        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2035}
     2036
     2037//The total number of operations is 4
    20402038template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20412039{
    2042         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2043 }
    2044 
    2045 //The total number of operations is 17
     2040        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2041}
     2042
     2043//The total number of operations is 11
    20462044template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20472045{
     
    24352433}
    24362434
    2437 //The total number of operations is 21
     2435//The total number of operations is 16
    24382436template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24392437{
     
    24772475}
    24782476
    2479 //The total number of operations is 25
     2477//The total number of operations is 20
    24802478template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    24812479{
     
    26722670template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26732671{
    2674         return (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2672        return (((uint64_t)((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26752673}
    26762674
     
    28042802template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28052803{
    2806         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2804        return simd128<128>::srli<(sh*4)>(arg1);
    28072805}
    28082806
     
    28222820template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28232821{
    2824         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2822        return simd128<128>::srli<(sh*32)>(arg1);
    28252823}
    28262824
     
    28342832template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28352833{
    2836         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2834        return simd128<128>::srli<(sh*128)>(arg1);
    28372835}
    28382836
Note: See TracChangeset for help on using the changeset viewer.