Ignore:
Timestamp:
Nov 27, 2011, 2:38:59 PM (7 years ago)
Author:
huah
Message:

Added support for ARM NEON.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    714714}
    715715
    716 //The total number of operations is 62
     716//The total number of operations is 57
    717717template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    718718{
     
    831831template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    832832{
    833         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     833        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    834834        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    835835}
     
    841841}
    842842
    843 //The total number of operations is 48
     843//The total number of operations is 43
    844844template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    845845{
     
    893893template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    894894{
    895         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     895        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    896896        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    897897}
    898898
    899 //The total number of operations is 65
     899//The total number of operations is 60
    900900template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    901901{
     
    10481048template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10491049{
    1050         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1050        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10511051        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10521052}
     
    10611061}
    10621062
    1063 //The total number of operations is 47
     1063//The total number of operations is 42
    10641064template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10651065{
     
    17341734template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17351735{
    1736         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1737         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1738 }
    1739 
    1740 //The total number of operations is 21
     1736        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1737}
     1738
     1739//The total number of operations is 16
    17411740template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17421741{
    1743         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1744         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1742        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17451743}
    17461744
     
    20212019}
    20222020
    2023 //The total number of operations is 272
     2021//The total number of operations is 252
    20242022template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20252023{
     
    20492047}
    20502048
    2051 //The total number of operations is 7
     2049//The total number of operations is 4
    20522050template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20532051{
    2054         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2055 }
    2056 
    2057 //The total number of operations is 10
     2052        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2053}
     2054
     2055//The total number of operations is 4
    20582056template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20592057{
    2060         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2061 }
    2062 
    2063 //The total number of operations is 17
     2058        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2059}
     2060
     2061//The total number of operations is 11
    20642062template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20652063{
     
    24532451}
    24542452
    2455 //The total number of operations is 21
     2453//The total number of operations is 16
    24562454template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24572455{
     
    24952493}
    24962494
    2497 //The total number of operations is 25
     2495//The total number of operations is 20
    24982496template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    24992497{
     
    26902688template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26912689{
    2692         return (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2690        return (((uint64_t)((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26932691}
    26942692
     
    28222820template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28232821{
    2824         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2822        return simd128<128>::srli<(sh*4)>(arg1);
    28252823}
    28262824
     
    28402838template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28412839{
    2842         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2840        return simd128<128>::srli<(sh*32)>(arg1);
    28432841}
    28442842
     
    28522850template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28532851{
    2854         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2852        return simd128<128>::srli<(sh*128)>(arg1);
    28552853}
    28562854
Note: See TracChangeset for help on using the changeset viewer.