Ignore:
Timestamp:
Nov 27, 2011, 2:38:59 PM (7 years ago)
Author:
huah
Message:

added support for ARM NEON

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    578578}
    579579
    580 //The total number of operations is 65
     580//The total number of operations is 64
    581581template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2)
    582582{
    583         bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2);
    584         bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2);
    585         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    586         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    587         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     583        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2);
    588584}
    589585
     
    713709}
    714710
    715 //The total number of operations is 66
     711//The total number of operations is 61
    716712template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    717713{
     
    830826template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    831827{
    832         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     828        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    833829        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    834830}
     
    843839}
    844840
    845 //The total number of operations is 55
     841//The total number of operations is 50
    846842template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    847843{
     
    892888template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    893889{
    894         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     890        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    895891        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    896892}
    897893
    898 //The total number of operations is 75
     894//The total number of operations is 70
    899895template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    900896{
     
    10471043template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10481044{
    1049         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1045        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10501046        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10511047}
     
    10601056}
    10611057
    1062 //The total number of operations is 51
     1058//The total number of operations is 46
    10631059template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10641060{
     
    14891485}
    14901486
    1491 //The total number of operations is 65
     1487//The total number of operations is 64
    14921488template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2)
    14931489{
    1494         bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2);
    1495         bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2);
    1496         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    1497         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    1498         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     1490        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
    14991491}
    15001492
     
    15751567template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15761568{
    1577         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1569        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    15781570        return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15791571}
     
    17341726template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17351727{
    1736         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1737         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1738 }
    1739 
    1740 //The total number of operations is 21
     1728        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1729}
     1730
     1731//The total number of operations is 16
    17411732template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17421733{
    1743         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1744         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1734        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17451735}
    17461736
     
    18731863template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18741864{
    1875         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1865        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    18761866        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18771867}
     
    20232013}
    20242014
    2025 //The total number of operations is 288
     2015//The total number of operations is 268
    20262016template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20272017{
     
    20572047}
    20582048
    2059 //The total number of operations is 10
     2049//The total number of operations is 8
    20602050template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20612051{
    2062         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2063 }
    2064 
    2065 //The total number of operations is 17
     2052        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2053}
     2054
     2055//The total number of operations is 15
    20662056template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20672057{
     
    24562446}
    24572447
    2458 //The total number of operations is 21
     2448//The total number of operations is 16
    24592449template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24602450{
     
    24982488}
    24992489
    2500 //The total number of operations is 25
     2490//The total number of operations is 20
    25012491template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    25022492{
     
    28252815template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28262816{
    2827         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2817        return simd128<128>::srli<(sh*4)>(arg1);
    28282818}
    28292819
     
    28432833template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28442834{
    2845         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2835        return simd128<128>::srli<(sh*32)>(arg1);
    28462836}
    28472837
     
    28552845template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28562846{
    2857         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2847        return simd128<128>::srli<(sh*128)>(arg1);
    28582848}
    28592849
Note: See TracChangeset for help on using the changeset viewer.