Changeset 1570


Ignore:
Timestamp:
Oct 22, 2011, 6:22:11 PM (6 years ago)
Author:
cameron
Message:

bitblock::(load,store) (aligned,unaligned); <1>::add, sub

Location:
trunk
Files:
18 edited

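Legend:

Unmodified (two line numbers fused together, old revision then new revision — e.g. "5858" is old line 58 / new line 58, "875879" is old line 875 / new line 879)
Added (a single new-revision line number, followed immediately by the line's text)
Removed (a single old-revision line number, followed by one space and then the line's text)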
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r1557 r1570  
    5858public:
    5959        static IDISA_ALWAYS_INLINE bitblock256_t mergel(bitblock256_t arg1, bitblock256_t arg2);
     60        static IDISA_ALWAYS_INLINE bitblock256_t signextendh(bitblock256_t arg1);
    6061        static IDISA_ALWAYS_INLINE bitblock256_t mergeh(bitblock256_t arg1, bitblock256_t arg2);
    6162        static IDISA_ALWAYS_INLINE bitblock256_t zeroextendh(bitblock256_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock256_t zeroextendl(bitblock256_t arg1);
    63         static IDISA_ALWAYS_INLINE bitblock256_t signextendh(bitblock256_t arg1);
    6464        static IDISA_ALWAYS_INLINE bitblock256_t signextendl(bitblock256_t arg1);
    6565};
     
    8585{
    8686public:
     87        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(bitblock256_t* arg1);
     88        static IDISA_ALWAYS_INLINE void store_aligned(bitblock256_t* arg1, bitblock256_t arg2);
    8789        static IDISA_ALWAYS_INLINE bool all(bitblock256_t arg1);
    8890        static IDISA_ALWAYS_INLINE bool any(bitblock256_t arg1);
    8991        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock256_t arg1);
     92        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(bitblock256_t* arg1);
     93        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t* arg1, bitblock256_t arg2);
    9094};
    9195
     
    875879}
    876880
    877 //The total number of operations is 186
     881//The total number of operations is 182
    878882template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2)
    879883{
     
    881885}
    882886
    883 //The total number of operations is 90
     887//The total number of operations is 88
    884888template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2)
    885889{
    886         bitblock256_t tmp = simd_not(arg1);
    887         bitblock256_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd256<256>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    888         return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<256>::srli<1>(tmpAns));
     890        return simd_and(simd256<2>::srai<(1)>(simd_or(simd_and(simd_not(arg1), arg2), simd_and(simd_not(simd_xor(arg1, arg2)), simd256<2>::sub(arg1, arg2)))), simd_not(simd256<2>::eq(arg1, arg2)));
    889891}
    890892
     
    938940}
    939941
    940 //The total number of operations is 188
     942//The total number of operations is 184
    941943template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2)
    942944{
     
    945947}
    946948
    947 //The total number of operations is 91
     949//The total number of operations is 90
    948950template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2)
    949951{
    950         bitblock256_t tmp = simd_not(arg2);
    951         bitblock256_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd256<256>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    952         return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<256>::srli<1>(tmpAns));
     952        bitblock256_t high_bit = simd256<2>::constant<(2)>();
     953        return simd256<2>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    953954}
    954955
     
    10581059}
    10591060
    1060 //The total number of operations is 60
     1061//The total number of operations is 34
    10611062template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ctz(bitblock256_t arg1)
    10621063{
     
    12741275}
    12751276
    1276 //The total number of operations is 92
     1277//The total number of operations is 1
    12771278template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1)
    12781279{
     
    12801281}
    12811282
    1282 //The total number of operations is 44
     1283//The total number of operations is 18
    12831284template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1)
    12841285{
     
    14301431}
    14311432
    1432 //The total number of operations is 92
     1433//The total number of operations is 1
    14331434template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::sub(bitblock256_t arg1, bitblock256_t arg2)
    14341435{
    1435         return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::sub(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::sub(arg1, arg2));
    1436 }
    1437 
    1438 //The total number of operations is 44
     1436        return simd_xor(arg1, arg2);
     1437}
     1438
     1439//The total number of operations is 18
    14391440template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::sub(bitblock256_t arg1, bitblock256_t arg2)
    14401441{
    1441         return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::sub(arg1, simd_and(simd256<(4)>::himask(), arg2)), simd256<(4)>::sub(arg1, arg2));
     1442        bitblock256_t ans = simd256<(1)>::sub(arg1, arg2);
     1443        bitblock256_t borrowMask = simd_or(simd_andc(arg2, arg1), simd_and(simd_not(simd_xor(arg1, arg2)), ans));
     1444        bitblock256_t loMask = simd256<2>::lomask();
     1445        bitblock256_t borrow = simd256<2>::slli<1>(simd_and(borrowMask, loMask));
     1446        return simd256<1>::ifh(loMask, ans, simd256<(1)>::sub(ans, borrow));
    14421447}
    14431448
     
    19901995}
    19911996
    1992 //The total number of operations is 92
     1997//The total number of operations is 1
    19931998template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::add(bitblock256_t arg1, bitblock256_t arg2)
    19941999{
    1995         return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::add(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::add(arg1, arg2));
    1996 }
    1997 
    1998 //The total number of operations is 44
     2000        return simd_xor(arg1, arg2);
     2001}
     2002
     2003//The total number of operations is 18
    19992004template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add(bitblock256_t arg1, bitblock256_t arg2)
    20002005{
    2001         return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::add(arg1, simd_and(simd256<(4)>::himask(), arg2)), simd256<(4)>::add(arg1, arg2));
     2006        bitblock256_t ans = simd256<(1)>::add(arg1, arg2);
     2007        bitblock256_t carryMask = simd_or(simd_and(arg1, arg2), simd_and(simd_xor(arg1, arg2), simd_not(ans)));
     2008        bitblock256_t loMask = simd256<2>::lomask();
     2009        bitblock256_t carry = simd256<2>::slli<1>(simd_and(carryMask, loMask));
     2010        return simd256<1>::ifh(loMask, ans, simd256<(1)>::add(ans, carry));
    20022011}
    20032012
     
    20522061}
    20532062
    2054 //The total number of operations is 219
     2063//The total number of operations is 128
    20552064template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1)
    20562065{
     
    21592168}
    21602169
    2161 //The total number of operations is 652
     2170//The total number of operations is 561
    21622171template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
    21632172{
     
    21652174}
    21662175
    2167 //The total number of operations is 428
     2176//The total number of operations is 402
    21682177template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
    21692178{
     
    32883297}
    32893298
     3299//The total number of operations is 1
     3300IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(bitblock256_t* arg1)
     3301{
     3302        return _mm256_loadu_ps((bitblock256_t*)(arg1));
     3303}
     3304
     3305//The total number of operations is 1
     3306IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t* arg1, bitblock256_t arg2)
     3307{
     3308        _mm256_store_ps((bitblock256_t*)(arg1), arg2);
     3309}
     3310
    32903311//The total number of operations is 209
    32913312IDISA_ALWAYS_INLINE uint64_t bitblock256::popcount(bitblock256_t arg1)
     
    33063327}
    33073328
     3329//The total number of operations is 1
     3330IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(bitblock256_t* arg1)
     3331{
     3332        return _mm256_load_ps((bitblock256_t*)(arg1));
     3333}
     3334
     3335//The total number of operations is 1
     3336IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t* arg1, bitblock256_t arg2)
     3337{
     3338        _mm256_storeu_ps((bitblock256_t*)(arg1), arg2);
     3339}
     3340
    33083341#endif
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r1557 r1570  
    6060public:
    6161        static IDISA_ALWAYS_INLINE bitblock128_t mergel(bitblock128_t arg1, bitblock128_t arg2);
     62        static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock128_t mergeh(bitblock128_t arg1, bitblock128_t arg2);
    6364        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendh(bitblock128_t arg1);
    6465        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendl(bitblock128_t arg1);
    65         static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6666        static IDISA_ALWAYS_INLINE bitblock128_t signextendl(bitblock128_t arg1);
    6767};
     
    8888{
    8989public:
     90        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     91        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9092        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9193        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9294        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     95        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
     96        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
    9397};
    9498
     
    11491153}
    11501154
    1151 //The total number of operations is 26
     1155//The total number of operations is 1
    11521156template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    11531157{
     
    12871291}
    12881292
    1289 //The total number of operations is 26
     1293//The total number of operations is 1
    12901294template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2)
    12911295{
    1292         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::sub(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::sub(arg1, arg2));
     1296        return simd_xor(arg1, arg2);
    12931297}
    12941298
     
    17941798}
    17951799
    1796 //The total number of operations is 24
     1800//The total number of operations is 1
    17971801template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    17981802{
    1799         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::add(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::add(arg1, arg2));
     1803        return simd_xor(arg1, arg2);
    18001804}
    18011805
     
    18461850}
    18471851
    1848 //The total number of operations is 71
     1852//The total number of operations is 46
    18491853template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    18501854{
     
    19431947}
    19441948
    1945 //The total number of operations is 116
     1949//The total number of operations is 93
    19461950template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2)
    19471951{
     
    29872991}
    29882992
     2993//The total number of operations is 1
     2994IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
     2995{
     2996        return _mm_loadu_si128((bitblock128_t*)(arg1));
     2997}
     2998
     2999//The total number of operations is 1
     3000IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t* arg1, bitblock128_t arg2)
     3001{
     3002        _mm_store_si128((bitblock128_t*)(arg1), arg2);
     3003}
     3004
    29893005//The total number of operations is 22
    29903006IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
     
    30053021}
    30063022
     3023//The total number of operations is 1
     3024IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_aligned(bitblock128_t* arg1)
     3025{
     3026        return _mm_load_si128((bitblock128_t*)(arg1));
     3027}
     3028
     3029//The total number of operations is 1
     3030IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t* arg1, bitblock128_t arg2)
     3031{
     3032        _mm_storeu_si128((bitblock128_t*)(arg1), arg2);
     3033}
     3034
    30073035#endif
  • trunk/lib/idisa_cpp/idisa_sse3.cpp

    r1557 r1570  
    6060public:
    6161        static IDISA_ALWAYS_INLINE bitblock128_t mergel(bitblock128_t arg1, bitblock128_t arg2);
     62        static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock128_t mergeh(bitblock128_t arg1, bitblock128_t arg2);
    6364        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendh(bitblock128_t arg1);
    6465        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendl(bitblock128_t arg1);
    65         static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6666        static IDISA_ALWAYS_INLINE bitblock128_t signextendl(bitblock128_t arg1);
    6767};
     
    8888{
    8989public:
     90        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     91        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9092        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9193        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9294        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     95        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
     96        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
    9397};
    9498
     
    11491153}
    11501154
    1151 //The total number of operations is 26
     1155//The total number of operations is 1
    11521156template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    11531157{
     
    12871291}
    12881292
    1289 //The total number of operations is 26
     1293//The total number of operations is 1
    12901294template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2)
    12911295{
    1292         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::sub(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::sub(arg1, arg2));
     1296        return simd_xor(arg1, arg2);
    12931297}
    12941298
     
    17941798}
    17951799
    1796 //The total number of operations is 24
     1800//The total number of operations is 1
    17971801template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    17981802{
    1799         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::add(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::add(arg1, arg2));
     1803        return simd_xor(arg1, arg2);
    18001804}
    18011805
     
    18461850}
    18471851
    1848 //The total number of operations is 71
     1852//The total number of operations is 46
    18491853template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    18501854{
     
    19431947}
    19441948
    1945 //The total number of operations is 116
     1949//The total number of operations is 93
    19461950template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2)
    19471951{
     
    29872991}
    29882992
     2993//The total number of operations is 1
     2994IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
     2995{
     2996        return _mm_loadu_si128((bitblock128_t*)(arg1));
     2997}
     2998
     2999//The total number of operations is 1
     3000IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t* arg1, bitblock128_t arg2)
     3001{
     3002        _mm_store_si128((bitblock128_t*)(arg1), arg2);
     3003}
     3004
    29893005//The total number of operations is 22
    29903006IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
     
    30053021}
    30063022
     3023//The total number of operations is 1
     3024IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_aligned(bitblock128_t* arg1)
     3025{
     3026        return _mm_load_si128((bitblock128_t*)(arg1));
     3027}
     3028
     3029//The total number of operations is 1
     3030IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t* arg1, bitblock128_t arg2)
     3031{
     3032        _mm_storeu_si128((bitblock128_t*)(arg1), arg2);
     3033}
     3034
    30073035#endif
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1557 r1570  
    6060public:
    6161        static IDISA_ALWAYS_INLINE bitblock128_t mergel(bitblock128_t arg1, bitblock128_t arg2);
     62        static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock128_t mergeh(bitblock128_t arg1, bitblock128_t arg2);
    6364        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendh(bitblock128_t arg1);
    6465        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendl(bitblock128_t arg1);
    65         static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6666        static IDISA_ALWAYS_INLINE bitblock128_t signextendl(bitblock128_t arg1);
    6767};
     
    8989{
    9090public:
     91        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     92        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9193        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9294        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9395        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     96        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
     97        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
    9498};
    9599
     
    11551159}
    11561160
    1157 //The total number of operations is 26
     1161//The total number of operations is 1
    11581162template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    11591163{
     
    12931297}
    12941298
    1295 //The total number of operations is 26
     1299//The total number of operations is 1
    12961300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2)
    12971301{
    1298         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::sub(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::sub(arg1, arg2));
     1302        return simd_xor(arg1, arg2);
    12991303}
    13001304
     
    17971801}
    17981802
    1799 //The total number of operations is 24
     1803//The total number of operations is 1
    18001804template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    18011805{
    1802         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::add(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::add(arg1, arg2));
     1806        return simd_xor(arg1, arg2);
    18031807}
    18041808
     
    18491853}
    18501854
    1851 //The total number of operations is 71
     1855//The total number of operations is 46
    18521856template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    18531857{
     
    19431947}
    19441948
    1945 //The total number of operations is 116
     1949//The total number of operations is 93
    19461950template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2)
    19471951{
     
    30193023}
    30203024
     3025//The total number of operations is 1
     3026IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
     3027{
     3028        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3029}
     3030
     3031//The total number of operations is 1
     3032IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t* arg1, bitblock128_t arg2)
     3033{
     3034        _mm_store_si128((bitblock128_t*)(arg1), arg2);
     3035}
     3036
    30213037//The total number of operations is 20
    30223038IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
     
    30373053}
    30383054
     3055//The total number of operations is 1
     3056IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_aligned(bitblock128_t* arg1)
     3057{
     3058        return _mm_load_si128((bitblock128_t*)(arg1));
     3059}
     3060
     3061//The total number of operations is 1
     3062IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t* arg1, bitblock128_t arg2)
     3063{
     3064        _mm_storeu_si128((bitblock128_t*)(arg1), arg2);
     3065}
     3066
    30393067#endif
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1557 r1570  
    6060public:
    6161        static IDISA_ALWAYS_INLINE bitblock128_t mergel(bitblock128_t arg1, bitblock128_t arg2);
     62        static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock128_t mergeh(bitblock128_t arg1, bitblock128_t arg2);
    6364        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendh(bitblock128_t arg1);
    6465        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendl(bitblock128_t arg1);
    65         static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6666        static IDISA_ALWAYS_INLINE bitblock128_t signextendl(bitblock128_t arg1);
    6767};
     
    8989{
    9090public:
     91        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     92        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9193        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9294        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9395        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     96        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
     97        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
    9498};
    9599
     
    11451149}
    11461150
    1147 //The total number of operations is 26
     1151//The total number of operations is 1
    11481152template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    11491153{
     
    12831287}
    12841288
    1285 //The total number of operations is 26
     1289//The total number of operations is 1
    12861290template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2)
    12871291{
    1288         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::sub(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::sub(arg1, arg2));
     1292        return simd_xor(arg1, arg2);
    12891293}
    12901294
     
    17791783}
    17801784
    1781 //The total number of operations is 24
     1785//The total number of operations is 1
    17821786template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    17831787{
    1784         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::add(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::add(arg1, arg2));
     1788        return simd_xor(arg1, arg2);
    17851789}
    17861790
     
    18311835}
    18321836
    1833 //The total number of operations is 71
     1837//The total number of operations is 46
    18341838template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    18351839{
     
    19251929}
    19261930
    1927 //The total number of operations is 116
     1931//The total number of operations is 93
    19281932template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2)
    19291933{
     
    30013005}
    30023006
     3007//The total number of operations is 1
     3008IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
     3009{
     3010        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3011}
     3012
     3013//The total number of operations is 1
     3014IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t* arg1, bitblock128_t arg2)
     3015{
     3016        _mm_store_si128((bitblock128_t*)(arg1), arg2);
     3017}
     3018
    30033019//The total number of operations is 20
    30043020IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
     
    30193035}
    30203036
     3037//The total number of operations is 1
     3038IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_aligned(bitblock128_t* arg1)
     3039{
     3040        return _mm_load_si128((bitblock128_t*)(arg1));
     3041}
     3042
     3043//The total number of operations is 1
     3044IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t* arg1, bitblock128_t arg2)
     3045{
     3046        _mm_storeu_si128((bitblock128_t*)(arg1), arg2);
     3047}
     3048
    30213049#endif
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1557 r1570  
    6060public:
    6161        static IDISA_ALWAYS_INLINE bitblock128_t mergel(bitblock128_t arg1, bitblock128_t arg2);
     62        static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6263        static IDISA_ALWAYS_INLINE bitblock128_t mergeh(bitblock128_t arg1, bitblock128_t arg2);
    6364        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendh(bitblock128_t arg1);
    6465        static IDISA_ALWAYS_INLINE bitblock128_t zeroextendl(bitblock128_t arg1);
    65         static IDISA_ALWAYS_INLINE bitblock128_t signextendh(bitblock128_t arg1);
    6666        static IDISA_ALWAYS_INLINE bitblock128_t signextendl(bitblock128_t arg1);
    6767};
     
    8989{
    9090public:
     91        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     92        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9193        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9294        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9395        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     96        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
     97        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
    9498};
    9599
     
    11541158}
    11551159
    1156 //The total number of operations is 26
     1160//The total number of operations is 1
    11571161template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    11581162{
     
    12921296}
    12931297
    1294 //The total number of operations is 26
     1298//The total number of operations is 1
    12951299template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2)
    12961300{
    1297         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::sub(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::sub(arg1, arg2));
     1301        return simd_xor(arg1, arg2);
    12981302}
    12991303
     
    17991803}
    18001804
    1801 //The total number of operations is 24
     1805//The total number of operations is 1
    18021806template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    18031807{
    1804         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::add(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::add(arg1, arg2));
     1808        return simd_xor(arg1, arg2);
    18051809}
    18061810
     
    18511855}
    18521856
    1853 //The total number of operations is 71
     1857//The total number of operations is 46
    18541858template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    18551859{
     
    19451949}
    19461950
    1947 //The total number of operations is 116
     1951//The total number of operations is 93
    19481952template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2)
    19491953{
     
    30223026}
    30233027
     3028//The total number of operations is 1
     3029IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
     3030{
     3031        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3032}
     3033
     3034//The total number of operations is 1
     3035IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t* arg1, bitblock128_t arg2)
     3036{
     3037        _mm_store_si128((bitblock128_t*)(arg1), arg2);
     3038}
     3039
    30243040//The total number of operations is 22
    30253041IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
     
    30403056}
    30413057
     3058//The total number of operations is 1
     3059IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_aligned(bitblock128_t* arg1)
     3060{
     3061        return _mm_load_si128((bitblock128_t*)(arg1));
     3062}
     3063
     3064//The total number of operations is 1
     3065IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t* arg1, bitblock128_t arg2)
     3066{
     3067        _mm_storeu_si128((bitblock128_t*)(arg1), arg2);
     3068}
     3069
    30423070#endif
  • trunk/libgen/Configure/IDISAOperations.py

    r1556 r1570  
    508508    #},
    509509   
    510     #"bitblock_load_unaligned":\
    511     #{
    512     # "signature":"SIMD_type bitblock::load_unaligned(SIMD_type* arg1)",
    513     #},
     510    "bitblock_load_aligned":\
     511    {
     512     "signature":"SIMD_type bitblock::load_aligned(SIMD_type* arg1)",
     513     "args_type":{"arg1":"SIMD_type*"},
     514     "return_type":"SIMD_type",
     515    },
     516    "bitblock_load_unaligned":\
     517    {
     518     "signature":"SIMD_type bitblock::load_unaligned(SIMD_type* arg1)",
     519     "args_type":{"arg1":"SIMD_type*"},
     520     "return_type":"SIMD_type",
     521    },
    514522   
    515523    #"bitblock_store_aligned":\
     
    518526    #},
    519527   
    520     #"bitblock_store_unaligned":\
    521     #{
    522     # "signature":"void bitblock::store_unaligned(SIMD_type* arg1, SIMD_type arg2)",
    523     #},
     528    "bitblock_store_aligned":\
     529    {
     530     "signature":"void bitblock::store_aligned(SIMD_type* arg1, SIMD_type arg2)",
     531     "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     532     "return_type":"void",
     533    },
     534    "bitblock_store_unaligned":\
     535    {
     536     "signature":"void bitblock::store_unaligned(SIMD_type* arg1, SIMD_type arg2)",
     537     "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     538     "return_type":"void",
     539    },
    524540}
  • trunk/libgen/Library_Generator/AVXInstructions.py

    r1536 r1570  
    197197                "fws":[8],
    198198        },
     199        "bitblock_load_aligned":\
     200        {
     201                "signature":"SIMD_type _mm256_load_ps(SIMD_type* arg1)",
     202                "args_type":{"arg1":"SIMD_type*"},
     203                "return_type":"SIMD_type",
     204                "fws":[256],
     205        },
     206        "bitblock_store_aligned":\
     207        {
     208                "signature":"void _mm256_store_ps(SIMD_type* arg1, SIMD_type arg2)",
     209                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     210                "return_type":"void",
     211                "fws":[256],
     212        },
     213        "bitblock_load_unaligned":\
     214        {
     215                "signature":"SIMD_type _mm256_loadu_ps(SIMD_type* arg1)",
     216                "args_type":{"arg1":"SIMD_type*"},
     217                "return_type":"SIMD_type",
     218                "fws":[256],
     219        },
     220        "bitblock_store_unaligned":\
     221        {
     222                "signature":"void _mm256_storeu_ps(SIMD_type* arg1, SIMD_type arg2)",
     223                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     224                "return_type":"void",
     225                "fws":[256],
     226        },
     227
    199228        "_mm256_set_epi32":\
    200229        {
  • trunk/libgen/Library_Generator/BuiltIns.py

    r1536 r1570  
    4242                        elif StandardTypes.IsSIMDType(self.argsType[self.args[i]]):
    4343                                txt += self.arguments[i] + ", "
     44                        elif StandardTypes.IsSIMDPointer(self.argsType[self.args[i]]):
     45                                txt += "(" + StandardTypes.GetSIMDPointer(self.arch) + ")" + "(" + self.arguments[i] + ")" + ", "
    4446                        elif StandardTypes.Is64BitFloatingType(self.argsType[self.args[i]]):
    4547                                txt += "(" + StandardTypes.Get64BitFloatingType(self.argsType[self.args[i]], configure.RegisterSize[self.arch]) + ")" + "(" + self.arguments[i] + ")" + ", "
  • trunk/libgen/Library_Generator/Operation.py

    r1539 r1570  
    9797                                self.valueRange[arg.name] = {"min":minV, "max":maxV}
    9898                                argType = StandardTypes.GetUnsignedIntType(int_type, regSize)
     99                        elif StandardTypes.IsSIMDPointer(argType):
     100                                argType = configure.Bitblock_type[self.arch] + "*"
    99101                        else:
    100102                                #it must be a SIMD_type
  • trunk/libgen/Library_Generator/SSE2Instructions.py

    r1521 r1570  
    204204                "fws":[32],
    205205        },
     206        "bitblock_load_aligned":\
     207        {
     208                "signature":"SIMD_type _mm_load_si128(SIMD_type* arg1)",
     209                "args_type":{"arg1":"SIMD_type*"},
     210                "return_type":"SIMD_type",
     211                "fws":[128],
     212        },
     213        "bitblock_store_aligned":\
     214        {
     215                "signature":"void _mm_store_si128(SIMD_type* arg1, SIMD_type arg2)",
     216                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     217                "return_type":"void",
     218                "fws":[128],
     219        },
     220        "bitblock_load_unaligned":\
     221        {
     222                "signature":"SIMD_type _mm_loadu_si128(SIMD_type* arg1)",
     223                "args_type":{"arg1":"SIMD_type*"},
     224                "return_type":"SIMD_type",
     225                "fws":[128],
     226        },
     227        "bitblock_store_unaligned":\
     228        {
     229                "signature":"void _mm_storeu_si128(SIMD_type* arg1, SIMD_type arg2)",
     230                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     231                "return_type":"void",
     232                "fws":[128],
     233        },
    206234        "_mm_sll_epi64":\
    207235        {
  • trunk/libgen/Library_Generator/SSE3Instructions.py

    r1521 r1570  
    204204                "fws":[32],
    205205        },
     206        "bitblock_load_aligned":\
     207        {
     208                "signature":"SIMD_type _mm_load_si128(SIMD_type* arg1)",
     209                "args_type":{"arg1":"SIMD_type*"},
     210                "return_type":"SIMD_type",
     211                "fws":[128],
     212        },
     213        "bitblock_store_aligned":\
     214        {
     215                "signature":"void _mm_store_si128(SIMD_type* arg1, SIMD_type arg2)",
     216                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     217                "return_type":"void",
     218                "fws":[128],
     219        },
     220        "bitblock_load_unaligned":\
     221        {
     222                "signature":"SIMD_type _mm_loadu_si128(SIMD_type* arg1)",
     223                "args_type":{"arg1":"SIMD_type*"},
     224                "return_type":"SIMD_type",
     225                "fws":[128],
     226        },
     227        "bitblock_store_unaligned":\
     228        {
     229                "signature":"void _mm_storeu_si128(SIMD_type* arg1, SIMD_type arg2)",
     230                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     231                "return_type":"void",
     232                "fws":[128],
     233        },
    206234        "_mm_sll_epi64":\
    207235        {
  • trunk/libgen/Library_Generator/SSE4_1Instructions.py

    r1521 r1570  
    225225                "fws":[8],
    226226        },
     227        "bitblock_load_aligned":\
     228        {
     229                "signature":"SIMD_type _mm_load_si128(SIMD_type* arg1)",
     230                "args_type":{"arg1":"SIMD_type*"},
     231                "return_type":"SIMD_type",
     232                "fws":[128],
     233        },
     234        "bitblock_store_aligned":\
     235        {
     236                "signature":"void _mm_store_si128(SIMD_type* arg1, SIMD_type arg2)",
     237                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     238                "return_type":"void",
     239                "fws":[128],
     240        },
     241        "bitblock_load_unaligned":\
     242        {
     243                "signature":"SIMD_type _mm_loadu_si128(SIMD_type* arg1)",
     244                "args_type":{"arg1":"SIMD_type*"},
     245                "return_type":"SIMD_type",
     246                "fws":[128],
     247        },
     248        "bitblock_store_unaligned":\
     249        {
     250                "signature":"void _mm_storeu_si128(SIMD_type* arg1, SIMD_type arg2)",
     251                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     252                "return_type":"void",
     253                "fws":[128],
     254        },
    227255        "_mm_sll_epi64":\
    228256        {
  • trunk/libgen/Library_Generator/SSE4_2Instructions.py

    r1521 r1570  
    225225                "fws":[8],
    226226        },
     227        "bitblock_load_aligned":\
     228        {
     229                "signature":"SIMD_type _mm_load_si128(SIMD_type* arg1)",
     230                "args_type":{"arg1":"SIMD_type*"},
     231                "return_type":"SIMD_type",
     232                "fws":[128],
     233        },
     234        "bitblock_store_aligned":\
     235        {
     236                "signature":"void _mm_store_si128(SIMD_type* arg1, SIMD_type arg2)",
     237                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     238                "return_type":"void",
     239                "fws":[128],
     240        },
     241        "bitblock_load_unaligned":\
     242        {
     243                "signature":"SIMD_type _mm_loadu_si128(SIMD_type* arg1)",
     244                "args_type":{"arg1":"SIMD_type*"},
     245                "return_type":"SIMD_type",
     246                "fws":[128],
     247        },
     248        "bitblock_store_unaligned":\
     249        {
     250                "signature":"void _mm_storeu_si128(SIMD_type* arg1, SIMD_type arg2)",
     251                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     252                "return_type":"void",
     253                "fws":[128],
     254        },
    227255        "_mm_sll_epi64":\
    228256        {
  • trunk/libgen/Library_Generator/SSSE3Instructions.py

    r1521 r1570  
    225225                "fws":[8],
    226226        },
     227        "bitblock_load_aligned":\
     228        {
     229                "signature":"SIMD_type _mm_load_si128(SIMD_type* arg1)",
     230                "args_type":{"arg1":"SIMD_type*"},
     231                "return_type":"SIMD_type",
     232                "fws":[128],
     233        },
     234        "bitblock_store_aligned":\
     235        {
     236                "signature":"void _mm_store_si128(SIMD_type* arg1, SIMD_type arg2)",
     237                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     238                "return_type":"void",
     239                "fws":[128],
     240        },
     241        "bitblock_load_unaligned":\
     242        {
     243                "signature":"SIMD_type _mm_loadu_si128(SIMD_type* arg1)",
     244                "args_type":{"arg1":"SIMD_type*"},
     245                "return_type":"SIMD_type",
     246                "fws":[128],
     247        },
     248        "bitblock_store_unaligned":\
     249        {
     250                "signature":"void _mm_storeu_si128(SIMD_type* arg1, SIMD_type arg2)",
     251                "args_type":{"arg1":"SIMD_type*", "arg2":"SIMD_type"},
     252                "return_type":"void",
     253                "fws":[128],
     254        },
    227255        "_mm_sll_epi64":\
    228256        {
  • trunk/libgen/Library_Generator/StandardTypes.py

    r1536 r1570  
    22import math
    33from types import *
     4
     5from Utility import configure
    46
    57def GetUnsignedIntType(typeStr, regSize, fw=64, truncated=True):
     
    5153    return LongType
    5254
     55def GetSIMDPointer(arch):
     56    return configure.Bitblock_type[arch] + "*"
     57
    5358def Is64BitFloatingType(typeStr):
    5459    return "__m256d" in typeStr
    5560   
     61def IsSIMDPointer(typeStr):
     62    return "SIMD_type*" == typeStr
     63
    5664def IsSIMDType(typeStr):
    57     return "SIMD_type" in typeStr or "bitblock" in typeStr or "__m128i" in typeStr
     65    return "SIMD_type" == typeStr or "bitblock" in typeStr or "__m128i" in typeStr
    5866
    5967def IsUnsignedIntType(typeStr):
  • trunk/libgen/Library_Generator/StrategyPool.py

    r1557 r1570  
    44        strategies = \
    55        {
     6                "add1":\
     7                {
     8                "body":r'''return simd_xor(arg1, arg2)''',
     9                "Ops":["simd_add", "simd_sub"],
     10                "Fws":[1],
     11                "Platforms":[configure.ALL],
     12                },
     13
    614                "unsigned_predicate_using_signed":\
    715                {
  • trunk/libgen/Library_Tester/utility.h

    r1539 r1570  
    1 #include "idisa_sse2.h"
     1#include "idisa_sse4_2.h"
    22#define USE_SSE
    33typedef __m128i SIMD_type;
Note: See TracChangeset for help on using the changeset viewer.