Changeset 3063 for trunk


Ignore:
Timestamp:
Apr 20, 2013, 11:32:33 AM (6 years ago)
Author:
cameron
Message:

Add floating point operations: i2f, f2i, fmul, fdiv, fadd, fsub, flt, fle, feq, fsqrt

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r3021 r3063  
    7070        static IDISA_ALWAYS_INLINE bitblock128_t add(bitblock128_t arg1, bitblock128_t arg2);
    7171        static IDISA_ALWAYS_INLINE bitblock128_t ugt(bitblock128_t arg1, bitblock128_t arg2);
     72        //
     73        // Hand-coded floating point routines.
     74        //
     75        static IDISA_ALWAYS_INLINE bitblock128_t i2f(bitblock128_t arg1);
     76        static IDISA_ALWAYS_INLINE bitblock128_t f2i(bitblock128_t arg1);
     77        static IDISA_ALWAYS_INLINE bitblock128_t fdiv(bitblock128_t arg1, bitblock128_t arg2);
     78        static IDISA_ALWAYS_INLINE bitblock128_t fmul(bitblock128_t arg1, bitblock128_t arg2);
     79        static IDISA_ALWAYS_INLINE bitblock128_t fadd(bitblock128_t arg1, bitblock128_t arg2);
     80        static IDISA_ALWAYS_INLINE bitblock128_t fsub(bitblock128_t arg1, bitblock128_t arg2);
     81        static IDISA_ALWAYS_INLINE bitblock128_t feq(bitblock128_t arg1, bitblock128_t arg2);
     82        static IDISA_ALWAYS_INLINE bitblock128_t flt(bitblock128_t arg1, bitblock128_t arg2);
     83        static IDISA_ALWAYS_INLINE bitblock128_t fle(bitblock128_t arg1, bitblock128_t arg2);
     84        static IDISA_ALWAYS_INLINE bitblock128_t fsqrt(bitblock128_t arg1);
    7285};
    7386
     
    526539template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(fw16_t val1, fw16_t val2, fw16_t val3, fw16_t val4, fw16_t val5, fw16_t val6, fw16_t val7, fw16_t val8);
    527540
     541//
     542// Hand-coded declarations of floating point routines.
     543//
     544template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::i2f(bitblock128_t arg1);
     545template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::f2i(bitblock128_t arg1);
     546template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fdiv(bitblock128_t arg1, bitblock128_t arg2);
     547template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fdiv(bitblock128_t arg1, bitblock128_t arg2);
     548template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fmul(bitblock128_t arg1, bitblock128_t arg2);
     549template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fmul(bitblock128_t arg1, bitblock128_t arg2);
     550template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fadd(bitblock128_t arg1, bitblock128_t arg2);
     551template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fadd(bitblock128_t arg1, bitblock128_t arg2);
     552template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsub(bitblock128_t arg1, bitblock128_t arg2);
     553template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsub(bitblock128_t arg1, bitblock128_t arg2);
     554template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsqrt(bitblock128_t arg1);
     555template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsqrt(bitblock128_t arg1);
     556template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::feq(bitblock128_t arg1, bitblock128_t arg2);
     557template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::feq(bitblock128_t arg1, bitblock128_t arg2);
     558template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::flt(bitblock128_t arg1, bitblock128_t arg2);
     559template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::flt(bitblock128_t arg1, bitblock128_t arg2);
     560template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fle(bitblock128_t arg1, bitblock128_t arg2);
     561template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fle(bitblock128_t arg1, bitblock128_t arg2);
     562
     563
     564
    528565//Implementation Part
     566
     567
     568//The total number of operations is 1.0
     569template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::i2f(bitblock128_t arg1)
     570{
     571        return (bitblock128_t)(_mm_cvtepi32_ps(arg1));
     572}
     573
     574//The total number of operations is 1.0
     575template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::f2i(bitblock128_t arg1)
     576{
     577        return (bitblock128_t)(_mm_cvtps_epi32((__m128) arg1));
     578}
     579
     580//The total number of operations is 1.0
     581template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fdiv(bitblock128_t arg1, bitblock128_t arg2)
     582{
     583        return (bitblock128_t) _mm_div_ps((__m128) arg1, (__m128) arg2);
     584}
     585
     586//The total number of operations is 1.0
     587template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fdiv(bitblock128_t arg1, bitblock128_t arg2)
     588{
     589        return (bitblock128_t) _mm_div_pd((__m128d) arg1, (__m128d) arg2);
     590}
     591
     592//The total number of operations is 1.0
     593template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fmul(bitblock128_t arg1, bitblock128_t arg2)
     594{
     595        return (bitblock128_t) _mm_mul_ps((__m128) arg1, (__m128) arg2);
     596}
     597
     598//The total number of operations is 1.0
     599template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fmul(bitblock128_t arg1, bitblock128_t arg2)
     600{
     601        return (bitblock128_t) _mm_mul_pd((__m128d) arg1, (__m128d) arg2);
     602}
     603
     604//The total number of operations is 1.0
     605template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fadd(bitblock128_t arg1, bitblock128_t arg2)
     606{
     607        return (bitblock128_t) _mm_add_ps((__m128) arg1, (__m128) arg2);
     608}
     609
     610//The total number of operations is 1.0
     611template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fadd(bitblock128_t arg1, bitblock128_t arg2)
     612{
     613        return (bitblock128_t) _mm_add_pd((__m128d) arg1, (__m128d) arg2);
     614}
     615
     616//The total number of operations is 1.0
     617template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsub(bitblock128_t arg1, bitblock128_t arg2)
     618{
     619        return (bitblock128_t) _mm_sub_ps((__m128) arg1, (__m128) arg2);
     620}
     621
     622//The total number of operations is 1.0
     623template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsub(bitblock128_t arg1, bitblock128_t arg2)
     624{
     625        return (bitblock128_t) _mm_sub_pd((__m128d) arg1, (__m128d) arg2);
     626}
     627
     628//The total number of operations is 1.0
     629template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsqrt(bitblock128_t arg1)
     630{
     631        return (bitblock128_t)(_mm_sqrt_ps((__m128) arg1));
     632}
     633
     634//The total number of operations is 1.0
     635template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsqrt(bitblock128_t arg1)
     636{
     637        return (bitblock128_t)(_mm_sqrt_pd((__m128d) arg1));
     638}
     639
     640//The total number of operations is 1.0
     641template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::feq(bitblock128_t arg1, bitblock128_t arg2)
     642{
     643        return (bitblock128_t) _mm_cmpeq_ss((__m128) arg1, (__m128) arg2);
     644}
     645
     646//The total number of operations is 1.0
     647template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::feq(bitblock128_t arg1, bitblock128_t arg2)
     648{
     649        return (bitblock128_t) _mm_cmpeq_sd((__m128d) arg1, (__m128d) arg2);
     650}
     651
     652//The total number of operations is 1.0
     653template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::flt(bitblock128_t arg1, bitblock128_t arg2)
     654{
     655        return (bitblock128_t) _mm_cmplt_ss((__m128) arg1, (__m128) arg2);
     656}
     657
     658//The total number of operations is 1.0
     659template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::flt(bitblock128_t arg1, bitblock128_t arg2)
     660{
     661        return (bitblock128_t) _mm_cmplt_sd((__m128d) arg1, (__m128d) arg2);
     662}
     663
     664//The total number of operations is 1.0
     665template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fle(bitblock128_t arg1, bitblock128_t arg2)
     666{
     667        return (bitblock128_t) _mm_cmple_ss((__m128) arg1, (__m128) arg2);
     668}
     669
     670//The total number of operations is 1.0
     671template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fle(bitblock128_t arg1, bitblock128_t arg2)
     672{
     673        return (bitblock128_t) _mm_cmple_sd((__m128d) arg1, (__m128d) arg2);
     674}
     675
     676
     677
     678
     679
    // Masks with only the most significant bit set, for 64-bit and 32-bit values.
    const uint64_t highbit_64 = static_cast<uint64_t>(1) << 63;
    const uint32_t highbit_32 = static_cast<uint32_t>(1) << 31;
Note: See TracChangeset for help on using the changeset viewer.