 Timestamp:
 Apr 20, 2013, 11:32:33 AM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse2.cpp
r3021 r3063 70 70 static IDISA_ALWAYS_INLINE bitblock128_t add(bitblock128_t arg1, bitblock128_t arg2); 71 71 static IDISA_ALWAYS_INLINE bitblock128_t ugt(bitblock128_t arg1, bitblock128_t arg2); 72 // 73 // Handcoded floating point routines. 74 // 75 static IDISA_ALWAYS_INLINE bitblock128_t i2f(bitblock128_t arg1); 76 static IDISA_ALWAYS_INLINE bitblock128_t f2i(bitblock128_t arg1); 77 static IDISA_ALWAYS_INLINE bitblock128_t fdiv(bitblock128_t arg1, bitblock128_t arg2); 78 static IDISA_ALWAYS_INLINE bitblock128_t fmul(bitblock128_t arg1, bitblock128_t arg2); 79 static IDISA_ALWAYS_INLINE bitblock128_t fadd(bitblock128_t arg1, bitblock128_t arg2); 80 static IDISA_ALWAYS_INLINE bitblock128_t fsub(bitblock128_t arg1, bitblock128_t arg2); 81 static IDISA_ALWAYS_INLINE bitblock128_t feq(bitblock128_t arg1, bitblock128_t arg2); 82 static IDISA_ALWAYS_INLINE bitblock128_t flt(bitblock128_t arg1, bitblock128_t arg2); 83 static IDISA_ALWAYS_INLINE bitblock128_t fle(bitblock128_t arg1, bitblock128_t arg2); 84 static IDISA_ALWAYS_INLINE bitblock128_t fsqrt(bitblock128_t arg1); 72 85 }; 73 86 … … 526 539 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(fw16_t val1, fw16_t val2, fw16_t val3, fw16_t val4, fw16_t val5, fw16_t val6, fw16_t val7, fw16_t val8); 527 540 541 // 542 // Handcoded declarations of floating point routines. 543 // 544 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::i2f(bitblock128_t arg1); 545 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::f2i(bitblock128_t arg1); 546 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fdiv(bitblock128_t arg1, bitblock128_t arg2); 547 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fdiv(bitblock128_t arg1, bitblock128_t arg2); 548 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fmul(bitblock128_t arg1, bitblock128_t arg2); 549 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fmul(bitblock128_t arg1, bitblock128_t arg2); 550 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fadd(bitblock128_t arg1, bitblock128_t arg2); 551 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fadd(bitblock128_t arg1, bitblock128_t arg2); 552 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsub(bitblock128_t arg1, bitblock128_t arg2); 553 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsub(bitblock128_t arg1, bitblock128_t arg2); 554 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsqrt(bitblock128_t arg1); 555 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsqrt(bitblock128_t arg1); 556 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::feq(bitblock128_t arg1, bitblock128_t arg2); 557 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::feq(bitblock128_t arg1, bitblock128_t arg2); 558 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::flt(bitblock128_t arg1, bitblock128_t arg2); 559 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::flt(bitblock128_t arg1, bitblock128_t arg2); 560 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fle(bitblock128_t arg1, bitblock128_t arg2); 561 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fle(bitblock128_t arg1, bitblock128_t arg2); 562 563 564 528 565 //Implementation Part 566 567 568 //The total number of operations is 1.0 569 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::i2f(bitblock128_t arg1) 570 { 571 return (bitblock128_t)(_mm_cvtepi32_ps(arg1)); 572 } 573 574 //The total number of operations is 1.0 575 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::f2i(bitblock128_t arg1) 576 { 577 return (bitblock128_t)(_mm_cvtps_epi32((__m128) arg1)); 578 } 579 580 //The total number of operations is 1.0 581 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fdiv(bitblock128_t arg1, bitblock128_t arg2) 582 { 583 return (bitblock128_t) _mm_div_ps((__m128) arg1, (__m128) arg2); 584 } 585 586 //The total number of operations is 1.0 587 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fdiv(bitblock128_t arg1, bitblock128_t arg2) 588 { 589 return (bitblock128_t) _mm_div_pd((__m128d) arg1, (__m128d) arg2); 590 } 591 592 //The total number of operations is 1.0 593 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fmul(bitblock128_t arg1, bitblock128_t arg2) 594 { 595 return (bitblock128_t) _mm_mul_ps((__m128) arg1, (__m128) arg2); 596 } 597 598 //The total number of operations is 1.0 599 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fmul(bitblock128_t arg1, bitblock128_t arg2) 600 { 601 return (bitblock128_t) _mm_mul_pd((__m128d) arg1, (__m128d) arg2); 602 } 603 604 //The total number of operations is 1.0 605 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fadd(bitblock128_t arg1, bitblock128_t arg2) 606 { 607 return (bitblock128_t) _mm_add_ps((__m128) arg1, (__m128) arg2); 608 } 609 610 //The total number of operations is 1.0 611 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fadd(bitblock128_t arg1, bitblock128_t arg2) 612 { 613 return (bitblock128_t) _mm_add_pd((__m128d) arg1, (__m128d) arg2); 614 } 615 616 //The total number of operations is 1.0 617 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsub(bitblock128_t arg1, bitblock128_t arg2) 618 { 619 return (bitblock128_t) _mm_sub_ps((__m128) arg1, (__m128) arg2); 620 } 621 622 //The total number of operations is 1.0 623 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsub(bitblock128_t arg1, bitblock128_t arg2) 624 { 625 return (bitblock128_t) _mm_sub_pd((__m128d) arg1, (__m128d) arg2); 626 } 627 628 //The total number of operations is 1.0 629 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fsqrt(bitblock128_t arg1) 630 { 631 return (bitblock128_t)(_mm_sqrt_ps((__m128) arg1)); 632 } 633 634 //The total number of operations is 1.0 635 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fsqrt(bitblock128_t arg1) 636 { 637 return (bitblock128_t)(_mm_sqrt_pd((__m128d) arg1)); 638 } 639 640 //The total number of operations is 1.0 641 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::feq(bitblock128_t arg1, bitblock128_t arg2) 642 { 643 return (bitblock128_t) _mm_cmpeq_ss((__m128) arg1, (__m128) arg2); 644 } 645 646 //The total number of operations is 1.0 647 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::feq(bitblock128_t arg1, bitblock128_t arg2) 648 { 649 return (bitblock128_t) _mm_cmpeq_sd((__m128d) arg1, (__m128d) arg2); 650 } 651 652 //The total number of operations is 1.0 653 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::flt(bitblock128_t arg1, bitblock128_t arg2) 654 { 655 return (bitblock128_t) _mm_cmplt_ss((__m128) arg1, (__m128) arg2); 656 } 657 658 //The total number of operations is 1.0 659 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::flt(bitblock128_t arg1, bitblock128_t arg2) 660 { 661 return (bitblock128_t) _mm_cmplt_sd((__m128d) arg1, (__m128d) arg2); 662 } 663 664 //The total number of operations is 1.0 665 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::fle(bitblock128_t arg1, bitblock128_t arg2) 666 { 667 return (bitblock128_t) _mm_cmple_ss((__m128) arg1, (__m128) arg2); 668 } 669 670 //The total number of operations is 1.0 671 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::fle(bitblock128_t arg1, bitblock128_t arg2) 672 { 673 return (bitblock128_t) _mm_cmple_sd((__m128d) arg1, (__m128d) arg2); 674 } 675 676 677 678 679 529 680 const uint64_t highbit_64 = ((uint64_t) 1) << 63; 530 681 const uint32_t highbit_32 = 0x80000000;
Note: See TracChangeset
for help on using the changeset viewer.