- Timestamp: Sep 11, 2013, 5:04:25 PM (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/lib/idisa_cpp/idisa_avx2.cpp
r3453 r3462
1153 1153   }
1154 1154
1155      - //The total number of operations is 103.166666667
     1155 + //The total number of operations is 62.1666666667
1156 1156   template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ctz(bitblock256_t arg1)
1157 1157   {
   …    …
1177 1177   }
1178 1178
1179      - //The total number of operations is 17.75
     1179 + //The total number of operations is 6.875
1180 1180   template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::sll(bitblock256_t arg1, bitblock256_t shift_mask)
1181 1181   {
   …    …
1415 1415   }
1416 1416
1417      - //The total number of operations is 59.0
     1417 + //The total number of operations is 18.0
1418 1418   template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1)
1419 1419   {
1420      - bitblock256_t tmpAns = simd256<(128)>::popcount(arg1);
1421      - return simd256<(128)>::add(simd_and(tmpAns, simd256<256>::lomask()), simd256<256>::srli<(128)>(tmpAns));
     1420 + return _mm256_castsi128_si256(_mm_cvtsi64_si128((int64_t)(bitblock256::popcount(arg1))));
1422 1421   }
1423 1422
   …    …
1750 1749   }
1751 1750
1752      - //The total number of operations is 18.5
     1751 + //The total number of operations is 6.875
1753 1752   template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srl(bitblock256_t arg1, bitblock256_t shift_mask)
1754 1753   {
   …    …
2506 2505   //The total number of operations is 1.0
2507 2506   template <> IDISA_ALWAYS_INLINE FieldType<256/8>::T hsimd256<8>::signmask(bitblock256_t arg1)
2508      - {
     2507 + {
2509 2508   return _mm256_movemask_epi8(arg1);
2510 2509   }
   …    …
3091 3090   }
3092 3091
3093      - //The total number of operations is 20.5
     3092 + //The total number of operations is 5.5
3094 3093   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
3095 3094   {
   …    …
3097 3096   }
3098 3097
3099      - //The total number of operations is 20.5
     3098 + //The total number of operations is 5.5
3100 3099   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
3101 3100   {
   …    …
3103 3102   }
3104 3103
3105      - //The total number of operations is 20.5
     3104 + //The total number of operations is 5.5
3106 3105   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
3107 3106   {
   …    …
3361 3360   }
3362 3361
3363      - //The total number of operations is 10.0
     3362 + //The total number of operations is 2.25
3364 3363   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1)
3365 3364   {
3366      - return simd256<256>::srli<(sh*64)>(arg1);
3367      - }
3368      -
3369      - //The total number of operations is 10.0
     3365 + return ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(0), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(3))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(-1), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(14))) : ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(-1), (int64_t)(-1), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(57))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     3366 + }
     3367 +
     3368 + //The total number of operations is 2.25
3370 3369   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1)
3371 3370   {
3372      - return simd256<256>::srli<(sh*128)>(arg1);
3373      - }
3374      -
3375      - //The total number of operations is 10.0
     3371 + return mvmd256<(64)>::srli<(sh*2)>(arg1);
     3372 + }
     3373 +
     3374 + //The total number of operations is 2.25
3376 3375   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1)
3377 3376   {
3378      - return simd256<256>::srli<(sh*256)>(arg1);
     3377 + return mvmd256<(128)>::srli<(sh*2)>(arg1);
3379 3378   }
3380 3379
   …    …
3457 3456   }
3458 3457
3459      - //The total number of operations is 20.5
     3458 + //The total number of operations is 5.5
3460 3459   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3461 3460   {
   …    …
3463 3462   }
3464 3463
3465      - //The total number of operations is 20.5
     3464 + //The total number of operations is 5.5
3466 3465   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3467 3466   {
   …    …
3469 3468   }
3470 3469
3471      - //The total number of operations is 20.5
     3470 + //The total number of operations is 5.5
3472 3471   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3473 3472   {
   …    …
3505 3504   }
3506 3505
3507      - //The total number of operations is 9.5
     3506 + //The total number of operations is 2.25
3508 3507   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1)
3509 3508   {
3510      - return simd256<256>::slli<(sh*64)>(arg1);
3511      - }
3512      -
3513      - //The total number of operations is 9.5
     3509 + return ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(-1), (int64_t)(-1), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)((144)))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(-1), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(64))) : ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(0), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(0))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     3510 + }
     3511 +
     3512 + //The total number of operations is 2.25
3514 3513   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1)
3515 3514   {
3516      - return simd256<256>::slli<(sh*128)>(arg1);
3517      - }
3518      -
3519      - //The total number of operations is 9.5
     3515 + return mvmd256<(64)>::slli<(sh*2)>(arg1);
     3516 + }
     3517 +
     3518 + //The total number of operations is 2.25
3520 3519   template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1)
3521 3520   {
3522      - return simd256<256>::slli<(sh*256)>(arg1);
     3521 + return mvmd256<(128)>::slli<(sh*2)>(arg1);
3523 3522   }
3524 3523
   …    …
3559 3558   }
3560 3559
3561      - //The total number of operations is 17.75
     3560 + //The total number of operations is 6.875
3562 3561   IDISA_ALWAYS_INLINE bitblock256_t bitblock256::sll(bitblock256_t arg1, bitblock256_t arg2)
3563 3562   {
   …    …
3577 3576   }
3578 3577
3579      - //The total number of operations is 62.0
     3578 + //The total number of operations is 16.0
3580 3579   IDISA_ALWAYS_INLINE uint16_t bitblock256::popcount(bitblock256_t arg1)
3581 3580   {
3582      - return mvmd256<64>::extract<0>(simd256<256>::popcount(arg1));
     3581 + return (((__builtin_popcountll((uint64_t)(mvmd256<64>::extract<0>(arg1)))+__builtin_popcountll((uint64_t)(mvmd256<64>::extract<1>(arg1))))+__builtin_popcountll((uint64_t)(mvmd256<64>::extract<2>(arg1))))+__builtin_popcountll((uint64_t)(mvmd256<64>::extract<3>(arg1))));
3583 3582   }
3584 3583
   …    …
3589 3588   }
3590 3589
3591      - //The total number of operations is 18.5
     3590 + //The total number of operations is 6.875
3592 3591   IDISA_ALWAYS_INLINE bitblock256_t bitblock256::srl(bitblock256_t arg1, bitblock256_t arg2)
3593 3592   {
Note: See TracChangeset for help on using the changeset viewer.