 Timestamp:
 May 23, 2012, 10:31:55 PM (7 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_avx.cpp
r2127 r2143 460 460 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill(uint64_t val1); 461 461 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill(uint64_t val1); 462 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill(uint64_t val1); 463 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill(uint64_t val1); 464 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::fill(uint64_t val1); 462 465 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1); 463 466 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<2>::extract(bitblock256_t arg1); … … 487 490 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); 488 491 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); 492 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); 489 493 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1); 490 494 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1); … … 501 505 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2); 502 506 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2); 507 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill2(uint64_t val1, uint64_t val2); 508 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill2(uint64_t val1, uint64_t val2); 503 509 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2); 504 510 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2); … … 2927 
2933 } 2928 2934 2935 //The total number of operations is 5.0 2936 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill(uint64_t val1) 2937 { 2938 return mvmd256<(32)>::fill2((val1>>(32)), (val1&((4294967296ULL)-1))); 2939 } 2940 2941 //The total number of operations is 1.0 2942 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill(uint64_t val1) 2943 { 2944 return mvmd256<(64)>::fill2(0, val1); 2945 } 2946 2947 //The total number of operations is 5.0 2948 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::fill(uint64_t val1) 2949 { 2950 return mvmd256<(128)>::fill2(0, val1); 2951 } 2952 2929 2953 //The total number of operations is 1.5 2930 2954 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1) … … 3095 3119 } 3096 3120 3121 //The total number of operations is 5.0 3122 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3123 { 3124 return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<64>::fill2(val1, val2), mvmd256<64>::fill2(val3, val4)); 3125 } 3126 3097 3127 //The total number of operations is 14.5 3098 3128 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1) … … 3179 3209 } 3180 3210 3211 //The total number of operations is 1.0 3212 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill2(uint64_t val1, uint64_t val2) 3213 { 3214 return mvmd256<(32)>::fill4(0, val1, 0, val2); 3215 } 3216 3217 //The total number of operations is 5.0 3218 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill2(uint64_t val1, uint64_t val2) 3219 { 3220 return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<128>::fill(val1), mvmd256<128>::fill(val2)); 3221 } 3222 3181 3223 //The total number of operations is 29.5 3182 3224 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2) …
… 3369 3411 bitblock128_t s = avx_select_lo128(shft); 3370 3412 bitblock128_t c128 = _mm_cvtsi32_si128(128); 3371 bitblock128_t x = avx_select_lo128(r); 3372 bitblock128_t y = avx_select_hi128(r); 3373 3374 return 3375 avx_general_combine256( 3376 _mm_or_si128( 3377 _mm_or_si128(bitblock128::sll(x, s), bitblock128::sll(y, _mm_sub_epi32(s, c128))), 3378 bitblock128::srl(y, _mm_sub_epi32(c128, s))), 3379 bitblock128::sll(y, s)); 3413 bitblock128_t x = avx_select_hi128(r); 3414 bitblock128_t y = avx_select_lo128(r); 3415 3416 if (bitblock128::any(simd128<16>::srli<7>(s))) { 3417 x = bitblock128::sll(y, _mm_sub_epi32(s, c128)); 3418 y = simd128<1>::constant<0>(); 3419 } 3420 else { 3421 x = simd_or(bitblock128::sll(x, s), bitblock128::srl(y, _mm_sub_epi32(c128, s))); 3422 y = bitblock128::sll(y, s); 3423 } 3424 return avx_general_combine256(x, y); 3380 3425 } 3381 3426 … … 3384 3429 bitblock128_t s = avx_select_lo128(shft); 3385 3430 bitblock128_t c128 = _mm_cvtsi32_si128(128); 3386 bitblock128_t x = avx_select_lo128(r); 3387 bitblock128_t y = avx_select_hi128(r); 3388 3389 return 3390 avx_general_combine256( 3391 bitblock128::srl(x, s), 3392 _mm_or_si128( 3393 _mm_or_si128(bitblock128::srl(y, s), bitblock128::srl(x, _mm_sub_epi32(s, c128))), 3394 bitblock128::sll(x, _mm_sub_epi32(c128, s)))); 3395 } 3431 bitblock128_t x = avx_select_hi128(r); 3432 bitblock128_t y = avx_select_lo128(r); 3433 3434 if (bitblock128::any(simd128<16>::srli<7>(s))) { 3435 y = bitblock128::srl(x, _mm_sub_epi32(s, c128)); 3436 x = simd128<1>::constant<0>(); 3437 } 3438 else { 3439 y = simd_or(bitblock128::srl(y, s), bitblock128::sll(x, _mm_sub_epi32(c128, s))); 3440 x = bitblock128::srl(x, s); 3441 } 3442 return avx_general_combine256(x, y); 3443 } 3444 3396 3445 3397 3446 #endif
Note: See TracChangeset
for help on using the changeset viewer.