Changeset 2143 for trunk/lib/idisa_cpp


Ignore:
Timestamp:
May 23, 2012, 10:31:55 PM (7 years ago)
Author:
cameron
Message:

Fixed hand-coded bitblock::srl, sll

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r2127 r2143  
    //Forward declarations of explicit specializations: mvmd256<fw>::fill for fw = 16, 32, 64, 128, 256 (the 64/128/256 ones are added in this changeset), followed by mvmd256<fw>::extract<pos> for fw = 1, 2.
    460460template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill(uint64_t val1);
    461461template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill(uint64_t val1);
     462template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill(uint64_t val1);
     463template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill(uint64_t val1);
     464template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::fill(uint64_t val1);
    462465template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1);
    463466template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<2>::extract(bitblock256_t arg1);
     
    //Forward declarations of explicit specializations: mvmd256<fw>::fill4 for fw = 16, 32, 64 (the 64 one is added in this changeset), followed by mvmd256<fw>::srli<sh> for fw = 2, 4.
    487490template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    488491template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
     492template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    489493template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1);
    490494template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1);
     
    //Forward declarations of explicit specializations: mvmd256<fw>::fill2 for fw = 16, 32, 64, 128 (the 64/128 ones are added in this changeset), followed by mvmd256<fw>::dslli<sh> for fw = 2, 4.
    501505template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2);
    502506template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2);
     507template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill2(uint64_t val1, uint64_t val2);
     508template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill2(uint64_t val1, uint64_t val2);
    503509template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2);
    504510template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2);
     
    29272933}
    29282934
     2935//The total number of operations is 5.0
         //Builds the result from the 32-bit specialization: val1 is split into its
         //upper 32 bits (val1>>32) and lower 32 bits (val1 & (2^32 - 1)), and the two
         //halves are passed, upper half first, to mvmd256<32>::fill2.
     2936template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill(uint64_t val1)
     2937{
     2938        return mvmd256<(32)>::fill2((val1>>(32)), (val1&((4294967296ULL)-1)));
     2939}
     2940
     2941//The total number of operations is 1.0
         //Delegates to mvmd256<64>::fill2 with a constant 0 as the first argument and
         //val1 as the second.
         //NOTE(review): presumably this zero-extends val1 into each 128-bit field
         //(first argument = upper 64 bits) -- confirm against mvmd256<64>::fill2.
     2942template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill(uint64_t val1)
     2943{
     2944        return mvmd256<(64)>::fill2(0, val1);
     2945}
     2946
     2947//The total number of operations is 5.0
         //Delegates to mvmd256<128>::fill2 with a constant 0 as the first argument and
         //val1 as the second.
         //NOTE(review): presumably this zero-extends val1 into the single 256-bit field
         //(first argument = upper 128 bits) -- confirm against mvmd256<128>::fill2.
     2948template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::fill(uint64_t val1)
     2949{
     2950        return mvmd256<(128)>::fill2(0, val1);
     2951}
     2952
    29292953//The total number of operations is 1.5
    29302954template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1)
     
    30953119}
    30963120
     3121//The total number of operations is 5.0
         //Combines two 64-bit fill2 patterns, fill2(val1, val2) and fill2(val3, val4),
         //using simd256<1>::ifh with the mask simd256<256>::himask().
         //NOTE(review): presumably the masked (high) half is taken from the
         //(val1, val2) pattern and the remainder from (val3, val4) -- confirm against
         //the ifh/himask definitions.
     3122template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3123{
     3124        return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<64>::fill2(val1, val2), mvmd256<64>::fill2(val3, val4));
     3125}
     3126
    30973127//The total number of operations is 14.5
    30983128template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1)
     
    31793209}
    31803210
     3211//The total number of operations is 1.0
     3212template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill2(uint64_t val1, uint64_t val2)
     3213{
     3214        return mvmd256<(32)>::fill4(0, val1, 0, val2);
     3215}
     3216
     3217//The total number of operations is 5.0
         //Combines mvmd256<128>::fill(val1) and mvmd256<128>::fill(val2) using
         //simd256<1>::ifh with the mask simd256<256>::himask().
         //NOTE(review): presumably the masked (high) half is taken from the val1 fill
         //and the remainder from the val2 fill -- confirm against the ifh/himask
         //definitions.
     3218template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill2(uint64_t val1, uint64_t val2)
     3219{
     3220        return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<128>::fill(val1), mvmd256<128>::fill(val2));
     3221}
     3222
    31813223//The total number of operations is 29.5
    31823224template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     
    33693411        bitblock128_t s = avx_select_lo128(shft);
    33703412        bitblock128_t c128 = _mm_cvtsi32_si128(128);
    3371         bitblock128_t x = avx_select_lo128(r);
    3372         bitblock128_t y = avx_select_hi128(r);
    3373 
    3374         return
    3375         avx_general_combine256(
    3376            _mm_or_si128(
    3377                 _mm_or_si128(bitblock128::sll(x, s), bitblock128::sll(y, _mm_sub_epi32(s, c128))),
    3378                 bitblock128::srl(y, _mm_sub_epi32(c128, s))),
    3379         bitblock128::sll(y, s));
     3413        bitblock128_t x = avx_select_hi128(r);
     3414        bitblock128_t y = avx_select_lo128(r);
     3415
     3416        if (bitblock128::any(simd128<16>::srli<7>(s))) {
     3417          x = bitblock128::sll(y, _mm_sub_epi32(s, c128));
     3418          y = simd128<1>::constant<0>();
     3419        }
     3420        else {
     3421          x = simd_or(bitblock128::sll(x, s), bitblock128::srl(y, _mm_sub_epi32(c128, s)));
     3422          y = bitblock128::sll(y, s);
     3423        }
     3424        return avx_general_combine256(x, y);
    33803425}
    33813426
     
    33843429        bitblock128_t s = avx_select_lo128(shft);
    33853430        bitblock128_t c128 = _mm_cvtsi32_si128(128);
    3386         bitblock128_t x = avx_select_lo128(r);
    3387         bitblock128_t y = avx_select_hi128(r);
    3388 
    3389         return
    3390         avx_general_combine256(
    3391            bitblock128::srl(x, s),
    3392            _mm_or_si128(
    3393                 _mm_or_si128(bitblock128::srl(y, s), bitblock128::srl(x, _mm_sub_epi32(s, c128))),
    3394                 bitblock128::sll(x, _mm_sub_epi32(c128, s))));
    3395 }
     3431        bitblock128_t x = avx_select_hi128(r);
     3432        bitblock128_t y = avx_select_lo128(r);
     3433
     3434        if (bitblock128::any(simd128<16>::srli<7>(s))) {
     3435          y = bitblock128::srl(x, _mm_sub_epi32(s, c128));
     3436          x = simd128<1>::constant<0>();
     3437        }
     3438        else {
     3439          y = simd_or(bitblock128::srl(y, s), bitblock128::sll(x, _mm_sub_epi32(c128, s)));
     3440          x = bitblock128::srl(x, s);
     3441        }
     3442        return avx_general_combine256(x, y);
     3443}
     3444
    33963445
    33973446#endif
Note: See TracChangeset for help on using the changeset viewer.