Changeset 4958


Ignore:
Timestamp:
Mar 7, 2016, 11:36:05 AM (4 years ago)
Author:
lindanl
Message:

optimize pack for block size of 64.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_i64_builder.cpp

    r4944 r4958  
    1313namespace IDISA {
    1414
    15 
     15/*
    1616Value * IDISA_I64_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    1717    Value * mask0 = getInt64(0xFF00000000000000);
     
    3838
    3939}
     40*/
     41
     42Value * IDISA_I64_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     43    Value * mask02 = getInt64(0xFF000000FF000000);
     44    Value * mask13 = getInt64(0x0000FF000000FF00);
     45
     46    Value * b1 = simd_or(simd_and(b, mask02), simd_slli(64, simd_and(b, mask13), 8));
     47    Value * a1 = simd_or(simd_and(a, mask02), simd_slli(64, simd_and(a, mask13), 8));
     48
     49    Value * mask01 = getInt64(0xFFFF000000000000);
     50    Value * mask23 = getInt64(0x00000000FFFF0000);
     51
     52    Value * b2 = simd_or(simd_and(b1, mask01), simd_slli(64, simd_and(b1, mask23), 16));
     53    Value * a2 = simd_or(simd_srli(64, simd_and(a1, mask01), 32), simd_srli(64, simd_and(a1, mask23), 16));
     54
     55    return simd_or(b2, a2);
     56}
     57
     58Value * IDISA_I64_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
     59    Value * mask02 = getInt64(0x00FF000000FF0000);
     60    Value * mask13 = getInt64(0x000000FF000000FF);
     61
     62    Value * b1 = simd_or(simd_srli(64, simd_and(b, mask02), 8), simd_and(b, mask13));
     63    Value * a1 = simd_or(simd_srli(64, simd_and(a, mask02), 8), simd_and(a, mask13));
     64
     65    Value * mask01 = getInt64(0x0000FFFF00000000);
     66    Value * mask23 = getInt64(0x000000000000FFFF);
     67
     68    Value * b2 = simd_or(simd_slli(64, simd_and(b1, mask01), 16), simd_slli(64, simd_and(b1, mask23), 32));
     69    Value * a2 = simd_or(simd_srli(64, simd_and(a1, mask01), 16), simd_and(a1, mask23));
     70
     71    return simd_or(b2, a2);
     72}
    4073
    4174}
Note: See TracChangeset for help on using the changeset viewer.