Changeset 4051 for trunk/lib_ir/p2s.h


Ignore:
Timestamp:
Aug 19, 2014, 9:15:30 PM (5 years ago)
Author:
linmengl
Message:

Add a pure IR implementation of p2s; it now gets the same performance on SSE2.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib_ir/p2s.h

    r4050 r4051  
    66extern "C" {
    77  void p2s_step_ir(BitBlock p0, BitBlock p1, BitBlock hi_mask, BitBlock shift_mask, BitBlock *s0, BitBlock *s1);
     8
     9  void p2s_bytemerge_ir(BitBlock p0, BitBlock p1, BitBlock p2, BitBlock p3, BitBlock p4, BitBlock p5, BitBlock p6, BitBlock p7, BitBlock *s0, BitBlock *s1, BitBlock *s2, BitBlock *s3, BitBlock *s4, BitBlock *s5, BitBlock *s6, BitBlock *s7);
     10
    811}
    912
    10 #define p2s_bytemerge(p0,p1,p2,p3,p4,p5,p6,p7,s0,s1,s2,s3,s4,s5,s6,s7)  \
    11   do { \
    12         BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1; \
    13         BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1; \
    14         BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3; \
    15         BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3; \
    16         p2s_step_ir(p0,p4,himask_8(),const16_4(),&bit00004444_0,&bit00004444_1);  \
    17         p2s_step_ir(p1,p5,himask_8(),const16_4(),&bit11115555_0,&bit11115555_1);  \
    18         p2s_step_ir(p2,p6,himask_8(),const16_4(),&bit22226666_0,&bit22226666_1);  \
    19         p2s_step_ir(p3,p7,himask_8(),const16_4(),&bit33337777_0,&bit33337777_1);  \
    20         p2s_step_ir(bit00004444_0,bit22226666_0,himask_4(),const16_2(),&bit00224466_0,&bit00224466_1);  \
    21         p2s_step_ir(bit11115555_0,bit33337777_0,himask_4(),const16_2(),&bit11335577_0,&bit11335577_1);  \
    22         p2s_step_ir(bit00004444_1,bit22226666_1,himask_4(),const16_2(),&bit00224466_2,&bit00224466_3);  \
    23         p2s_step_ir(bit11115555_1,bit33337777_1,himask_4(),const16_2(),&bit11335577_2,&bit11335577_3);  \
    24         p2s_step_ir(bit00224466_0,bit11335577_0,himask_2(),const16_1(),&s0,&s1);  \
    25         p2s_step_ir(bit00224466_1,bit11335577_1,himask_2(),const16_1(),&s2,&s3);  \
    26         p2s_step_ir(bit00224466_2,bit11335577_2,himask_2(),const16_1(),&s4,&s5);  \
    27         p2s_step_ir(bit00224466_3,bit11335577_3,himask_2(),const16_1(),&s6,&s7);  \
    28   } while(0)
     13#define P2S_ALGORITHM p2s_bytemerge
    2914
     15#define p2s_bytemerge(p0, p1, p2, p3, p4, p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)\
     16  p2s_bytemerge_ir(p0, p1, p2, p3, p4, p5, p6, p7, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7)
     17
     18#define p2s(p0, p1, p2, p3, p4, p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7)\
     19  P2S_ALGORITHM(p0, p1, p2, p3, p4, p5, p6, p7, s7, s6, s5, s4, s3, s2, s1, s0)
    3020#endif
Note: See TracChangeset for help on using the changeset viewer.