source: trunk/lib_ir/s2p.h @ 3933

Last change on this file since 3933 was 3933, checked in by linmengl, 5 years ago

IR inline s2p_bytepack, get the same perf result of SSE2 now.

File size: 3.1 KB
Line 
1#ifndef S2P_H
2#define S2P_H
3
4#include "idisa.hpp"
5
6extern "C" {
7  //void s2p_ideal(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
8
9  BitBlock packh_16(BitBlock a, BitBlock b);
10  BitBlock packl_16(BitBlock a, BitBlock b);
11
12  BitBlock packh_8(BitBlock a, BitBlock b);
13  BitBlock packl_8(BitBlock a, BitBlock b);
14
15  BitBlock packh_4(BitBlock a, BitBlock b);
16  BitBlock packl_4(BitBlock a, BitBlock b);
17
18  BitBlock ifh_1(BitBlock a, BitBlock b, BitBlock c);
19  BitBlock srli_16(BitBlock a, BitBlock shift_mask);
20  BitBlock slli_16(BitBlock a, BitBlock shift_mask);
21
22  void s2p_step_ir(BitBlock s0, BitBlock s1, BitBlock hi_mask, BitBlock shift_mask, BitBlock *p0, BitBlock *p1);
23
24  void s2p_bytepack_ir(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock* p0, BitBlock* p1, BitBlock* p2, BitBlock* p3, BitBlock* p4, BitBlock* p5, BitBlock* p6, BitBlock* p7);
25
26  BitBlock const16_1();
27  BitBlock const16_2();
28  BitBlock const16_4();
29
30  BitBlock himask_2();
31  BitBlock himask_4();
32  BitBlock himask_8();
33}
34
// S2P_ALGORITHM names the routine that the s2p() macro expands to. It is
// bound to s2p_bytepack_inline, whose work is done by s2p_bytepack_ir --
// written in pure IR in order to use immediate constants in shifting.
#define S2P_ALGORITHM s2p_bytepack_inline

// s2p_bytepack_inline(s0..s7, p0..p7)
//   s0..s7  the eight input blocks, passed by value.
//   p0..p7  lvalues that receive the eight results. The macro takes their
//           addresses itself, so callers pass the objects, not pointers.
// Every parameter is parenthesized in the expansion so that an argument
// written as a compound expression still groups as one operand (in
// particular under the address-of operator applied to p0..p7).
#define s2p_bytepack_inline(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
  s2p_bytepack_ir((s0), (s1), (s2), (s3), (s4), (s5), (s6), (s7), \
                  &(p0), &(p1), &(p2), &(p3), &(p4), &(p5), &(p6), &(p7))
41
// Deprecated: use s2p_bytepack_inline instead.
//
// s2p_bytepack(s0..s7, p0..p7) has the same call shape as
// s2p_bytepack_inline (eight input blocks by value, eight result lvalues),
// but is assembled from twelve s2p_step_ir calls in three rounds of four:
//   round 1  himask_2() / const16_1()  on the pairs (s0,s1) ... (s6,s7)
//   round 2  himask_4() / const16_2()  on the round-1 results
//   round 3  himask_8() / const16_4()  on the round-2 results, stored to p0..p7
// NOTE(review): the bitXXXXXXXX temporaries appear to be named after the bit
// positions they carry; that cannot be verified from this header alone.
//
// Every parameter is parenthesized so that compound-expression arguments
// group correctly. Only /* */ comments are used inside the body: a // comment
// on a continued line would swallow the lines spliced after it.
#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
  do { \
    /* round 1 */ \
    BitBlock bit00224466_0, bit11335577_0; \
    BitBlock bit00224466_1, bit11335577_1; \
    BitBlock bit00224466_2, bit11335577_2; \
    BitBlock bit00224466_3, bit11335577_3; \
    s2p_step_ir((s0), (s1), himask_2(), const16_1(), &bit00224466_0, &bit11335577_0); \
    s2p_step_ir((s2), (s3), himask_2(), const16_1(), &bit00224466_1, &bit11335577_1); \
    s2p_step_ir((s4), (s5), himask_2(), const16_1(), &bit00224466_2, &bit11335577_2); \
    s2p_step_ir((s6), (s7), himask_2(), const16_1(), &bit00224466_3, &bit11335577_3); \
    /* round 2 */ \
    BitBlock bit00004444_0, bit22226666_0; \
    BitBlock bit00004444_1, bit22226666_1; \
    BitBlock bit11115555_0, bit33337777_0; \
    BitBlock bit11115555_1, bit33337777_1; \
    s2p_step_ir(bit00224466_0, bit00224466_1, himask_4(), const16_2(), &bit00004444_0, &bit22226666_0); \
    s2p_step_ir(bit00224466_2, bit00224466_3, himask_4(), const16_2(), &bit00004444_1, &bit22226666_1); \
    s2p_step_ir(bit11335577_0, bit11335577_1, himask_4(), const16_2(), &bit11115555_0, &bit33337777_0); \
    s2p_step_ir(bit11335577_2, bit11335577_3, himask_4(), const16_2(), &bit11115555_1, &bit33337777_1); \
    /* round 3: results go straight to the caller's lvalues */ \
    s2p_step_ir(bit00004444_0, bit00004444_1, himask_8(), const16_4(), &(p0), &(p4)); \
    s2p_step_ir(bit11115555_0, bit11115555_1, himask_8(), const16_4(), &(p1), &(p5)); \
    s2p_step_ir(bit22226666_0, bit22226666_1, himask_8(), const16_4(), &(p2), &(p6)); \
    s2p_step_ir(bit33337777_0, bit33337777_1, himask_8(), const16_4(), &(p3), &(p7)); \
  } while (0)
62
// s2p(s0..s7, p0..p7): entry point that forwards to whichever routine
// S2P_ALGORITHM names.
//   s0..s7  the eight input blocks. They are forwarded in reverse order
//           (s7 first, s0 last).
//   p0..p7  lvalues that receive the results, forwarded in their given order.
// NOTE(review): the reason for reversing the inputs is not visible in this
// header (presumably an ordering convention of the caller) -- confirm.
// Each parameter is forwarded in parentheses so that the target macro, which
// applies the address-of operator to p0..p7, sees every argument as a single
// operand.
#define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
  S2P_ALGORITHM((s7), (s6), (s5), (s4), (s3), (s2), (s1), (s0), \
                (p0), (p1), (p2), (p3), (p4), (p5), (p6), (p7))
65
66#endif
Note: See TracBrowser for help on using the repository browser.