source: trunk/lib_ir/s2p.h @ 4313

Last change on this file since 4313 was 4050, checked in by linmengl, 5 years ago

add p2s.ll file, p2s_step works now

File size: 2.8 KB
Line 
1#ifndef S2P_H
2#define S2P_H
3
4#include "ir_common.h"
5
6extern "C" {
7  void s2p_step_ir(BitBlock s0, BitBlock s1, BitBlock hi_mask, BitBlock shift_mask, BitBlock *p0, BitBlock *p1);
8
9  void s2p_bytepack_ir(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock* p0, BitBlock* p1, BitBlock* p2, BitBlock* p3, BitBlock* p4, BitBlock* p5, BitBlock* p6, BitBlock* p7);
10
11  void s2p_ideal_ir(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
12}
13
14//S2P_ALGORITHM:
15//s2p_bytepack_inline, written in pure IR in order to use immediate constants in shifting.
16//s2p_ideal: use 3 stage packh2,4,8 (only available on Haswell avx2)
17#define S2P_ALGORITHM s2p_bytepack_inline
18
19#define s2p_ideal(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
20  s2p_ideal_ir(s0, s1, s2, s3, s4, s5, s6, s7, &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7)
21
22#define s2p_bytepack_inline(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
23  s2p_bytepack_ir(s0, s1, s2, s3, s4, s5, s6, s7, &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7)
24
25//This macro is deprecated. Use inline version.
26#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
27  do {\
28        BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3;\
29        BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3;\
30        BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1;\
31        BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1;\
32        s2p_step_ir(s0,s1,himask_2(),const16_1(),&bit00224466_0,&bit11335577_0);\
33        s2p_step_ir(s2,s3,himask_2(),const16_1(),&bit00224466_1,&bit11335577_1);\
34        s2p_step_ir(s4,s5,himask_2(),const16_1(),&bit00224466_2,&bit11335577_2);\
35        s2p_step_ir(s6,s7,himask_2(),const16_1(),&bit00224466_3,&bit11335577_3);\
36        s2p_step_ir(bit00224466_0,bit00224466_1,himask_4(),const16_2(),&bit00004444_0,&bit22226666_0);\
37        s2p_step_ir(bit00224466_2,bit00224466_3,himask_4(),const16_2(),&bit00004444_1,&bit22226666_1);\
38        s2p_step_ir(bit11335577_0,bit11335577_1,himask_4(),const16_2(),&bit11115555_0,&bit33337777_0);\
39        s2p_step_ir(bit11335577_2,bit11335577_3,himask_4(),const16_2(),&bit11115555_1,&bit33337777_1);\
40        s2p_step_ir(bit00004444_0,bit00004444_1,himask_8(),const16_4(),&p0,&p4);\
41        s2p_step_ir(bit11115555_0,bit11115555_1,himask_8(),const16_4(),&p1,&p5);\
42        s2p_step_ir(bit22226666_0,bit22226666_1,himask_8(),const16_4(),&p2,&p6);\
43        s2p_step_ir(bit33337777_0,bit33337777_1,himask_8(),const16_4(),&p3,&p7);\
44  } while(0)
45
46#define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)\
47  S2P_ALGORITHM(s7, s6, s5, s4, s3, s2, s1, s0, p0, p1, p2, p3, p4, p5, p6, p7)
48
49#endif
Note: See TracBrowser for help on using the repository browser.