source: proto/Compiler/template.c @ 392

Last change on this file since 392 was 392, checked in by ksherdy, 9 years ago

Remove macro dependencies on local variables mask_2, mask_4, mask_8.

File size: 6.2 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <errno.h>
4#include "sse_simd.h"
5
6
7typedef SIMD_type BytePack;
8typedef SIMD_type BitBlock;
9
10
/* Profiling support: compiled in only when BUFFER_PROFILING is defined. */
#if defined(BUFFER_PROFILING)
#include "../Profiling/BOM_Profiler.c"
/* Timer table; main() initialises it before processing and dumps it after. */
BOM_Table *transcode_timer;
#endif
17
18
/*
 * s2p_step: one stage of the serial-to-parallel transposition performed by
 * s2p_bytepack.
 *
 *   s0, s1    the two input blocks
 *   hi_mask   selector passed to simd_if (s2p_bytepack passes
 *             simd_himask_2, simd_himask_4 or simd_himask_8)
 *   shift     count passed to simd_srli_16 / simd_slli_16 (1, 2 or 4 there)
 *   p0, p1    lvalues receiving the two result blocks
 *
 * The simd_* primitives come from sse_simd.h; presumably simd_if(m, a, b)
 * takes the bits of a where m is set and of b elsewhere -- confirm there.
 * The expansion is a braced block, so s2p_bytepack invokes it without a
 * trailing semicolon.
 */
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
{ \
  BitBlock s2p_packed_hi_ = simd_pack_16_hh(s0, s1); \
  BitBlock s2p_packed_lo_ = simd_pack_16_ll(s0, s1); \
  p0 = simd_if(hi_mask, s2p_packed_hi_, simd_srli_16(s2p_packed_lo_, shift)); \
  p1 = simd_if(hi_mask, simd_slli_16(s2p_packed_hi_, shift), s2p_packed_lo_); \
}
27
/*
 * s2p_bytepack: transposes the eight input blocks s0..s7 into the eight
 * output blocks p0..p7 using three rounds of s2p_step.
 *
 *   s0..s7   input blocks (do_process passes U8[7] down to U8[0])
 *   p0..p7   lvalues receiving the results; judging by the temporaries'
 *            names each pN presumably collects bit N of every input byte --
 *            confirm against sse_simd.h.
 *
 * Each round halves the number of distinct bit positions mixed in a block:
 * round 1 uses simd_himask_2 / shift 1, round 2 simd_himask_4 / shift 2,
 * round 3 simd_himask_8 / shift 4.
 */
#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
{ \
  BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3; \
  BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3; \
  BitBlock bit00004444_0, bit00004444_1, bit22226666_0, bit22226666_1; \
  BitBlock bit11115555_0, bit11115555_1, bit33337777_0, bit33337777_1; \
  /* Round 1: adjacent input pairs. */ \
  s2p_step(s0, s1, simd_himask_2, 1, bit00224466_0, bit11335577_0) \
  s2p_step(s2, s3, simd_himask_2, 1, bit00224466_1, bit11335577_1) \
  s2p_step(s4, s5, simd_himask_2, 1, bit00224466_2, bit11335577_2) \
  s2p_step(s6, s7, simd_himask_2, 1, bit00224466_3, bit11335577_3) \
  /* Round 2: pairs of round-1 results. */ \
  s2p_step(bit00224466_0, bit00224466_1, simd_himask_4, 2, bit00004444_0, bit22226666_0) \
  s2p_step(bit00224466_2, bit00224466_3, simd_himask_4, 2, bit00004444_1, bit22226666_1) \
  s2p_step(bit11335577_0, bit11335577_1, simd_himask_4, 2, bit11115555_0, bit33337777_0) \
  s2p_step(bit11335577_2, bit11335577_3, simd_himask_4, 2, bit11115555_1, bit33337777_1) \
  /* Round 3: pairs of round-2 results, written straight to the outputs. */ \
  s2p_step(bit00004444_0, bit00004444_1, simd_himask_8, 4, p0, p4) \
  s2p_step(bit11115555_0, bit11115555_1, simd_himask_8, 4, p1, p5) \
  s2p_step(bit22226666_0, bit22226666_1, simd_himask_8, 4, p2, p6) \
  s2p_step(bit33337777_0, bit33337777_1, simd_himask_8, 4, p3, p7) \
}
46
/*
 * p2s_step: one stage of the parallel-to-serial transposition performed by
 * p2s_bytemerge; it applies the same simd_if / shift combination as
 * s2p_step and then interleaves with simd_mergeh_8 / simd_mergel_8.
 *
 *   p0, p1    the two input blocks
 *   hi_mask   selector passed to simd_if (simd_himask_8/4/2 in p2s_bytemerge)
 *   shift     count passed to simd_srli_16 / simd_slli_16 (4, 2 or 1 there)
 *   s0, s1    lvalues receiving the two result blocks
 *
 * The simd_* primitives come from sse_simd.h. The expansion is a braced
 * block, so p2s_bytemerge invokes it without a trailing semicolon.
 */
#define p2s_step(p0, p1, hi_mask, shift, s0, s1) \
{ \
  BitBlock p2s_mixed_hi_ = simd_if(hi_mask, p0, simd_srli_16(p1, shift)); \
  BitBlock p2s_mixed_lo_ = simd_if(hi_mask, simd_slli_16(p0, shift), p1); \
  s0 = simd_mergeh_8(p2s_mixed_hi_, p2s_mixed_lo_); \
  s1 = simd_mergel_8(p2s_mixed_hi_, p2s_mixed_lo_); \
}
55
/*
 * p2s_bytemerge: combines the eight input blocks p0..p7 into the eight
 * output blocks s0..s7 using three rounds of p2s_step. The rounds use the
 * masks and shifts of s2p_bytepack in the opposite order (simd_himask_8 /
 * shift 4 first, simd_himask_2 / shift 1 last), so this presumably undoes
 * that transposition -- confirm against sse_simd.h.
 *
 *   p0..p7   input blocks
 *   s0..s7   lvalues receiving the results
 */
#define p2s_bytemerge(p0, p1, p2, p3, p4, p5, p6, p7, s0, s1, s2, s3, s4, s5, s6, s7) \
{ \
  BitBlock bit00004444_0, bit00004444_1, bit22226666_0, bit22226666_1; \
  BitBlock bit11115555_0, bit11115555_1, bit33337777_0, bit33337777_1; \
  BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3; \
  BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3; \
  /* Round 1: inputs paired as (pN, pN+4). */ \
  p2s_step(p0, p4, simd_himask_8, 4, bit00004444_0, bit00004444_1) \
  p2s_step(p1, p5, simd_himask_8, 4, bit11115555_0, bit11115555_1) \
  p2s_step(p2, p6, simd_himask_8, 4, bit22226666_0, bit22226666_1) \
  p2s_step(p3, p7, simd_himask_8, 4, bit33337777_0, bit33337777_1) \
  /* Round 2: pairs of round-1 results. */ \
  p2s_step(bit00004444_0, bit22226666_0, simd_himask_4, 2, bit00224466_0, bit00224466_1) \
  p2s_step(bit11115555_0, bit33337777_0, simd_himask_4, 2, bit11335577_0, bit11335577_1) \
  p2s_step(bit00004444_1, bit22226666_1, simd_himask_4, 2, bit00224466_2, bit00224466_3) \
  p2s_step(bit11115555_1, bit33337777_1, simd_himask_4, 2, bit11335577_2, bit11335577_3) \
  /* Round 3: pairs of round-2 results, written straight to the outputs. */ \
  p2s_step(bit00224466_0, bit11335577_0, simd_himask_2, 1, s0, s1) \
  p2s_step(bit00224466_1, bit11335577_1, simd_himask_2, 1, s2, s3) \
  p2s_step(bit00224466_2, bit11335577_2, simd_himask_2, 1, s4, s5) \
  p2s_step(bit00224466_3, bit11335577_3, simd_himask_2, 1, s6, s7) \
}
75
/*
 * double_int64_adc: 128-bit add-with-carry built from two chained 64-bit
 * `adc` instructions (x86, GCC extended asm, AT&T operand order).
 *
 *   x1, x2         less- and more-significant halves of the first addend
 *   y1, y2         less- and more-significant halves of the second addend
 *   rslt1, rslt2   lvalues receiving the two halves of the sum
 *   carry          lvalue, read and written, pinned to the A register.
 *                  `sahf` loads the flags from AH before the additions and
 *                  `lahf` stores them back into AH afterwards, so the carry
 *                  flag travels in bit 8 of `carry` (bit 0 of AH).
 *
 * z1 is marked earlyclobber ("=&r"): the first `adc` writes it while e2 has
 * not been read yet, so the compiler must not place e2 in the same register
 * (which it could otherwise do when it knows y2 and x1 hold the same value).
 * z2 and the carry register are only written by the instruction that
 * performs the last read of the remaining inputs, so they need no '&'.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
86
/*
 * adc128: adds two __m128i values as 128-bit integers with carry in/out.
 *
 *   first, second   the __m128i addends (each evaluated once)
 *   carry           lvalue passed through to double_int64_adc, which reads
 *                   the incoming carry from it and stores the outgoing one
 *   sum             lvalue receiving the __m128i result
 *
 * The operands are split into two uint64_t halves through a union; element
 * 0 is handed to double_int64_adc as the less-significant half.
 *
 * The temporaries carry an adc128_ prefix and trailing underscore so that
 * an argument spelled `x`, `y` or `rslt` is not captured by the macro's own
 * locals (with plain names, adc128(x, ...) would expand to `x.bitblock = x`).
 */
#define adc128(first, second, carry, sum) \
do \
{ \
  union { __m128i bitblock; uint64_t int64[2]; } \
    adc128_lhs_, adc128_rhs_, adc128_out_; \
\
  adc128_lhs_.bitblock = (first); \
  adc128_rhs_.bitblock = (second); \
\
  double_int64_adc(adc128_lhs_.int64[0], adc128_lhs_.int64[1], \
                   adc128_rhs_.int64[0], adc128_rhs_.int64[1], \
                   adc128_out_.int64[0], adc128_out_.int64[1], carry); \
  (sum) = adc128_out_.bitblock; \
} while (0)
105
106
107
/*
 * double_int64_sbb: 128-bit subtract-with-borrow built from two chained
 * 64-bit `sbb` instructions (x86, GCC extended asm, AT&T operand order).
 * Computes (x2:x1) - (y2:y1) - incoming borrow.
 *
 *   x1, x2         less- and more-significant halves of the minuend
 *   y1, y2         less- and more-significant halves of the subtrahend
 *   rslt1, rslt2   lvalues receiving the two halves of the difference
 *   carry          lvalue, read and written, pinned to the A register.
 *                  `sahf` loads the flags from AH before the subtractions
 *                  and `lahf` stores them back into AH afterwards, so the
 *                  borrow travels in bit 8 of `carry` (bit 0 of AH).
 *
 * z1 is marked earlyclobber ("=&r"): the first `sbb` writes it while e2 has
 * not been read yet, so the compiler must not place e2 in the same register
 * (which it could otherwise do when it knows y2 and x1 hold the same value).
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
118
/*
 * sbb128: subtracts `second` from `first` as 128-bit integers with borrow
 * in/out.
 *
 *   first, second   the __m128i minuend and subtrahend (each evaluated once)
 *   carry           lvalue passed through to double_int64_sbb, which reads
 *                   the incoming borrow from it and stores the outgoing one
 *   sum             lvalue receiving the __m128i difference
 *
 * The operands are split into two uint64_t halves through a union; element
 * 0 is handed to double_int64_sbb as the less-significant half.
 *
 * The temporaries carry an sbb128_ prefix and trailing underscore so that
 * an argument spelled `x`, `y` or `rslt` is not captured by the macro's own
 * locals (with plain names, sbb128(x, ...) would expand to `x.bitblock = x`).
 */
#define sbb128(first, second, carry, sum) \
do \
{ \
  union { __m128i bitblock; uint64_t int64[2]; } \
    sbb128_lhs_, sbb128_rhs_, sbb128_out_; \
\
  sbb128_lhs_.bitblock = (first); \
  sbb128_rhs_.bitblock = (second); \
\
  double_int64_sbb(sbb128_lhs_.int64[0], sbb128_lhs_.int64[1], \
                   sbb128_rhs_.int64[0], sbb128_rhs_.int64[1], \
                   sbb128_out_.int64[0], sbb128_out_.int64[1], carry); \
  (sum) = sbb128_out_.bitblock; \
} while (0)
137
138       
139           
140#define BLOCK_SIZE 128
141
142void do_process(FILE *infile, FILE *outfile) {
143
144  @decl
145
146  BytePack U8[8];
147//  BitBlock u8bit[8];
148
149  int block_pos = 0;
150
151  int chars_read = fread(&U8[0], 1, BLOCK_SIZE, infile);
152  while (chars_read > 0) {
153    for (int i = chars_read;i < BLOCK_SIZE; i++) {
154       ((char *) &U8[0])[i] = 0;
155    }
156
157    s2p_bytepack(U8[7], U8[6], U8[5], U8[4], U8[3], U8[2], U8[1], U8[0],
158    array_u8bit__0_,array_u8bit__1_,array_u8bit__2_,array_u8bit__3_,array_u8bit__4_,array_u8bit__5_,array_u8bit__6_,array_u8bit__7_);
159
160    @stmts
161
162    if (bitblock_has_bit(error_mask)) {
163      int errpos = count_forward_zeroes(error_mask);
164      fprintf(stderr, "Error at byte #%i\n", block_pos + errpos);
165    }
166    block_pos += BLOCK_SIZE;
167    chars_read = fread(&U8[0], 1, BLOCK_SIZE, infile);
168  }
169
170  fclose(infile);
171  fclose(outfile);
172}
173
174
175
176
/*
 * Entry point: main <filename> [<outputfile>]
 *
 * Opens <filename> for binary reading and, if given, <outputfile> for binary
 * writing (stdout otherwise), then hands both streams to do_process. A
 * missing argument or a file that cannot be opened ends the program through
 * exit(-1). When built with BUFFER_PROFILING the timer table is created
 * before the files are opened and dumped after processing.
 */
int
main(int argc, char * argv[]) {
  FILE *infile;
  FILE *outfile = stdout;  /* used when no output file is named */
  char *filename;

  if (argc < 2) {
    printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    exit(-1);
  }
  filename = argv[1];

#ifdef BUFFER_PROFILING
  transcode_timer = init_BOM_timer(BUFFER_SIZE);
#endif

  infile = fopen(filename, "rb");
  if (infile == NULL) {
    fprintf(stderr, "Error: cannot open %s for input.\n", filename);
    exit(-1);
  }

  if (argc >= 3) {
    outfile = fopen(argv[2], "wb");
    if (outfile == NULL) {
      fprintf(stderr, "Error: cannot open %s for writing.\n", argv[2]);
      exit(-1);
    }
  }

  do_process(infile, outfile);

#ifdef BUFFER_PROFILING
  printf("Buffer conversion timing.\n");
  dump_BOM_table(transcode_timer);
#endif
  return 0;
}
Note: See TracBrowser for help on using the repository browser.