source: proto/u16u8/src/u16u8.c @ 501

Last change on this file since 501 was 501, checked in by ksherdy, 9 years ago

Update svn:externals

File size: 20.8 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <errno.h>
4#include <stdint.h>
5#include <string.h>
6#include <sys/types.h>
7#include <sys/stat.h>
8
9#include "../lib/lib_simd.h"
10#include "../lib/block_carry.h"
11
12typedef SIMD_type BytePack;
13typedef SIMD_type BitBlock;
14
15
16#include "../lib/perflib/perfsec.h"
17
/* transcode_timer is the object passed to the PERF_SEC_* macros.  Its type is
   selected at compile time:
     BUFFER_PROFILING : a BOM_Table pointer
     CODE_CLOCKER     : a CC instance built from the Events list below
     otherwise        : an untyped placeholder pointer */
#ifdef BUFFER_PROFILING
BOM_Table * transcode_timer;

#elif CODE_CLOCKER
#define NUM_EVENTS 1
/* Event list handed to the CC constructor.  Alternatives that were tried:
     {PAPI_L2_DCM}
     {PAPI_TOT_CYC, PAPI_BR_MSP} */
int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
int cal_size = 20;
CC * transcode_timer = new CC(Events,NUM_EVENTS,cal_size);
#else
void * transcode_timer;
#endif
/*===========================================================================*/
/* UErrorCode                                                                */
/* Subset of the ICU status codes; numeric values follow the listing below.  */
/*===========================================================================*/

typedef enum UErrorCode {
    /** No error, no warning. */
    U_ZERO_ERROR = 0,
    /** Character conversion: incomplete input sequence. */
    U_TRUNCATED_CHAR_FOUND = 11,
    /** Character conversion: illegal input sequence/combination of input units. */
    U_ILLEGAL_CHAR_FOUND = 12
} UErrorCode;
42
/* One 16-bit code unit of the UTF-16 input. */
typedef uint16_t UChar;
44
/* One stage of the s2p_bytepack network.
   s0,s1    : input registers
   hi_mask  : selector passed to simd_if
   shift    : bit distance used to line the two packed halves up
   p0,p1    : output lvalues
   The pair is packed with simd_pack_16_hh / simd_pack_16_ll, then p0 takes
   the hi_mask-selected bits of the "hh" pack combined with the "ll" pack
   shifted right, and p1 the "hh" pack shifted left combined with the "ll"
   pack.  Expands to a brace block; call sites add no trailing semicolon. */
#define s2p_step(s0,s1,hi_mask,shift,p0,p1)  \
{ \
    BitBlock s2p_hh_ = simd_pack_16_hh(s0,s1); \
    BitBlock s2p_ll_ = simd_pack_16_ll(s0,s1); \
    p0 = simd_if(hi_mask, s2p_hh_, simd_srli_16(s2p_ll_,shift)); \
    p1 = simd_if(hi_mask, simd_slli_16(s2p_hh_,shift), s2p_ll_); \
}
53
/* Three-stage network that turns eight input registers s0..s7 into eight
   output registers p0..p7 by repeated s2p_step applications:
     stage 1 (simd_himask_2, shift 1): pairs (s0,s1) (s2,s3) (s4,s5) (s6,s7)
     stage 2 (simd_himask_4, shift 2): pairs of stage-1 results
     stage 3 (simd_himask_8, shift 4): pairs of stage-2 results
   Presumably this is the byte-stream to bit-stream ("serial to parallel")
   transposition, with pK receiving bit K of every input byte -- the
   temporaries' names record which bit indices each one holds.
   Expands to a brace block; call sites add no trailing semicolon. */
#define s2p_bytepack(s0,s1,s2,s3,s4,s5,s6,s7,p0,p1,p2,p3,p4,p5,p6,p7)  \
{ \
    BitBlock s2p_even0_, s2p_even1_, s2p_even2_, s2p_even3_;   /* bits 0,2,4,6 */ \
    BitBlock s2p_odd0_,  s2p_odd1_,  s2p_odd2_,  s2p_odd3_;    /* bits 1,3,5,7 */ \
    BitBlock s2p_b04_0_, s2p_b04_1_, s2p_b26_0_, s2p_b26_1_;   /* bits 0,4 / 2,6 */ \
    BitBlock s2p_b15_0_, s2p_b15_1_, s2p_b37_0_, s2p_b37_1_;   /* bits 1,5 / 3,7 */ \
    /* stage 1 */ \
    s2p_step(s0,s1,simd_himask_2,1,s2p_even0_,s2p_odd0_)  \
    s2p_step(s2,s3,simd_himask_2,1,s2p_even1_,s2p_odd1_)  \
    s2p_step(s4,s5,simd_himask_2,1,s2p_even2_,s2p_odd2_)  \
    s2p_step(s6,s7,simd_himask_2,1,s2p_even3_,s2p_odd3_)  \
    /* stage 2 */ \
    s2p_step(s2p_even0_,s2p_even1_,simd_himask_4,2,s2p_b04_0_,s2p_b26_0_)  \
    s2p_step(s2p_even2_,s2p_even3_,simd_himask_4,2,s2p_b04_1_,s2p_b26_1_)  \
    s2p_step(s2p_odd0_,s2p_odd1_,simd_himask_4,2,s2p_b15_0_,s2p_b37_0_)  \
    s2p_step(s2p_odd2_,s2p_odd3_,simd_himask_4,2,s2p_b15_1_,s2p_b37_1_)  \
    /* stage 3 */ \
    s2p_step(s2p_b04_0_,s2p_b04_1_,simd_himask_8,4,p0,p4)  \
    s2p_step(s2p_b15_0_,s2p_b15_1_,simd_himask_8,4,p1,p5)  \
    s2p_step(s2p_b26_0_,s2p_b26_1_,simd_himask_8,4,p2,p6)  \
    s2p_step(s2p_b37_0_,s2p_b37_1_,simd_himask_8,4,p3,p7)  \
}
72
/* One stage of the p2s_bytemerge network (the mirror image of s2p_step).
   p0,p1    : input registers
   hi_mask  : selector passed to simd_if
   shift    : bit distance used to line the two inputs up
   s0,s1    : output lvalues
   The inputs are first blended with simd_if (p0 against p1 shifted right,
   and p0 shifted left against p1); the two blends are then interleaved
   byte-wise with simd_mergeh_8 / simd_mergel_8.  Both blends are computed
   before either output is written.
   Expands to a brace block; call sites add no trailing semicolon. */
#define p2s_step(p0,p1,hi_mask,shift,s0,s1)  \
{ \
    BitBlock p2s_up_ = simd_if(hi_mask, p0, simd_srli_16(p1,shift)); \
    BitBlock p2s_dn_ = simd_if(hi_mask, simd_slli_16(p0,shift), p1); \
    s0 = simd_mergeh_8(p2s_up_, p2s_dn_); \
    s1 = simd_mergel_8(p2s_up_, p2s_dn_); \
}
81
/* Three-stage network that turns eight input registers p0..p7 into eight
   output registers s0..s7 by repeated p2s_step applications, running the
   stages of s2p_bytepack in the opposite order:
     stage 1 (simd_himask_8, shift 4): pairs (p0,p4) (p1,p5) (p2,p6) (p3,p7)
     stage 2 (simd_himask_4, shift 2): pairs of stage-1 results
     stage 3 (simd_himask_2, shift 1): pairs of stage-2 results
   Presumably this is the bit-stream to byte-stream ("parallel to serial")
   transposition.
   Expands to a brace block; call sites add no trailing semicolon. */
#define p2s_bytemerge(p0,p1,p2,p3,p4,p5,p6,p7,s0,s1,s2,s3,s4,s5,s6,s7)  \
{ \
    BitBlock p2s_b04_0_, p2s_b04_1_, p2s_b26_0_, p2s_b26_1_;   /* bits 0,4 / 2,6 */ \
    BitBlock p2s_b15_0_, p2s_b15_1_, p2s_b37_0_, p2s_b37_1_;   /* bits 1,5 / 3,7 */ \
    BitBlock p2s_even0_, p2s_even1_, p2s_even2_, p2s_even3_;   /* bits 0,2,4,6 */ \
    BitBlock p2s_odd0_,  p2s_odd1_,  p2s_odd2_,  p2s_odd3_;    /* bits 1,3,5,7 */ \
    /* stage 1 */ \
    p2s_step(p0,p4,simd_himask_8,4,p2s_b04_0_,p2s_b04_1_)  \
    p2s_step(p1,p5,simd_himask_8,4,p2s_b15_0_,p2s_b15_1_)  \
    p2s_step(p2,p6,simd_himask_8,4,p2s_b26_0_,p2s_b26_1_)  \
    p2s_step(p3,p7,simd_himask_8,4,p2s_b37_0_,p2s_b37_1_)  \
    /* stage 2 */ \
    p2s_step(p2s_b04_0_,p2s_b26_0_,simd_himask_4,2,p2s_even0_,p2s_even1_)  \
    p2s_step(p2s_b15_0_,p2s_b37_0_,simd_himask_4,2,p2s_odd0_,p2s_odd1_)  \
    p2s_step(p2s_b04_1_,p2s_b26_1_,simd_himask_4,2,p2s_even2_,p2s_even3_)  \
    p2s_step(p2s_b15_1_,p2s_b37_1_,simd_himask_4,2,p2s_odd2_,p2s_odd3_)  \
    /* stage 3 */ \
    p2s_step(p2s_even0_,p2s_odd0_,simd_himask_2,1,s0,s1)  \
    p2s_step(p2s_even1_,p2s_odd1_,simd_himask_2,1,s2,s3)  \
    p2s_step(p2s_even2_,p2s_odd2_,simd_himask_2,1,s4,s5)  \
    p2s_step(p2s_even3_,p2s_odd3_,simd_himask_2,1,s6,s7)  \
}
101
102
103static inline void interleave4(BitBlock p0, BitBlock p1, BitBlock p2, BitBlock p3, 
104                               BitBlock& s0, BitBlock& s1, BitBlock& s2, BitBlock& s3);
105
106static inline void interleave4(BitBlock p0, BitBlock p1, BitBlock p2, BitBlock p3, 
107                               BitBlock& s0, BitBlock& s1, BitBlock& s2, BitBlock& s3){
108        BitBlock hpair0 = simd_mergel_1(p1, p0);
109        BitBlock hpair1 = simd_mergeh_1(p1, p0);
110        BitBlock lpair0 = simd_mergel_1(p3, p2);
111        BitBlock lpair1 = simd_mergeh_1(p3, p2);
112        s0 = simd_mergel_2(lpair0,hpair0);
113        s1 = simd_mergeh_2(lpair0,hpair0);
114        s2 = simd_mergel_2(lpair1,hpair1);
115        s3 = simd_mergeh_2(lpair1,hpair1);
116}
117
118
119
120
/* Moves selected bits of vec towards bit 0 inside every 16-bit field, in four
   passes of distance 1, 2, 4 and 8.  rshiftN selects the bits that travel in
   the pass of distance N.
   Pass 1 uses a subtraction: for a selected bit at position k,
   2^k - 2^(k-1) = 2^(k-1), i.e. the bit lands one place lower.  The other
   passes lift the selected bits out with xor and or them back in shifted.
   vec is updated in place.
   Expands to a brace block; call sites add no trailing semicolon. */
#define do_right16_shifts(vec,rshift1,rshift2,rshift4,rshift8)  \
{ \
    BitBlock r16_moving_; \
    vec = simd_sub_16(vec, simd_srli_16(simd_and(rshift1,vec),1)); \
    r16_moving_ = simd_and(rshift2,vec); \
    vec = simd_or(simd_srli_16(r16_moving_,2), simd_xor(vec,r16_moving_)); \
    r16_moving_ = simd_and(rshift4,vec); \
    vec = simd_or(simd_srli_16(r16_moving_,4), simd_xor(vec,r16_moving_)); \
    r16_moving_ = simd_and(rshift8,vec); \
    vec = simd_or(simd_srli_16(r16_moving_,8), simd_xor(vec,r16_moving_)); \
}
131
132static inline void bit_del_16(BitBlock &s, BitBlock delmask){
133        BitBlock del16_rshift1;
134        BitBlock del16_rshift2;
135        BitBlock del16_rshift4;
136        BitBlock del16_rshift8;
137       
138        BitBlock del16_trans2;
139        BitBlock del16_trans4;
140        BitBlock del16_trans8;
141        BitBlock shift_bits;
142       
143        del16_rshift1= simd_xor(simd_slli_16(delmask,1),simd_slli_16(delmask,2));
144        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,2));
145        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,4));
146        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,8));
147       
148        del16_trans2= simd_and(del16_rshift1,delmask);
149        del16_rshift2= simd_xor(simd_slli_16(del16_trans2,1),simd_slli_16(del16_trans2,2));
150        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,2));
151        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,4));
152        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,8));
153       
154        del16_trans4= simd_and(del16_rshift2,del16_trans2);
155        del16_rshift4= simd_xor(simd_slli_16(del16_trans4,1),simd_slli_16(del16_trans4,2));
156        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,2));
157        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,4));
158        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,8));
159       
160        del16_trans8= simd_and(del16_rshift4,del16_trans4);
161        del16_rshift8= simd_xor(simd_slli_16(del16_trans8,1),simd_slli_16(del16_trans8,2));
162        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,2));
163        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,4));
164        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,8));
165       
166        del16_rshift1= simd_andc(del16_rshift1,delmask);
167        del16_rshift2= simd_andc(del16_rshift2,delmask);
168        del16_rshift4= simd_andc(del16_rshift4,delmask);
169        del16_rshift8= simd_andc(del16_rshift8,delmask);
170       
171        del16_rshift2= simd_sub_16(del16_rshift2,simd_srli_16(simd_and(del16_rshift1,del16_rshift2),1));
172       
173        del16_rshift4= simd_sub_16(del16_rshift4,simd_srli_16(simd_and(del16_rshift1,del16_rshift4),1));
174        shift_bits= simd_and(del16_rshift2,del16_rshift4);
175        del16_rshift4= simd_or(simd_srli_16(shift_bits,2),simd_xor(del16_rshift4,shift_bits));
176       
177        del16_rshift8= simd_sub_16(del16_rshift8,simd_srli_16(simd_and(del16_rshift1,del16_rshift8),1));
178        shift_bits= simd_and(del16_rshift2,del16_rshift8);
179        del16_rshift8= simd_or(simd_srli_16(shift_bits,2),simd_xor(del16_rshift8,shift_bits)); 
180        shift_bits= simd_and(del16_rshift4,del16_rshift8);
181        del16_rshift8= simd_or(simd_srli_16(shift_bits,4),simd_xor(del16_rshift8,shift_bits));
182       
183        s = simd_andc(s, delmask);
184        do_right16_shifts (s, del16_rshift1, del16_rshift2, del16_rshift4,  del16_rshift8)
185
186}
187
188
189static inline void del_count(BitBlock delmask, short *u8_bytes_per_reg){
190
191BitBlock delcounts_2;
192BitBlock delcounts_4;
193BitBlock delcounts_8;
194BitBlock delcounts_16;
195
196delcounts_2 = simd_add_2_lh(delmask, delmask);
197delcounts_4 = simd_add_4_lh(delcounts_2, delcounts_2);
198delcounts_8 = simd_add_8_lh(delcounts_4, delcounts_4);
199delcounts_16 = simd_add_16_lh(delcounts_8, delcounts_8);
200sisd_store_aligned(simd_sub_16(simd_const_16(16), delcounts_16), (BytePack *) &u8_bytes_per_reg[0]);
201}
202
203
204
205#define BLOCK_SIZE 128
206
207void u16u8(char ** targetbuf, const char * targetlimit, const UChar ** srcbuf, const UChar * srclimit, UErrorCode * err){
208
209  BitBlock hcarry;
210BitBlock array_u16l__4_;
211BitBlock array_u16h__4_;
212BitBlock array_u8_butlast__5_;
213BitBlock strct_u16__utf8_3_;
214BitBlock array_u8_butlast__2_;
215BitBlock array_u8_last__5_;
216BitBlock Temp37;
217BitBlock array_u16l__0_;
218BitBlock efghi_e;
219BitBlock strct_u16__ASCII_;
220BitBlock array_u8_last__1_;
221BitBlock array_u8_pre__3_;
222BitBlock Temp31;
223BitBlock Temp30;
224BitBlock Temp33;
225BitBlock Temp32;
226BitBlock Temp35;
227BitBlock Temp34;
228BitBlock array_u8_butlast__6_;
229BitBlock Temp36;
230BitBlock array_u16h__0_;
231BitBlock array_u16l__1_;
232BitBlock strct_u16__surrogate_;
233BitBlock array_u8_last__0_;
234BitBlock array_u16l__7_;
235BitBlock array_u8_last__4_;
236BitBlock gcarry;
237BitBlock array_u8_butlast__3_;
238BitBlock strct_u16__error_;
239BitBlock array_u8_pre__7_;
240BitBlock strct_u16__lsurrogate_;
241BitBlock array_u8_butlast__7_;
242BitBlock array_u16h__7_;
243BitBlock array_u8_pre__4_;
244BitBlock Temp17;
245BitBlock array_u16h__3_;
246BitBlock array_u8_pre__0_;
247BitBlock array_u16l__3_;
248BitBlock array_u8_last__3_;
249BitBlock array_u16l__6_;
250BitBlock u8_2or3;
251BitBlock array_u16h__6_;
252BitBlock Temp13;
253BitBlock Temp12;
254BitBlock Temp11;
255BitBlock Temp10;
256BitBlock array_u8_butlast__4_;
257BitBlock Temp16;
258BitBlock Temp15;
259BitBlock Temp14;
260BitBlock array_u8_butlast__0_;
261BitBlock array_u8_last__7_;
262BitBlock Temp19;
263BitBlock Temp18;
264BitBlock array_u16l__2_;
265BitBlock array_u8_pre__1_;
266BitBlock array_u16l__5_;
267BitBlock array_u8_pre__5_;
268BitBlock array_u16h__2_;
269BitBlock Temp5;
270BitBlock array_u8_last__2_;
271BitBlock array_u16h__5_;
272BitBlock above_0x7F;
273BitBlock above_0x7FF;
274BitBlock strct_u16__hsurrogate_;
275BitBlock efghi_i;
276BitBlock efghi_h;
277BitBlock efghi_g;
278BitBlock efghi_f;
279BitBlock array_u8_last__6_;
280BitBlock array_u8_butlast__1_;
281BitBlock AllOne = simd_const_1(1);
282BitBlock AllZero = simd_const_1(0);
283BitBlock Temp28;
284BitBlock Temp29;
285BitBlock array_u8_pre__2_;
286BitBlock Temp22;
287BitBlock Temp23;
288BitBlock Temp20;
289BitBlock Temp21;
290BitBlock Temp26;
291BitBlock strct_u16__utf8_2_;
292BitBlock Temp24;
293BitBlock Temp25;
294BitBlock Temp3;
295BitBlock Temp2;
296BitBlock Temp1;
297BitBlock Temp27;
298BitBlock Temp7;
299BitBlock Temp6;
300BitBlock array_u16h__1_;
301BitBlock Temp4;
302BitBlock array_u8_pre__6_;
303BitBlock Temp9;
304BitBlock Temp8;
305CarryType carry1 = Carry0;
306CarryType carry0 = Carry0;
307CarryType carry2 = Carry0;
308
309
310SIMD_type u16l [8];
311SIMD_type u16h [8];
312
313
314  BytePack * U16;
315  BytePack U16h[8];
316  BytePack U16l[8];
317
318  BitBlock doublepair[32];
319
320  BitBlock endmask;
321
322  BitBlock delmask[4];
323
324  BitBlock U8[32];
325
326  int target_bckup_steps = 0;
327  int pos = 0;
328
329  intptr_t  units_remaining  = 0;
330  intptr_t  units_read  = 0;
331
332  bool error_found = false;
333  int err_pos;
334
335 
336
337  do {
338   
339    PERF_SEC_START(transcode_timer);
340
341    U16 = (BytePack *)(*srcbuf);
342    units_remaining = srclimit-(*srcbuf);
343
344    if(units_remaining < BLOCK_SIZE){
345        endmask = sisd_sll(simd_const_1(1),sisd_from_int((int) units_remaining));
346        units_read = units_remaining;
347
348//      ((uint16_t *) U16)[units_read/2] = 0;
349    }
350    else {
351        endmask = simd_const_1(0);
352        units_read = BLOCK_SIZE;
353    }
354
355    for (int i=0; i< 8; i++){
356      U16h[i] = simd_pack_16_ll(sisd_load_unaligned(&U16[i*2+1]),sisd_load_unaligned(&U16[i*2]));
357      U16l[i] = simd_pack_16_hh(sisd_load_unaligned(&U16[i*2+1]),sisd_load_unaligned(&U16[i*2]));
358    }
359
360
361    s2p_bytepack(U16h[7], U16h[6], U16h[5], U16h[4], U16h[3], U16h[2], U16h[1], U16h[0],
362    u16h[0], u16h[1], u16h[2], u16h[3], u16h[4], u16h[5], u16h[6], u16h[7]);
363
364    s2p_bytepack(U16l[7], U16l[6], U16l[5], U16l[4], U16l[3], U16l[2], U16l[1], U16l[0],
365    u16l[0], u16l[1], u16l[2], u16l[3], u16l[4], u16l[5], u16l[6], u16l[7]);
366
367array_u16l__0_ = u16l [0];
368array_u16l__1_ = u16l [1];
369array_u16l__2_ = u16l [2];
370array_u16l__3_ = u16l [3];
371array_u16l__4_ = u16l [4];
372array_u16l__5_ = u16l [5];
373array_u16l__6_ = u16l [6];
374array_u16l__7_ = u16l [7];
375
376array_u16h__0_ = u16h [0];
377array_u16h__1_ = u16h [1];
378array_u16h__2_ = u16h [2];
379array_u16h__3_ = u16h [3];
380array_u16h__4_ = u16h [4];
381array_u16h__5_ = u16h [5];
382array_u16h__6_ = u16h [6];
383array_u16h__7_ = u16h [7];
384
385    Temp1 = simd_or(array_u16h__0_,array_u16h__1_);
386Temp2 = simd_or(array_u16h__2_,array_u16h__3_);
387Temp3 = simd_or(Temp1,Temp2);
388above_0x7FF = simd_or(Temp3,array_u16h__4_);
389Temp4 = simd_or(above_0x7FF,array_u16h__5_);
390Temp5 = simd_or(array_u16h__6_,array_u16h__7_);
391Temp6 = simd_or(Temp4,Temp5);
392above_0x7F = simd_or(Temp6,array_u16l__0_);
393Temp7 = simd_and(array_u16h__0_,array_u16h__1_);
394Temp8 = simd_andc(array_u16h__3_,array_u16h__2_);
395Temp9 = simd_and(Temp7,Temp8);
396strct_u16__surrogate_ = simd_and(Temp9,array_u16h__4_);
397strct_u16__hsurrogate_ = simd_andc(strct_u16__surrogate_,array_u16h__5_);
398strct_u16__lsurrogate_ = simd_and(strct_u16__surrogate_,array_u16h__5_);
399strct_u16__utf8_3_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
400strct_u16__utf8_2_ = simd_andc(above_0x7F,above_0x7FF);
401strct_u16__ASCII_ = simd_andc(AllOne,above_0x7F);
402advance_with_carry(strct_u16__hsurrogate_, carry0, Temp10);
403strct_u16__error_ = simd_xor(Temp10,strct_u16__lsurrogate_);
404efghi_i = simd_andc(AllOne,array_u16l__1_);
405efghi_h = simd_xor(array_u16l__0_,array_u16l__1_);
406hcarry = simd_andc(array_u16l__0_,efghi_h);
407efghi_g = simd_xor(array_u16h__7_,hcarry);
408gcarry = simd_andc(array_u16h__7_,efghi_g);
409efghi_f = simd_xor(array_u16h__6_,gcarry);
410efghi_e = simd_andc(array_u16h__6_,efghi_f);
411array_u8_last__0_ = simd_andc(AllOne,strct_u16__ASCII_);
412array_u8_last__1_ = simd_and(strct_u16__ASCII_,array_u16l__1_);
413Temp11 = simd_andc(array_u16l__2_,strct_u16__hsurrogate_);
414Temp12 = simd_and(efghi_h,strct_u16__hsurrogate_);
415array_u8_last__2_ = simd_or(Temp11,Temp12);
416Temp13 = simd_andc(array_u16l__3_,strct_u16__hsurrogate_);
417Temp14 = simd_and(efghi_i,strct_u16__hsurrogate_);
418array_u8_last__3_ = simd_or(Temp13,Temp14);
419Temp15 = simd_andc(array_u16l__4_,strct_u16__hsurrogate_);
420Temp16 = simd_and(array_u16l__2_,strct_u16__hsurrogate_);
421array_u8_last__4_ = simd_or(Temp15,Temp16);
422Temp17 = simd_andc(array_u16l__5_,strct_u16__hsurrogate_);
423Temp18 = simd_and(array_u16l__3_,strct_u16__hsurrogate_);
424array_u8_last__5_ = simd_or(Temp17,Temp18);
425Temp19 = simd_andc(array_u16l__6_,strct_u16__hsurrogate_);
426Temp20 = simd_and(array_u16l__4_,strct_u16__hsurrogate_);
427array_u8_last__6_ = simd_or(Temp19,Temp20);
428Temp21 = simd_andc(array_u16l__7_,strct_u16__hsurrogate_);
429Temp22 = simd_and(array_u16l__5_,strct_u16__hsurrogate_);
430array_u8_last__7_ = simd_or(Temp21,Temp22);
431u8_2or3 = simd_or(strct_u16__utf8_2_,strct_u16__utf8_3_);
432array_u8_butlast__0_ = simd_andc(AllOne,strct_u16__ASCII_);
433array_u8_butlast__1_ = simd_or(strct_u16__utf8_2_,strct_u16__hsurrogate_);
434Temp23 = simd_and(strct_u16__utf8_3_,array_u16h__4_);
435Temp24 = simd_or(strct_u16__hsurrogate_,Temp23);
436advance_with_carry(array_u16l__6_, carry1, Temp25);
437Temp26 = simd_and(strct_u16__lsurrogate_,Temp25);
438array_u8_butlast__2_ = simd_or(Temp24,Temp26);
439Temp27 = simd_and(u8_2or3,array_u16h__5_);
440Temp28 = simd_or(strct_u16__hsurrogate_,Temp27);
441advance_with_carry(array_u16l__7_, carry2, Temp29);
442Temp30 = simd_and(strct_u16__lsurrogate_,Temp29);
443array_u8_butlast__3_ = simd_or(Temp28,Temp30);
444Temp31 = simd_or(strct_u16__hsurrogate_,strct_u16__ASCII_);
445array_u8_butlast__4_ = simd_andc(array_u16h__6_,Temp31);
446Temp32 = simd_andc(array_u16h__7_,Temp31);
447Temp33 = simd_and(efghi_e,strct_u16__hsurrogate_);
448array_u8_butlast__5_ = simd_or(Temp32,Temp33);
449Temp34 = simd_andc(array_u16l__0_,Temp31);
450Temp35 = simd_and(efghi_f,strct_u16__hsurrogate_);
451array_u8_butlast__6_ = simd_or(Temp34,Temp35);
452Temp36 = simd_andc(array_u16l__1_,Temp31);
453Temp37 = simd_and(efghi_g,strct_u16__hsurrogate_);
454array_u8_butlast__7_ = simd_or(Temp36,Temp37);
455array_u8_pre__0_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
456array_u8_pre__1_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
457array_u8_pre__2_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
458array_u8_pre__3_ = AllZero;
459array_u8_pre__4_ = simd_and(strct_u16__utf8_3_,array_u16h__0_);
460array_u8_pre__5_ = simd_and(strct_u16__utf8_3_,array_u16h__1_);
461array_u8_pre__6_ = simd_and(strct_u16__utf8_3_,array_u16h__2_);
462array_u8_pre__7_ = simd_and(strct_u16__utf8_3_,array_u16h__3_);
463
464
465/* Convert flattened array values to arrays */
466SIMD_type u8_pre [8];
467u8_pre[0] = array_u8_pre__0_;
468u8_pre[1] = array_u8_pre__1_;
469u8_pre[2] = array_u8_pre__2_;
470u8_pre[3] = array_u8_pre__3_;
471u8_pre[4] = array_u8_pre__4_;
472u8_pre[5] = array_u8_pre__5_;
473u8_pre[6] = array_u8_pre__6_;
474u8_pre[7] = array_u8_pre__7_;
475
476SIMD_type u8_butlast [8];
477u8_butlast[0] = array_u8_butlast__0_;
478u8_butlast[1] = array_u8_butlast__1_;
479u8_butlast[2] = array_u8_butlast__2_;
480u8_butlast[3] = array_u8_butlast__3_;
481u8_butlast[4] = array_u8_butlast__4_;
482u8_butlast[5] = array_u8_butlast__5_;
483u8_butlast[6] = array_u8_butlast__6_;
484u8_butlast[7] = array_u8_butlast__7_;
485
486SIMD_type u8_last [8];
487u8_last[0] = array_u8_last__0_;
488u8_last[1] = array_u8_last__1_;
489u8_last[2] = array_u8_last__2_;
490u8_last[3] = array_u8_last__3_;
491u8_last[4] = array_u8_last__4_;
492u8_last[5] = array_u8_last__5_;
493u8_last[6] = array_u8_last__6_;
494u8_last[7] = array_u8_last__7_;
495
496
497    if (bitblock_has_bit(strct_u16__error_)) {
498       
499       err_pos = count_forward_zeroes(strct_u16__error_);
500       error_found = true;
501        if ((err_pos == units_read)) {
502                err_pos--;
503                *err = U_TRUNCATED_CHAR_FOUND;
504                (*srcbuf) += err_pos;
505                if(err_pos<0){
506                        target_bckup_steps = 2;
507                        (*targetbuf) -= target_bckup_steps;
508                        return;
509                }               
510        }
511        else {
512          if((((unsigned char *)U16h)[err_pos]< 0xDC) || (((unsigned char *)U16h)[err_pos] >= 0xE0))
513                err_pos--;
514                *err = U_ILLEGAL_CHAR_FOUND;
515                (*srcbuf) += err_pos;
516                if(err_pos<0){
517                        target_bckup_steps = 2;
518                        (*targetbuf) -= target_bckup_steps;
519                        return;
520                }
521        }
522        endmask = sisd_sll(simd_const_1(1),sisd_from_int(err_pos));
523    }
524
525    for (int i=0; i< 8; i++){
526        interleave4(simd_const_8(0), u8_pre[i], u8_butlast[i], u8_last[i],
527                                doublepair[i*4], doublepair[i*4+1], 
528                                doublepair[i*4+2], doublepair[i*4+3]);
529    }
530
531        interleave4(simd_const_1(1), simd_or(endmask, simd_not(strct_u16__utf8_3_)), 
532                                simd_or(endmask, strct_u16__ASCII_), endmask,
533                                delmask[0], delmask[1], delmask[2], delmask[3]);
534
535
536     for (int i=0; i< 32; i=i+4){
537        bit_del_16(doublepair[i],delmask[0]);
538        bit_del_16(doublepair[i+1],delmask[1]);
539        bit_del_16(doublepair[i+2],delmask[2]);
540        bit_del_16(doublepair[i+3],delmask[3]);
541     }
542
543    short u8_bytes_per_reg[8];
544
545    for (int i=0; i< 4; i++){
546      p2s_bytemerge(doublepair[i],doublepair[4+i],doublepair[8+i],doublepair[12+i],doublepair[16+i],doublepair[20+i],doublepair[24+i],doublepair[28+i],
547                        U8[i*8+7],U8[i*8+6],U8[i*8+5],U8[i*8+4],U8[i*8+3],U8[i*8+2],U8[i*8+1],U8[i*8]);
548       for(int k=0; k<8; k++) u8_bytes_per_reg[k] = 0;
549       del_count(delmask[i],u8_bytes_per_reg);
550 
551       for(int j=0; j<8; j++){
552        sisd_store_unaligned(U8[i*8+j],(SIMD_type *) (*targetbuf));
553        *targetbuf += u8_bytes_per_reg[j];
554       }
555    }
556
557    if(!error_found) (*srcbuf) += units_read;
558
559
560    PERF_SEC_END(transcode_timer, units_read*2);
561
562
563    } while ((units_read == BLOCK_SIZE) && !error_found);
564
565}
566
567
568
569
570int
571main(int argc, char * argv[]) {
572        char * infilename, * outfilename;       
573        FILE *infile, *outfile;
574        struct stat fileinfo;
575        const UChar * srcbuf;
576        const UChar * srclimit;
577        const char * targetlimit;
578        char * targetbuf;
579        char * target_head;
580        UErrorCode status;
581        int chars_read;
582
583        if (argc < 2) {
584                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
585                exit(-1);
586        }
587
588        infilename = argv[1];
589        stat(infilename, &fileinfo);
590        infile = fopen(infilename, "rb");
591        if (!infile) {
592                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
593                exit(-1);
594        }
595       
596        if (argc < 3) outfile = stdout;
597        else {
598                outfilename = argv[2];
599                outfile = fopen(outfilename, "wb");
600                if (!outfile) {
601                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
602                        exit(-1);
603                }
604        }
605
606
607        srcbuf = (UChar *) malloc(fileinfo.st_size+1);
608        if (!srcbuf) {
609              fprintf(stderr, "Error: buffer for %s of size %zu cannot be created.\n", infilename, fileinfo.st_size+1);
610              exit(-1);
611        }
612
613        PERF_SEC_INIT(transcode_timer);
614
615
616        chars_read = fread((void *)srcbuf, 1, fileinfo.st_size, infile);
617
618        srclimit = srcbuf + chars_read/2;
619//      (*srcbuf)[fileinfo.st_size] = '\0';
620        fclose(infile);
621
622        targetbuf = (char *) malloc(chars_read*2);
623        targetlimit = targetbuf + chars_read*2;
624        target_head = targetbuf;
625
626        u16u8(&targetbuf, targetlimit, &srcbuf, srclimit, &status);
627        fwrite(target_head , 1 ,  targetbuf - target_head, outfile );
628
629        if (status == U_ILLEGAL_CHAR_FOUND) {
630                fprintf(stderr, "Illegal UTF-16 sequence at position %lu in source.\n", chars_read-(srclimit-srcbuf)*2);
631        }
632        else if (status == U_TRUNCATED_CHAR_FOUND) {
633                fprintf(stderr, "EOF with incomplete UTF-16 sequence at position %lu in source.\n",chars_read-(srclimit-srcbuf)*2);
634        }
635
636        PERF_SEC_DUMP(transcode_timer);
637
638        PERF_SEC_DESTROY(transcode_timer);
639
640        fclose(outfile);
641
642        return(0);
643}
Note: See TracBrowser for help on using the repository browser.