source: proto/u16u8/src/u16u8.c @ 470

Last change on this file since 470 was 470, checked in by cameron, 9 years ago

Move u16u8.c into src subdirectory

File size: 21.8 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <errno.h>
4#include <stdint.h>
5#include <string.h>
6#include <sys/types.h>
7#include <sys/stat.h>
8#include "lib/lib_simd.h"
/* Smaller of x and y.
 * Function-like macro: both arguments are expanded twice, so do not pass
 * expressions with side effects (e.g. min(i++, j)). */
#define min(x,y) ((x) < (y) ? (x) : (y))

/* Two names for the same SIMD register type. SIMD_type is not defined in this
 * file (presumably it comes from lib/lib_simd.h). In this file BytePack is used
 * for registers holding packed bytes and BitBlock for registers used as bit
 * masks / bit streams; the compiler treats them as the same type. */
11typedef SIMD_type BytePack;
12typedef SIMD_type BitBlock;
13
14
15// Profiling
16
/* Optional profiling support: when BUFFER_PROFILING is defined the profiler
 * source is textually included and a global timer-table pointer is declared.
 * Nothing in the visible part of this file assigns or reads transcode_timer. */
17#ifdef BUFFER_PROFILING
18#include "../Profiling/BOM_Profiler.c"
19BOM_Table * transcode_timer;
20#endif
21
22/*===========================================================================*/
23/* UErrorCode */
24/* Extracted from ICU */
25/*===========================================================================*/
26
/* Status codes reported through the `err` out-parameter of u16u8().
 * Only the three values this file uses are listed; the numeric values are
 * kept exactly as written here. */
27typedef enum UErrorCode {
28
29    U_ZERO_ERROR              =  0,     /**< No error, no warning. */
30    U_TRUNCATED_CHAR_FOUND    = 11,     /**< Character conversion: Incomplete input sequence. */
31    U_ILLEGAL_CHAR_FOUND      = 12,     /**< Character conversion: Illegal input sequence/combination of input units. */
32} UErrorCode;
33
/* One 16-bit code unit of the input handed to u16u8(). */
34typedef  uint16_t UChar;
35
/*
 * s2p_step: one stage of the transposition performed by s2p_bytepack.
 *
 *   s0, s1   input registers
 *   hi_mask  selection mask handed to simd_if
 *   shift    bit distance applied with simd_srli_16 / simd_slli_16
 *   p0, p1   output lvalues
 *
 * t0 and t1 gather the `_hh` and `_ll` halves of the 16-bit fields of s0,s1
 * (presumably the high and low bytes -- confirm against lib/lib_simd.h).
 * p0 is then assembled from t0 and t1 shifted right, p1 from t0 shifted left
 * and t1, with hi_mask choosing between the two sources.
 *
 * Expands to a braced block with its own temporaries, so invocations are
 * written without a trailing semicolon (see s2p_bytepack). The definition
 * ends at the closing brace; no line continuation follows it.
 */
#define s2p_step(s0,s1,hi_mask,shift,p0,p1)  \
{ \
BitBlock t0,t1; \
t0= simd_pack_16_hh(s0,s1) ; \
t1= simd_pack_16_ll(s0,s1) ; \
p0= simd_if(hi_mask,t0,simd_srli_16(t1,shift) ) ; \
p1= simd_if(hi_mask,simd_slli_16(t0,shift) ,t1) ; \
}

/*
 * s2p_bytepack: transform eight input registers s0..s7 into eight output
 * registers p0..p7 through three rounds of s2p_step:
 *   round 1: mask simd_himask_2, shift 1  -> bit00224466_* / bit11335577_*
 *   round 2: mask simd_himask_4, shift 2  -> bit0000 4444, 2222 6666, ...
 *   round 3: mask simd_himask_8, shift 4  -> p0..p7
 * Judging by the temporaries' names each p_k collects bit k of every input
 * byte (a serial-to-parallel bit transposition) -- confirm against
 * lib/lib_simd.h before relying on the exact bit order.
 *
 * Expands to a braced block; all temporaries are local to it. The definition
 * ends at the closing brace; no line continuation follows it.
 */
#define s2p_bytepack(s0,s1,s2,s3,s4,s5,s6,s7,p0,p1,p2,p3,p4,p5,p6,p7)  \
{BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3; \
BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3; \
BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1; \
BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1; \
s2p_step(s0,s1,simd_himask_2,1,bit00224466_0,bit11335577_0)  \
s2p_step(s2,s3,simd_himask_2,1,bit00224466_1,bit11335577_1)  \
s2p_step(s4,s5,simd_himask_2,1,bit00224466_2,bit11335577_2)  \
s2p_step(s6,s7,simd_himask_2,1,bit00224466_3,bit11335577_3)  \
s2p_step(bit00224466_0,bit00224466_1,simd_himask_4,2,bit00004444_0,bit22226666_0)  \
s2p_step(bit00224466_2,bit00224466_3,simd_himask_4,2,bit00004444_1,bit22226666_1)  \
s2p_step(bit11335577_0,bit11335577_1,simd_himask_4,2,bit11115555_0,bit33337777_0)  \
s2p_step(bit11335577_2,bit11335577_3,simd_himask_4,2,bit11115555_1,bit33337777_1)  \
s2p_step(bit00004444_0,bit00004444_1,simd_himask_8,4,p0,p4)  \
s2p_step(bit11115555_0,bit11115555_1,simd_himask_8,4,p1,p5)  \
s2p_step(bit22226666_0,bit22226666_1,simd_himask_8,4,p2,p6)  \
s2p_step(bit33337777_0,bit33337777_1,simd_himask_8,4,p3,p7)  \
}

/*
 * p2s_step: one stage of the transposition performed by p2s_bytemerge; it
 * mirrors s2p_step with the select and the byte shuffle in the opposite order.
 *
 *   p0, p1   input registers
 *   hi_mask  selection mask handed to simd_if
 *   shift    bit distance applied with simd_srli_16 / simd_slli_16
 *   s0, s1   output lvalues
 *
 * t0 combines p0 with p1 shifted right, t1 combines p0 shifted left with p1
 * (hi_mask chooses the source of each bit); s0 and s1 are then the `mergeh_8`
 * and `mergel_8` byte interleavings of t0 and t1 (exact lane order is defined
 * in lib/lib_simd.h).
 *
 * Expands to a braced block with its own temporaries, so invocations are
 * written without a trailing semicolon. The definition ends at the closing
 * brace; no line continuation follows it.
 */
#define p2s_step(p0,p1,hi_mask,shift,s0,s1)  \
{ \
BitBlock t0,t1; \
t0= simd_if(hi_mask,p0,simd_srli_16(p1,shift) ) ; \
t1= simd_if(hi_mask,simd_slli_16(p0,shift) ,p1) ; \
s0= simd_mergeh_8(t0,t1) ; \
s1= simd_mergel_8(t0,t1) ; \
}

/*
 * p2s_bytemerge: transform eight input registers p0..p7 into eight output
 * registers s0..s7 through three rounds of p2s_step, applied in the reverse
 * order of s2p_bytepack:
 *   round 1: mask simd_himask_8, shift 4
 *   round 2: mask simd_himask_4, shift 2
 *   round 3: mask simd_himask_2, shift 1  -> s0..s7
 * Presumably the inverse of s2p_bytepack (bit planes back to packed bytes) --
 * confirm against lib/lib_simd.h before relying on the exact byte order.
 *
 * Expands to a braced block; all temporaries are local to it. The definition
 * ends at the closing brace; no line continuation follows it.
 */
#define p2s_bytemerge(p0,p1,p2,p3,p4,p5,p6,p7,s0,s1,s2,s3,s4,s5,s6,s7)  \
{ \
BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1; \
BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1; \
BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3; \
BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3; \
p2s_step(p0,p4,simd_himask_8,4,bit00004444_0,bit00004444_1)  \
p2s_step(p1,p5,simd_himask_8,4,bit11115555_0,bit11115555_1)  \
p2s_step(p2,p6,simd_himask_8,4,bit22226666_0,bit22226666_1)  \
p2s_step(p3,p7,simd_himask_8,4,bit33337777_0,bit33337777_1)  \
p2s_step(bit00004444_0,bit22226666_0,simd_himask_4,2,bit00224466_0,bit00224466_1)  \
p2s_step(bit11115555_0,bit33337777_0,simd_himask_4,2,bit11335577_0,bit11335577_1)  \
p2s_step(bit00004444_1,bit22226666_1,simd_himask_4,2,bit00224466_2,bit00224466_3)  \
p2s_step(bit11115555_1,bit33337777_1,simd_himask_4,2,bit11335577_2,bit11335577_3)  \
p2s_step(bit00224466_0,bit11335577_0,simd_himask_2,1,s0,s1)  \
p2s_step(bit00224466_1,bit11335577_1,simd_himask_2,1,s2,s3)  \
p2s_step(bit00224466_2,bit11335577_2,simd_himask_2,1,s4,s5)  \
p2s_step(bit00224466_3,bit11335577_3,simd_himask_2,1,s6,s7)  \
}

93
94static inline void interleave4(BitBlock p0, BitBlock p1, BitBlock p2, BitBlock p3, 
95                               BitBlock& s0, BitBlock& s1, BitBlock& s2, BitBlock& s3);
96
97static inline void interleave4(BitBlock p0, BitBlock p1, BitBlock p2, BitBlock p3, 
98                               BitBlock& s0, BitBlock& s1, BitBlock& s2, BitBlock& s3){
99        BitBlock hpair0 = simd_mergel_1(p1, p0);
100        BitBlock hpair1 = simd_mergeh_1(p1, p0);
101        BitBlock lpair0 = simd_mergel_1(p3, p2);
102        BitBlock lpair1 = simd_mergeh_1(p3, p2);
103        s0 = simd_mergel_2(lpair0,hpair0);
104        s1 = simd_mergeh_2(lpair0,hpair0);
105        s2 = simd_mergel_2(lpair1,hpair1);
106        s3 = simd_mergeh_2(lpair1,hpair1);
107}
108
109
110
111
/*
 * do_right16_shifts: apply four conditional right shifts (by 1, 2, 4 and 8)
 * to `vec`, independently within each 16-bit field.
 *
 * For every stage the bits of `vec` selected by the stage mask are moved
 * right by the stage distance and all other bits stay in place:
 *   - distance 1 uses the subtraction identity x - ((x & m) >> 1);
 *   - distances 2, 4, 8 clear the selected bits with XOR and OR them back in
 *     at their shifted position.
 * `vec` is updated in place and must be an lvalue. The macro expands to a
 * braced block, so it is invoked without a trailing semicolon.
 */
#define do_right16_shifts(vec,rshift1,rshift2,rshift4,rshift8)  \
{ \
    BitBlock moving_; \
    moving_ = simd_and(rshift1, vec); \
    vec = simd_sub_16(vec, simd_srli_16(moving_, 1)); \
    moving_ = simd_and(rshift2, vec); \
    vec = simd_or(simd_srli_16(moving_, 2), simd_xor(vec, moving_)); \
    moving_ = simd_and(rshift4, vec); \
    vec = simd_or(simd_srli_16(moving_, 4), simd_xor(vec, moving_)); \
    moving_ = simd_and(rshift8, vec); \
    vec = simd_or(simd_srli_16(moving_, 8), simd_xor(vec, moving_)); \
}
122
123static inline void bit_del_16(BitBlock &s, BitBlock delmask){
124        BitBlock del16_rshift1;
125        BitBlock del16_rshift2;
126        BitBlock del16_rshift4;
127        BitBlock del16_rshift8;
128       
129        BitBlock del16_trans2;
130        BitBlock del16_trans4;
131        BitBlock del16_trans8;
132        BitBlock shift_bits;
133       
134        del16_rshift1= simd_xor(simd_slli_16(delmask,1),simd_slli_16(delmask,2));
135        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,2));
136        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,4));
137        del16_rshift1= simd_xor(del16_rshift1,simd_slli_16(del16_rshift1,8));
138       
139        del16_trans2= simd_and(del16_rshift1,delmask);
140        del16_rshift2= simd_xor(simd_slli_16(del16_trans2,1),simd_slli_16(del16_trans2,2));
141        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,2));
142        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,4));
143        del16_rshift2= simd_xor(del16_rshift2,simd_slli_16(del16_rshift2,8));
144       
145        del16_trans4= simd_and(del16_rshift2,del16_trans2);
146        del16_rshift4= simd_xor(simd_slli_16(del16_trans4,1),simd_slli_16(del16_trans4,2));
147        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,2));
148        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,4));
149        del16_rshift4= simd_xor(del16_rshift4,simd_slli_16(del16_rshift4,8));
150       
151        del16_trans8= simd_and(del16_rshift4,del16_trans4);
152        del16_rshift8= simd_xor(simd_slli_16(del16_trans8,1),simd_slli_16(del16_trans8,2));
153        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,2));
154        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,4));
155        del16_rshift8= simd_xor(del16_rshift8,simd_slli_16(del16_rshift8,8));
156       
157        del16_rshift1= simd_andc(del16_rshift1,delmask);
158        del16_rshift2= simd_andc(del16_rshift2,delmask);
159        del16_rshift4= simd_andc(del16_rshift4,delmask);
160        del16_rshift8= simd_andc(del16_rshift8,delmask);
161       
162        del16_rshift2= simd_sub_16(del16_rshift2,simd_srli_16(simd_and(del16_rshift1,del16_rshift2),1));
163       
164        del16_rshift4= simd_sub_16(del16_rshift4,simd_srli_16(simd_and(del16_rshift1,del16_rshift4),1));
165        shift_bits= simd_and(del16_rshift2,del16_rshift4);
166        del16_rshift4= simd_or(simd_srli_16(shift_bits,2),simd_xor(del16_rshift4,shift_bits));
167       
168        del16_rshift8= simd_sub_16(del16_rshift8,simd_srli_16(simd_and(del16_rshift1,del16_rshift8),1));
169        shift_bits= simd_and(del16_rshift2,del16_rshift8);
170        del16_rshift8= simd_or(simd_srli_16(shift_bits,2),simd_xor(del16_rshift8,shift_bits)); 
171        shift_bits= simd_and(del16_rshift4,del16_rshift8);
172        del16_rshift8= simd_or(simd_srli_16(shift_bits,4),simd_xor(del16_rshift8,shift_bits));
173       
174        s = simd_andc(s, delmask);
175        do_right16_shifts (s, del16_rshift1, del16_rshift2, del16_rshift4,  del16_rshift8)
176
177}
178
179
180static inline void del_count(BitBlock delmask, short *u8_bytes_per_reg){
181
182BitBlock delcounts_2;
183BitBlock delcounts_4;
184BitBlock delcounts_8;
185BitBlock delcounts_16;
186
187delcounts_2 = simd_add_2_lh(delmask, delmask);
188delcounts_4 = simd_add_4_lh(delcounts_2, delcounts_2);
189delcounts_8 = simd_add_8_lh(delcounts_4, delcounts_4);
190delcounts_16 = simd_add_16_lh(delcounts_8, delcounts_8);
191sisd_store_aligned(simd_sub_16(simd_const_16(16), delcounts_16), (BytePack *) &u8_bytes_per_reg[0]);
192}
193
/*
 * double_int64_adc: two chained add-with-carry instructions in x86 inline
 * assembly (GCC extended asm).
 *
 *   x1, x2        first operand, as two halves; x1 is added first
 *   y1, y2        second operand, same layout
 *   rslt1, rslt2  receive x1 + y1 + carry-in and x2 + y2 + carry of the first add
 *   carry         in/out flag image kept in the A register: `sahf` loads the
 *                 flags from AH before the adds and `lahf` copies them back
 *                 to AH afterwards, so the carry travels in AH, not in bit 0
 *
 * z1/z2 are tied to the x1/x2 inputs (the "[z1]"/"[z2]" matching constraints),
 * so each adc accumulates in place. The operand width follows the C operands;
 * adc128 passes uint64_t.
 * NOTE(review): z1 is written by the first adc before e2 is read, yet the
 * outputs carry no early-clobber ('&') modifier -- confirm this is safe under
 * GCC's operand allocation rules.
 */
194#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
195  __asm__  ("sahf\n\t" \
196        "adc %[e1], %[z1]\n\t" \
197        "adc %[e2], %[z2]\n\t" \
198        "lahf\n\t" \
199     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
200         : "[z1]" (x1), "[z2]" (x2), \
201           [e1] "r" (y1), [e2] "r" (y2), \
202           "[carryflag]" (carry) \
203         : "cc")
204
/*
 * adc128: sum = first + second + carry-in, computed on the two 64-bit halves
 * of an __m128i via double_int64_adc.
 *
 *   first, second  __m128i operands
 *   carry          in/out flag image (see double_int64_adc); must be an lvalue
 *   sum            lvalue receiving the __m128i result
 *
 * The operands are copied through unions to reach their uint64_t halves;
 * int64[0] is added first, i.e. treated as the low half. Wrapped in
 * do { } while(0), so an invocation needs its own trailing semicolon.
 */
205#define adc128(first, second, carry, sum) \
206do\
207{\
208  union {__m128i bitblock;\
209         uint64_t int64[2];} rslt;\
210\
211  union {__m128i bitblock;\
212         uint64_t int64[2];} x;\
213\
214  union {__m128i bitblock;\
215         uint64_t int64[2];} y;\
216\
217  x.bitblock = first;\
218  y.bitblock = second;\
219\
220  double_int64_adc(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carry);\
221  sum = rslt.bitblock;\
222}while(0)
223
/*
 * double_int64_sbb: two chained subtract-with-borrow instructions in x86
 * inline assembly; the subtraction counterpart of double_int64_adc.
 *
 *   x1, x2        minuend, as two halves; x1 is processed first
 *   y1, y2        subtrahend, same layout
 *   rslt1, rslt2  receive x1 - y1 - borrow-in and x2 - y2 - borrow of the first sbb
 *   carry         in/out flag image kept in the A register: loaded into the
 *                 flags with `sahf`, saved back from the flags with `lahf`
 *
 * z1/z2 are tied to the x1/x2 inputs, so each sbb works in place.
 * NOTE(review): as in double_int64_adc, the outputs have no early-clobber
 * ('&') modifier although z1 is written before e2 is read -- confirm.
 */
224#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
225  __asm__  ("sahf\n\t" \
226        "sbb %[e1], %[z1]\n\t" \
227        "sbb %[e2], %[z2]\n\t" \
228        "lahf\n\t" \
229     : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
230         : "[z1]" (x1), "[z2]" (x2), \
231           [e1] "r" (y1), [e2] "r" (y2), \
232           "[carryflag]" (carry) \
233         : "cc")
234
235
/*
 * sbb128: sum = first - second - borrow-in, computed on the two 64-bit halves
 * of an __m128i via double_int64_sbb.
 *
 *   first, second  __m128i operands (minuend, subtrahend)
 *   carry          in/out flag image (see double_int64_sbb); must be an lvalue
 *   sum            lvalue receiving the __m128i result (a difference, despite
 *                  the parameter name)
 *
 * int64[0] is processed first, i.e. treated as the low half. Wrapped in
 * do { } while(0), so an invocation needs its own trailing semicolon.
 * Not invoked anywhere in the visible part of this file.
 */
236#define sbb128(first, second, carry, sum) \
237do\
238{ union {__m128i bitblock;\
239         uint64_t int64[2];} rslt;\
240\
241  union {__m128i bitblock;\
242         uint64_t int64[2];} x;\
243\
244  union {__m128i bitblock;\
245         uint64_t int64[2];} y;\
246\
247  x.bitblock = first;\
248  y.bitblock = second;\
249\
250  double_int64_sbb(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \
251                   rslt.int64[0], rslt.int64[1], carry);\
252  sum = rslt.bitblock;\
253}while(0)
254
255
/* Maximum number of 16-bit code units u16u8() consumes per loop iteration. */
256#define BLOCK_SIZE 128
257
/*
 * u16u8: convert 16-bit code units read from *srcbuf into bytes written at
 * *targetbuf, one block of up to BLOCK_SIZE units per loop iteration, using
 * bit-parallel SIMD logic. The classification masks (ASCII, utf8_2, utf8_3,
 * surrogate, ...) indicate a UTF-16 to UTF-8 transcoder.
 *
 *   targetbuf    in/out: *targetbuf is the write position. It is advanced
 *                after every register store, and moved back by 2 on the
 *                err_pos<0 paths.
 *   targetlimit  never read in this function.
 *                NOTE(review): output is therefore not bounds-checked here,
 *                and each store appears to write a whole register at
 *                *targetbuf regardless of how many bytes are kept.
 *   srcbuf       in/out: *srcbuf is the read position. Advanced by units_read
 *                for a clean block, or by err_pos when an error is flagged.
 *   srclimit     end of the input; bounds units_read for the last block.
 *   err          out: set to U_TRUNCATED_CHAR_FOUND or U_ILLEGAL_CHAR_FOUND
 *                when an error bit is found. Never written otherwise, so the
 *                caller has to initialise it.
 *
 * The loop ends after a block shorter than BLOCK_SIZE or after an error.
 */
258void u16u8(char ** targetbuf, const char * targetlimit, const UChar ** srcbuf, const UChar * srclimit, UErrorCode * err){
259
/* Flag images threaded through the adc128 calls from one block to the next
 * (all start cleared). */
260  int carry1=0;
261int carry0=0;
262int carry2=0;
263BitBlock hcarry;
264BitBlock array_u16l__4_;
265BitBlock array_u16h__4_;
266BitBlock array_u8_butlast__5_;
267BitBlock strct_u16__utf8_3_;
268BitBlock array_u8_butlast__2_;
269BitBlock array_u8_last__5_;
270BitBlock Temp37;
271BitBlock array_u16l__0_;
272BitBlock efghi_e;
273BitBlock strct_u16__ASCII_;
274BitBlock array_u8_last__1_;
275BitBlock array_u8_pre__3_;
276BitBlock Temp31;
277BitBlock Temp30;
278BitBlock Temp33;
279BitBlock Temp32;
280BitBlock Temp35;
281BitBlock Temp34;
282BitBlock array_u8_butlast__6_;
283BitBlock Temp36;
284BitBlock array_u16h__0_;
285BitBlock array_u16l__1_;
286BitBlock strct_u16__surrogate_;
287BitBlock array_u8_last__0_;
288BitBlock array_u16l__7_;
289BitBlock array_u8_last__4_;
290BitBlock gcarry;
291BitBlock array_u8_butlast__3_;
292BitBlock strct_u16__error_;
293BitBlock array_u8_pre__7_;
294BitBlock strct_u16__lsurrogate_;
295BitBlock array_u8_butlast__7_;
296BitBlock array_u16h__7_;
297BitBlock array_u8_pre__4_;
298BitBlock Temp17;
299BitBlock array_u16h__3_;
300BitBlock array_u8_pre__0_;
301BitBlock array_u16l__3_;
302BitBlock array_u8_last__3_;
303BitBlock array_u16l__6_;
304BitBlock u8_2or3;
305BitBlock array_u16h__6_;
306BitBlock Temp13;
307BitBlock Temp12;
308BitBlock Temp11;
309BitBlock Temp10;
310BitBlock array_u8_butlast__4_;
311BitBlock Temp16;
312BitBlock Temp15;
313BitBlock Temp14;
314BitBlock array_u8_butlast__0_;
315BitBlock array_u8_last__7_;
316BitBlock Temp19;
317BitBlock Temp18;
318BitBlock array_u16l__2_;
319BitBlock array_u8_pre__1_;
320BitBlock array_u16l__5_;
321BitBlock array_u8_pre__5_;
322BitBlock array_u16h__2_;
323BitBlock Temp5;
324BitBlock array_u8_last__2_;
325BitBlock array_u16h__5_;
326BitBlock above_0x7F;
327BitBlock above_0x7FF;
328BitBlock strct_u16__hsurrogate_;
329BitBlock efghi_i;
330BitBlock efghi_h;
331BitBlock efghi_g;
332BitBlock efghi_f;
333BitBlock array_u8_last__6_;
334BitBlock array_u8_butlast__1_;
335BitBlock AllOne = simd_const_1(1);
336BitBlock AllZero = simd_const_1(0);
337BitBlock Temp28;
338BitBlock Temp29;
339BitBlock array_u8_pre__2_;
340BitBlock Temp22;
341BitBlock Temp23;
342BitBlock Temp20;
343BitBlock Temp21;
344BitBlock Temp26;
345BitBlock strct_u16__utf8_2_;
346BitBlock Temp24;
347BitBlock Temp25;
348BitBlock Temp3;
349BitBlock Temp2;
350BitBlock Temp1;
351BitBlock Temp27;
352BitBlock Temp7;
353BitBlock Temp6;
354BitBlock array_u16h__1_;
355BitBlock Temp4;
356BitBlock array_u8_pre__6_;
357BitBlock Temp9;
358BitBlock Temp8;
359
360
/* Outputs of the two s2p_bytepack calls below: eight registers each for the
 * two byte halves of the code units. */
361SIMD_type u16l [8];
362SIMD_type u16h [8];
363
364
365  BytePack * U16;
366  BytePack U16h[8];
367  BytePack U16l[8];
368
/* Interleaved output streams: four registers for each of the eight output
 * bit positions (filled by interleave4, compressed by bit_del_16). */
369  BitBlock doublepair[32];
370
371  BitBlock endmask;
372
373  BitBlock delmask[4];
374
375  BitBlock U8[32];
376
377  int target_bckup_steps = 0;
/* NOTE(review): `pos` is never used after this declaration. */
378  int pos = 0;
379
380  int  units_read  = 0;
381
382  bool error_found = false;
383  int err_pos;
384
385
386
/* One iteration per block of up to BLOCK_SIZE code units. */
387  do {
388
389    U16 = (BytePack *)(*srcbuf);
390    units_read = min(srclimit-(*srcbuf),BLOCK_SIZE);
391
/* For a short block, endmask is an all-ones register shifted by units_read
 * (presumably marking the positions from units_read upward -- depends on the
 * direction of sisd_sll); it is OR-ed into every deletion mask below. */
392    if(units_read < BLOCK_SIZE){
393        endmask = sisd_sll(simd_const_1(1),sisd_from_int(units_read));
394//      ((uint16_t *) U16)[units_read/2] = 0;
395    }
396    else endmask = simd_const_1(0);
397
398
/* Split the code units into their two byte halves.
 * NOTE(review): sixteen registers are loaded from U16 on every iteration,
 * whatever units_read is, so a short (or empty) final block reads memory
 * beyond srclimit; the error test further down is not masked by endmask
 * either -- confirm the caller pads the source buffer.
 * NOTE(review): U16h is built with pack_16_ll and U16l with pack_16_hh; which
 * byte of a unit counts as "high" depends on lib_simd conventions -- confirm
 * the expected input byte order. */
399    for (int i=0; i< 8; i++){
400      U16h[i] = simd_pack_16_ll(sisd_load_unaligned(&U16[i*2+1]),sisd_load_unaligned(&U16[i*2]));
401      U16l[i] = simd_pack_16_hh(sisd_load_unaligned(&U16[i*2+1]),sisd_load_unaligned(&U16[i*2]));
402    }
403
404
/* Transform each set of eight byte registers into eight registers u16h[0..7]
 * and u16l[0..7] (registers passed in reverse order). */
405    s2p_bytepack(U16h[7], U16h[6], U16h[5], U16h[4], U16h[3], U16h[2], U16h[1], U16h[0],
406    u16h[0], u16h[1], u16h[2], u16h[3], u16h[4], u16h[5], u16h[6], u16h[7]);
407
408    s2p_bytepack(U16l[7], U16l[6], U16l[5], U16l[4], U16l[3], U16l[2], U16l[1], U16l[0],
409    u16l[0], u16l[1], u16l[2], u16l[3], u16l[4], u16l[5], u16l[6], u16l[7]);
410
/* Copy the array elements into scalar names used by the straight-line logic
 * below. */
411array_u16l__0_ = u16l [0];
412array_u16l__1_ = u16l [1];
413array_u16l__2_ = u16l [2];
414array_u16l__3_ = u16l [3];
415array_u16l__4_ = u16l [4];
416array_u16l__5_ = u16l [5];
417array_u16l__6_ = u16l [6];
418array_u16l__7_ = u16l [7];
419
420array_u16h__0_ = u16h [0];
421array_u16h__1_ = u16h [1];
422array_u16h__2_ = u16h [2];
423array_u16h__3_ = u16h [3];
424array_u16h__4_ = u16h [4];
425array_u16h__5_ = u16h [5];
426array_u16h__6_ = u16h [6];
427array_u16h__7_ = u16h [7];
428
/* Per-position classification masks: above_0x7FF is the OR of u16h[0..4],
 * above_0x7F the OR of all u16h plus u16l[0]; surrogate requires
 * u16h[0],[1],[3],[4] set and u16h[2] clear, and u16h[5] then separates
 * hsurrogate (clear) from lsurrogate (set). */
429    Temp1 = simd_or(array_u16h__0_,array_u16h__1_);
430Temp2 = simd_or(array_u16h__2_,array_u16h__3_);
431Temp3 = simd_or(Temp1,Temp2);
432above_0x7FF = simd_or(Temp3,array_u16h__4_);
433Temp4 = simd_or(above_0x7FF,array_u16h__5_);
434Temp5 = simd_or(array_u16h__6_,array_u16h__7_);
435Temp6 = simd_or(Temp4,Temp5);
436above_0x7F = simd_or(Temp6,array_u16l__0_);
437Temp7 = simd_and(array_u16h__0_,array_u16h__1_);
438Temp8 = simd_andc(array_u16h__3_,array_u16h__2_);
439Temp9 = simd_and(Temp7,Temp8);
440strct_u16__surrogate_ = simd_and(Temp9,array_u16h__4_);
441strct_u16__hsurrogate_ = simd_andc(strct_u16__surrogate_,array_u16h__5_);
442strct_u16__lsurrogate_ = simd_and(strct_u16__surrogate_,array_u16h__5_);
443strct_u16__utf8_3_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
444strct_u16__utf8_2_ = simd_andc(above_0x7F,above_0x7FF);
445strct_u16__ASCII_ = simd_andc(AllOne,above_0x7F);
/* Temp10 = hsurrogate + hsurrogate + carry0, i.e. the hsurrogate mask doubled
 * (moved on by one bit position, with carry0 linking consecutive blocks).
 * Any position where it differs from lsurrogate is flagged as an error. */
446adc128(strct_u16__hsurrogate_, strct_u16__hsurrogate_, carry0, Temp10);
447strct_u16__error_ = simd_xor(Temp10,strct_u16__lsurrogate_);
448efghi_i = simd_andc(AllOne,array_u16l__1_);
449efghi_h = simd_xor(array_u16l__0_,array_u16l__1_);
450hcarry = simd_andc(array_u16l__0_,efghi_h);
451efghi_g = simd_xor(array_u16h__7_,hcarry);
452gcarry = simd_andc(array_u16h__7_,efghi_g);
453efghi_f = simd_xor(array_u16h__6_,gcarry);
454efghi_e = simd_andc(array_u16h__6_,efghi_f);
/* Bit streams of the three output byte slots (last, butlast, pre), one
 * variable per bit position 0..7. */
455array_u8_last__0_ = simd_andc(AllOne,strct_u16__ASCII_);
456array_u8_last__1_ = simd_and(strct_u16__ASCII_,array_u16l__1_);
457Temp11 = simd_andc(array_u16l__2_,strct_u16__hsurrogate_);
458Temp12 = simd_and(efghi_h,strct_u16__hsurrogate_);
459array_u8_last__2_ = simd_or(Temp11,Temp12);
460Temp13 = simd_andc(array_u16l__3_,strct_u16__hsurrogate_);
461Temp14 = simd_and(efghi_i,strct_u16__hsurrogate_);
462array_u8_last__3_ = simd_or(Temp13,Temp14);
463Temp15 = simd_andc(array_u16l__4_,strct_u16__hsurrogate_);
464Temp16 = simd_and(array_u16l__2_,strct_u16__hsurrogate_);
465array_u8_last__4_ = simd_or(Temp15,Temp16);
466Temp17 = simd_andc(array_u16l__5_,strct_u16__hsurrogate_);
467Temp18 = simd_and(array_u16l__3_,strct_u16__hsurrogate_);
468array_u8_last__5_ = simd_or(Temp17,Temp18);
469Temp19 = simd_andc(array_u16l__6_,strct_u16__hsurrogate_);
470Temp20 = simd_and(array_u16l__4_,strct_u16__hsurrogate_);
471array_u8_last__6_ = simd_or(Temp19,Temp20);
472Temp21 = simd_andc(array_u16l__7_,strct_u16__hsurrogate_);
473Temp22 = simd_and(array_u16l__5_,strct_u16__hsurrogate_);
474array_u8_last__7_ = simd_or(Temp21,Temp22);
475u8_2or3 = simd_or(strct_u16__utf8_2_,strct_u16__utf8_3_);
476array_u8_butlast__0_ = simd_andc(AllOne,strct_u16__ASCII_);
477array_u8_butlast__1_ = simd_or(strct_u16__utf8_2_,strct_u16__hsurrogate_);
478Temp23 = simd_and(strct_u16__utf8_3_,array_u16h__4_);
479Temp24 = simd_or(strct_u16__hsurrogate_,Temp23);
/* Temp25 / Temp29: u16l[6] and u16l[7] doubled with carry (same one-position
 * move as Temp10), used only at lsurrogate positions. */
480adc128(array_u16l__6_, array_u16l__6_, carry1, Temp25);
481Temp26 = simd_and(strct_u16__lsurrogate_,Temp25);
482array_u8_butlast__2_ = simd_or(Temp24,Temp26);
483Temp27 = simd_and(u8_2or3,array_u16h__5_);
484Temp28 = simd_or(strct_u16__hsurrogate_,Temp27);
485adc128(array_u16l__7_, array_u16l__7_, carry2, Temp29);
486Temp30 = simd_and(strct_u16__lsurrogate_,Temp29);
487array_u8_butlast__3_ = simd_or(Temp28,Temp30);
488Temp31 = simd_or(strct_u16__hsurrogate_,strct_u16__ASCII_);
489array_u8_butlast__4_ = simd_andc(array_u16h__6_,Temp31);
490Temp32 = simd_andc(array_u16h__7_,Temp31);
491Temp33 = simd_and(efghi_e,strct_u16__hsurrogate_);
492array_u8_butlast__5_ = simd_or(Temp32,Temp33);
493Temp34 = simd_andc(array_u16l__0_,Temp31);
494Temp35 = simd_and(efghi_f,strct_u16__hsurrogate_);
495array_u8_butlast__6_ = simd_or(Temp34,Temp35);
496Temp36 = simd_andc(array_u16l__1_,Temp31);
497Temp37 = simd_and(efghi_g,strct_u16__hsurrogate_);
498array_u8_butlast__7_ = simd_or(Temp36,Temp37);
499array_u8_pre__0_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
500array_u8_pre__1_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
501array_u8_pre__2_ = simd_andc(above_0x7FF,strct_u16__surrogate_);
502array_u8_pre__3_ = AllZero;
503array_u8_pre__4_ = simd_and(strct_u16__utf8_3_,array_u16h__0_);
504array_u8_pre__5_ = simd_and(strct_u16__utf8_3_,array_u16h__1_);
505array_u8_pre__6_ = simd_and(strct_u16__utf8_3_,array_u16h__2_);
506array_u8_pre__7_ = simd_and(strct_u16__utf8_3_,array_u16h__3_);
507
508
509/* Convert flattened array values to arrays */
510SIMD_type u8_pre [8];
511u8_pre[0] = array_u8_pre__0_;
512u8_pre[1] = array_u8_pre__1_;
513u8_pre[2] = array_u8_pre__2_;
514u8_pre[3] = array_u8_pre__3_;
515u8_pre[4] = array_u8_pre__4_;
516u8_pre[5] = array_u8_pre__5_;
517u8_pre[6] = array_u8_pre__6_;
518u8_pre[7] = array_u8_pre__7_;
519
520SIMD_type u8_butlast [8];
521u8_butlast[0] = array_u8_butlast__0_;
522u8_butlast[1] = array_u8_butlast__1_;
523u8_butlast[2] = array_u8_butlast__2_;
524u8_butlast[3] = array_u8_butlast__3_;
525u8_butlast[4] = array_u8_butlast__4_;
526u8_butlast[5] = array_u8_butlast__5_;
527u8_butlast[6] = array_u8_butlast__6_;
528u8_butlast[7] = array_u8_butlast__7_;
529
530SIMD_type u8_last [8];
531u8_last[0] = array_u8_last__0_;
532u8_last[1] = array_u8_last__1_;
533u8_last[2] = array_u8_last__2_;
534u8_last[3] = array_u8_last__3_;
535u8_last[4] = array_u8_last__4_;
536u8_last[5] = array_u8_last__5_;
537u8_last[6] = array_u8_last__6_;
538u8_last[7] = array_u8_last__7_;
539
540
/* Error handling: locate the first error bit, report the kind through *err,
 * move *srcbuf to the reported position and replace endmask so that output
 * from err_pos onward is deleted. An error bit exactly at units_read is
 * reported as a truncated sequence, anything else as an illegal one. */
541    if (bitblock_has_bit(strct_u16__error_)) {
542       
543       err_pos = count_forward_zeroes(strct_u16__error_);
544       error_found = true;
545        if ((err_pos == units_read)) {
546                err_pos--;
547                *err = U_TRUNCATED_CHAR_FOUND;
548                (*srcbuf) += err_pos;
549                if(err_pos<0){
550                        target_bckup_steps = 2;
551                        (*targetbuf) -= target_bckup_steps;
552                        return;
553                }               
554        }
555        else {
/* NOTE(review): only `err_pos--` is controlled by this `if`; the statements
 * after it run unconditionally despite their indentation. */
556          if((((unsigned char *)U16h)[err_pos]< 0xDC) || (((unsigned char *)U16h)[err_pos] >= 0xE0))
557                err_pos--;
558                *err = U_ILLEGAL_CHAR_FOUND;
559                (*srcbuf) += err_pos;
560                if(err_pos<0){
561                        target_bckup_steps = 2;
562                        (*targetbuf) -= target_bckup_steps;
563                        return;
564                }
565        }
566        endmask = sisd_sll(simd_const_1(1),sisd_from_int(err_pos));
567    }
568
/* For each output bit position, weave the four slot streams (constant zero,
 * pre, butlast, last) together. */
569    for (int i=0; i< 8; i++){
570        interleave4(simd_const_8(0), u8_pre[i], u8_butlast[i], u8_last[i],
571                                doublepair[i*4], doublepair[i*4+1], 
572                                doublepair[i*4+2], doublepair[i*4+3]);
573    }
574
/* Deletion masks, interleaved the same way: the first slot is always deleted,
 * the pre slot unless utf8_3, the butlast slot for ASCII, and every slot at
 * positions covered by endmask. */
575        interleave4(simd_const_1(1), simd_or(endmask, simd_not(strct_u16__utf8_3_)), 
576                                simd_or(endmask, strct_u16__ASCII_), endmask,
577                                delmask[0], delmask[1], delmask[2], delmask[3]);
578
579
/* Remove the deleted slots from every stream. */
580     for (int i=0; i< 32; i=i+4){
581        bit_del_16(doublepair[i],delmask[0]);
582        bit_del_16(doublepair[i+1],delmask[1]);
583        bit_del_16(doublepair[i+2],delmask[2]);
584        bit_del_16(doublepair[i+3],delmask[3]);
585     }
586
587    short u8_bytes_per_reg[8];
588
/* Merge the streams back into byte registers and emit them. Every register
 * is stored in full, then *targetbuf advances only by the count del_count
 * computed for it, so the next store overwrites the unused tail. */
589    for (int i=0; i< 4; i++){
590      p2s_bytemerge(doublepair[i],doublepair[4+i],doublepair[8+i],doublepair[12+i],doublepair[16+i],doublepair[20+i],doublepair[24+i],doublepair[28+i],
591                        U8[i*8+7],U8[i*8+6],U8[i*8+5],U8[i*8+4],U8[i*8+3],U8[i*8+2],U8[i*8+1],U8[i*8]);
592       for(int k=0; k<8; k++) u8_bytes_per_reg[k] = 0;
593       del_count(delmask[i],u8_bytes_per_reg);
594 
595       for(int j=0; j<8; j++){
596        sisd_store_unaligned(U8[i*8+j],(SIMD_type *) (*targetbuf));
597        *targetbuf += u8_bytes_per_reg[j];
598       }
599    }
600
/* On error *srcbuf was already moved by err_pos above. */
601    if(!error_found) (*srcbuf) += units_read;
602
603    } while ((units_read == BLOCK_SIZE) && !error_found);
604
605}
606
607
608
609
610int
611main(int argc, char * argv[]) {
612        char * infilename, * outfilename;       
613        FILE *infile, *outfile;
614        struct stat fileinfo;
615        const UChar * srcbuf;
616        const UChar * srclimit;
617        const char * targetlimit;
618        char * targetbuf;
619        char * target_head;
620        UErrorCode status;
621        int chars_read;
622
623        if (argc < 2) {
624                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
625                exit(-1);
626        }
627
628        infilename = argv[1];
629        stat(infilename, &fileinfo);
630        infile = fopen(infilename, "rb");
631        if (!infile) {
632                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
633                exit(-1);
634        }
635       
636        if (argc < 3) outfile = stdout;
637        else {
638                outfilename = argv[2];
639                outfile = fopen(outfilename, "wb");
640                if (!outfile) {
641                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
642                        exit(-1);
643                }
644        }
645
646
647        srcbuf = (UChar *) malloc(fileinfo.st_size+1);
648        if (!srcbuf) {
649              fprintf(stderr, "Error: buffer for %s of size %zu cannot be created.\n", infilename, fileinfo.st_size+1);
650              exit(-1);
651        }
652
653        chars_read = fread((void *)srcbuf, 1, fileinfo.st_size, infile);
654
655        srclimit = srcbuf + chars_read/2;
656//      (*srcbuf)[fileinfo.st_size] = '\0';
657        fclose(infile);
658
659        targetbuf = (char *) malloc(chars_read*2);
660        targetlimit = targetbuf + chars_read*2;
661        target_head = targetbuf;
662
663        u16u8(&targetbuf, targetlimit, &srcbuf, srclimit, &status);
664        fwrite(target_head , 1 ,  targetbuf - target_head, outfile );
665
666        if (status == U_ILLEGAL_CHAR_FOUND) {
667                fprintf(stderr, "Illegal UTF-16 sequence at position %lu in source.\n", chars_read-(srclimit-srcbuf)*2);
668        }
669        else if (status == U_TRUNCATED_CHAR_FOUND) {
670                fprintf(stderr, "EOF with incomplete UTF-16 sequence at position %lu in source.\n",chars_read-(srclimit-srcbuf)*2);
671        }
672
673//      fclose(infile);
674//      fclose(outfile);
675
676
677
678        return(0);
679}
Note: See TracBrowser for help on using the repository browser.