source: GPU/xmlwf.cl @ 1669

Last change on this file since 1669 was 1669, checked in by lindanl, 8 years ago

Parabix on GPU : Add u8, scope and ctcdpi. Add scan functions.

File size: 26.3 KB
Line 
1
2#define CARRY_BIT_MASK 0x8000000000000000
3#define WORK_GROUP_SIZE 64
4#define ELEMENTS 1024*128
5#define MAX_CARRY 10
6#define EOF_mask 0xFFFFFFFFFFFFFFFF
7
8/******************Library Functions 64-bit********************/
9#define BitBlock unsigned long long
10#define simd_or(b1, b2) (b1 | b2)
11#define simd_and(b1, b2) (b1 & b2)
12#define simd_xor(b1, b2) (b1 ^ b2)
13#define simd_andc(b1, b2) (b1 & (~b2))
14#define simd_not(b) (~b)
15#define simd_nor(a,b) (simd_not(simd_or(a,b)))
16
17#define simd_himask_2 0xAAAAAAAAAAAAAAAA
18#define simd_himask_4 0xCCCCCCCCCCCCCCCC
19#define simd_himask_8 0xF0F0F0F0F0F0F0F0
20/* Little-endian */
21#define simd_himask_16 0xFF00FF00FF00FF00
22#define simd_himask_32 0xFFFF0000FFFF0000
23
24#define bitblock_has_bit(v) (v!=0)
25
26#define simd_add_32(a,b) (a+b)
27#define simd_sub_32(a,b) (a-b)
28
29#define sisd_slli(r, shft) (r<<shft)
30#define sisd_srli(r, shft) (r>>shft)
31
32#define sisd_from_int(n) n
33
34#define simd_const_1(x)\
35  ((x)==0 ? 0 : 0xFFFFFFFFFFFFFFFF)
36
37#define simd_if(cond, then_val, else_val) \
38  simd_or(simd_and(then_val, cond), simd_andc(else_val, cond))
39
40#define simd_pack_16_hh(b, a)\
41(a & 0xFF00000000000000)|((a & 0x0000FF0000000000)<<8)|((a & 0X00000000FF000000)<<16)|((a & 0X000000000000FF00)<<24)|\
42((b & 0xFF00000000000000)>>32)|((b & 0x0000FF0000000000)>>24)|((b & 0X00000000FF000000)>>16)|((b & 0X000000000000FF00)>>8)
43
44#define simd_pack_16_ll(b, a)\
45((a & 0x00FF000000000000)<<8)|((a & 0x000000FF00000000)<<16)|((a & 0X0000000000FF0000)<<24)|((a & 0X00000000000000FF)<<32)|\
46((b & 0x00FF000000000000)>>24)|((b & 0x000000FF00000000)>>16)|((b & 0X0000000000FF0000)>>8)|(b & 0X00000000000000FF)
47
48#define s2p_step(s0, s1, hi_mask, shift, p0, p1)  \
49  do {\
50        BitBlock t0,t1;\
51        t0 = simd_pack_16_hh(s0, s1);\
52        t1 = simd_pack_16_ll(s0, s1);\
53        p0 = simd_if(hi_mask, t0, (t1 >> shift));\
54        p1 = simd_if(hi_mask, (t0 << shift), t1);\
55  } while(0)
56
57#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
58  do {\
59        BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3;\
60        BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3;\
61        BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1;\
62        BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1;\
63        s2p_step(s0,s1,simd_himask_2,1,bit00224466_0,bit11335577_0);\
64        s2p_step(s2,s3,simd_himask_2,1,bit00224466_1,bit11335577_1);\
65        s2p_step(s4,s5,simd_himask_2,1,bit00224466_2,bit11335577_2);\
66        s2p_step(s6,s7,simd_himask_2,1,bit00224466_3,bit11335577_3);\
67        s2p_step(bit00224466_0,bit00224466_1,simd_himask_4,2,bit00004444_0,bit22226666_0);\
68        s2p_step(bit00224466_2,bit00224466_3,simd_himask_4,2,bit00004444_1,bit22226666_1);\
69        s2p_step(bit11335577_0,bit11335577_1,simd_himask_4,2,bit11115555_0,bit33337777_0);\
70        s2p_step(bit11335577_2,bit11335577_3,simd_himask_4,2,bit11115555_1,bit33337777_1);\
71        s2p_step(bit00004444_0,bit00004444_1,simd_himask_8,4,p0,p4);\
72        s2p_step(bit11115555_0,bit11115555_1,simd_himask_8,4,p1,p5);\
73        s2p_step(bit22226666_0,bit22226666_1,simd_himask_8,4,p2,p6);\
74        s2p_step(bit33337777_0,bit33337777_1,simd_himask_8,4,p3,p7);\
75  } while(0)
76
77
78inline BitBlock adc(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){
79        BitBlock carry_mask;
80        BitBlock bubble_mask;
81
82        BitBlock partial_sum = a+b;
83        BitBlock gen = a&b;
84        BitBlock prop = a^b;
85        carry[idx] = ((gen | (prop & ~partial_sum))&CARRY_BIT_MASK)>>(WORK_GROUP_SIZE-1-idx);
86        bubble[idx] = (partial_sum + 1)? 0:(((long long)1)<<idx);
87       
88        barrier(CLK_LOCAL_MEM_FENCE);
89        for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){
90                carry[idx] = carry[idx]|carry[idx^offset];
91                bubble[idx] = bubble[idx]|bubble[idx^offset];
92                barrier(CLK_LOCAL_MEM_FENCE);
93        }
94       
95        carry_mask = (carry[0]<<1)|group_carry[carryno];
96        bubble_mask = bubble[0];
97       
98        BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;
99        BitBlock inc = s | (s-carry_mask);
100        BitBlock rslt = partial_sum + ((inc>>idx)&0x1);
101        group_carry[carryno] = (carry[0]|(bubble_mask & inc))>>63;
102        return rslt;
103}
104
105inline BitBlock sbb(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){
106        BitBlock carry_mask;
107        BitBlock bubble_mask;
108
109        BitBlock partial_diff = a-b;
110        BitBlock gen = b & ~a;
111        BitBlock prop = ~(a^b);
112        carry[idx] = ((gen | (prop & partial_diff))&CARRY_BIT_MASK)>>(63-idx);
113        bubble[idx] = (partial_diff)? 0:(((long long)1)<<idx);
114       
115        barrier(CLK_LOCAL_MEM_FENCE);
116        for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){
117                carry[idx] = carry[idx]|carry[idx^offset];
118                bubble[idx] = bubble[idx]|bubble[idx^offset];
119                barrier(CLK_LOCAL_MEM_FENCE);
120        }
121       
122        carry_mask = (carry[0]<<1)|group_carry[carryno];
123        bubble_mask = bubble[0];
124       
125        BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;
126        BitBlock dec = s | (s-carry_mask);
127        BitBlock rslt = partial_diff - ((dec>>idx)&0x1);
128        group_carry[carryno] = (carry[0]|(bubble_mask & dec))>>63;
129        return rslt;
130}
131
132inline BitBlock advance(int idx, BitBlock a, __local BitBlock *carry, BitBlock *group_carry, const int carryno){
133        carry[0] = group_carry[carryno];
134        BitBlock c = a<<1;
135        carry[idx+1] = (a & CARRY_BIT_MASK)>>63;
136        barrier(CLK_LOCAL_MEM_FENCE);
137        group_carry[carryno] = carry[WORK_GROUP_SIZE];
138        c = c | carry[idx];
139        return c;
140}
141
142static inline BitBlock scanthru(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
143        return simd_andc(adc(idx, markers, charclass, carry, bubble, group_carry, carryno), charclass);
144}
145
146static inline BitBlock scanto(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
147        return simd_and(adc(idx, markers, simd_not(charclass), carry, bubble, group_carry, carryno), charclass);
148}
149
150static inline BitBlock scantofirst(int idx, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
151        BitBlock marker;
152        group_carry[carryno] = simd_xor(group_carry[carryno], 1);
153    marker = adc(idx, 0, simd_not(charclass), carry, bubble, group_carry, carryno);
154    group_carry[carryno] = simd_xor(group_carry[carryno], 1);
155    return simd_and(marker, charclass);
156}
157
158inline void CarryInit(BitBlock * carry, int count){
159    for (int j=0; j < count; j++)
160         carry[j] = 0;
161}
162
163static inline int CarryTest(BitBlock * cq, const int carryno, const int carry_count) {
164  BitBlock c1 = cq[carryno];
165  for (int i = carryno + 1; i < carryno + carry_count; i++) {
166    c1 = c1 | cq[i];
167  }
168  return bitblock_has_bit(c1);
169}
170
171static inline void CarryCombine(BitBlock * cq, BitBlock * local_cq, const int carryno, const int carry_count) {
172  for (int i = 0; i < carry_count; i++) {
173    cq[carryno+i] = simd_or(cq[carryno+i], local_cq[i]);
174  }
175}
176/**************Parabix Structs***************/
177  struct Basis_bits {
178  BitBlock bit_0;
179  BitBlock bit_1;
180  BitBlock bit_2;
181  BitBlock bit_3;
182  BitBlock bit_4;
183  BitBlock bit_5;
184  BitBlock bit_6;
185  BitBlock bit_7;
186};
187
188  struct Lex {
189  BitBlock CR;
190  BitBlock LF;
191  BitBlock HT;
192  BitBlock SP;
193  BitBlock CRLF;
194  BitBlock RefStart;
195  BitBlock Semicolon;
196  BitBlock Colon;
197  BitBlock LAngle;
198  BitBlock RAngle;
199  BitBlock LBracket;
200  BitBlock RBracket;
201  BitBlock Exclam;
202  BitBlock QMark;
203  BitBlock Hyphen;
204  BitBlock Equals;
205  BitBlock SQuote;
206  BitBlock DQuote;
207  BitBlock Slash;
208  BitBlock Hash;
209  BitBlock x;
210  BitBlock ASCII_name_start;
211  BitBlock ASCII_name_char;
212  BitBlock NameScan;
213  BitBlock Digit;
214  BitBlock Hex;
215  BitBlock WS;
216  BitBlock error;
217};
218
219  struct U8 {
220  BitBlock suffix;
221  BitBlock FFFE_FFFF;
222  BitBlock error;
223};
224
225  struct Scope1 {
226  BitBlock RefStart;
227  BitBlock LAngle;
228  BitBlock Hyphen;
229  BitBlock QMark;
230  BitBlock RBracket;
231};
232
233  struct CtCDPI_Callouts {
234  BitBlock CD_end;
235  BitBlock Ct_starts;
236  BitBlock Ct_ends;
237  BitBlock CD_starts;
238  BitBlock CD_ends;
239  BitBlock PI_starts;
240  BitBlock PI_name_starts;
241  BitBlock PI_name_ends;
242  BitBlock PI_ends;
243  BitBlock CtCDPI_mask;
244  BitBlock error;
245};
246
247/**************Parabix Functions***************/
248#define classify_bytes_Validate_utf8(idx, basis_bits, lex, u8, carry, group_carry)\
249 do {\
250                BitBlock temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;\
251                BitBlock temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;\
252                BitBlock temp19, temp20, temp21, temp22, temp23, temp24, temp25, temp26;\
253                BitBlock temp27, temp28, temp29, temp30, temp31, temp32, temp33, temp34;\
254                BitBlock temp35, temp36, temp37, temp38, temp39, temp40, temp41, temp42;\
255                BitBlock temp43, temp44, temp45, temp46, temp47, temp48, x00_x1F, temp49;\
256                BitBlock temp50, temp51, temp52, temp53, temp54, temp55, temp56, temp57;\
257                BitBlock temp58, temp59, temp60, temp61, temp62, temp63, temp64, temp65;\
258                BitBlock u8anyscope, temp66, temp67, temp68, temp69, temp70, temp71;\
259                BitBlock unibyte, prefix, prefix2, prefix3, prefix4, badprefix;\
260                BitBlock xE0, xED, xF0, xF4, xA0_xBF, x80_x9F, x90_xBF, x80_x8F, xEF, xBF, xBE;\
261                BitBlock scope22, scope32, scope33, scope42, scope43, scope44, xE0_scope, xED_scope, xF0_scope, xF4_scope, xEF_scope;\
262                BitBlock temp72, temp73, u8lastscope, u8error1, u8error2;\
263                BitBlock u8error3, u8error4, EF_BF_pending, u8mismatch;\
264\
265        temp1 = simd_or(basis_bits.bit_0, basis_bits.bit_1);\
266        temp2 = simd_andc(basis_bits.bit_2, basis_bits.bit_3);\
267        temp3 = simd_andc(temp2, temp1);\
268        temp4 = simd_andc(basis_bits.bit_5, basis_bits.bit_4);\
269        temp5 = simd_andc(basis_bits.bit_6, basis_bits.bit_7);\
270        temp6 = simd_and(temp4, temp5);\
271        lex.RefStart = simd_and(temp3, temp6);\
272        temp7 = simd_and(basis_bits.bit_2, basis_bits.bit_3);\
273        temp8 = simd_andc(temp7, temp1);\
274        temp9 = simd_andc(basis_bits.bit_4, basis_bits.bit_5);\
275        temp10 = simd_and(basis_bits.bit_6, basis_bits.bit_7);\
276        temp11 = simd_and(temp9, temp10);\
277        lex.Semicolon = simd_and(temp8, temp11);\
278        temp12 = simd_and(basis_bits.bit_4, basis_bits.bit_5);\
279        temp13 = simd_or(basis_bits.bit_6, basis_bits.bit_7);\
280        temp14 = simd_andc(temp12, temp13);\
281        lex.LAngle = simd_and(temp8, temp14);\
282        temp15 = simd_and(temp12, temp5);\
283        lex.RAngle = simd_and(temp8, temp15);\
284        temp16 = simd_andc(basis_bits.bit_1, basis_bits.bit_0);\
285        temp17 = simd_andc(basis_bits.bit_3, basis_bits.bit_2);\
286        temp18 = simd_and(temp16, temp17);\
287        lex.LBracket = simd_and(temp18, temp11);\
288        temp19 = simd_andc(basis_bits.bit_7, basis_bits.bit_6);\
289        temp20 = simd_and(temp12, temp19);\
290        lex.RBracket = simd_and(temp18, temp20);\
291        temp21 = simd_or(basis_bits.bit_4, basis_bits.bit_5);\
292        temp22 = simd_andc(temp19, temp21);\
293        lex.Exclam = simd_and(temp3, temp22);\
294        temp23 = simd_and(temp12, temp10);\
295        lex.QMark = simd_and(temp8, temp23);\
296        lex.Hyphen = simd_and(temp3, temp20);\
297        lex.Equals = simd_and(temp8, temp20);\
298        temp24 = simd_and(temp4, temp10);\
299        lex.SQuote = simd_and(temp3, temp24);\
300        temp25 = simd_andc(temp5, temp21);\
301        lex.DQuote = simd_and(temp3, temp25);\
302        lex.Slash = simd_and(temp3, temp23);\
303        temp26 = simd_andc(temp10, temp21);\
304        lex.Hash = simd_and(temp3, temp26);\
305        temp27 = simd_and(temp16, temp7);\
306        temp28 = simd_andc(temp9, temp13);\
307        lex.x = simd_and(temp27, temp28);\
308        temp29 = simd_and(temp9, temp5);\
309        lex.Colon = simd_and(temp8, temp29);\
310        temp30 = simd_and(temp18, temp23);\
311        temp31 = simd_or(temp30, lex.Colon);\
312        temp32 = simd_andc(temp16, basis_bits.bit_2);\
313        temp33 = simd_or(basis_bits.bit_5, temp10);\
314        temp34 = simd_and(basis_bits.bit_4, temp33);\
315        temp35 = simd_not(temp34);\
316        temp36 = simd_or(temp21, temp13);\
317        temp37 = simd_or(simd_and(basis_bits.bit_3, temp35), simd_andc(temp36, basis_bits.bit_3));\
318        temp38 = simd_and(temp32, temp37);\
319        temp39 = simd_or(temp31, temp38);\
320        temp40 = simd_and(temp16, basis_bits.bit_2);\
321        temp41 = simd_and(temp40, temp37);\
322        lex.ASCII_name_start = simd_or(temp39, temp41);\
323        temp42 = simd_or(temp30, lex.Hyphen);\
324        temp43 = simd_and(temp3, temp15);\
325        temp44 = simd_or(temp42, temp43);\
326        temp45 = simd_andc(temp8, temp34);\
327        temp46 = simd_or(temp44, temp45);\
328        temp47 = simd_or(temp46, temp38);\
329        lex.ASCII_name_char = simd_or(temp47, temp41);\
330        lex.NameScan = simd_or(lex.ASCII_name_char, basis_bits.bit_0);\
331        temp48 = simd_or(temp1, basis_bits.bit_2);\
332        x00_x1F = simd_not(temp48);\
333        temp49 = simd_or(basis_bits.bit_2, basis_bits.bit_3);\
334        temp50 = simd_or(temp1, temp49);\
335        lex.CR = simd_andc(temp20, temp50);\
336        lex.LF = simd_andc(temp29, temp50);\
337        temp51 = simd_and(temp9, temp19);\
338        lex.HT = simd_andc(temp51, temp50);\
339        lex.SP = simd_andc(temp3, temp36);\
340        temp52 = simd_or(temp20, temp29);\
341        temp53 = simd_or(temp52, temp51);\
342        temp54 = simd_andc(temp53, temp50);\
343        lex.WS = simd_or(temp54, lex.SP);\
344        temp55 = simd_or(basis_bits.bit_5, basis_bits.bit_6);\
345        temp56 = simd_and(basis_bits.bit_4, temp55);\
346        lex.Digit = simd_andc(temp8, temp56);\
347        temp57 = simd_andc(temp16, temp49);\
348        temp58 = simd_andc(temp57, basis_bits.bit_4);\
349        temp59 = simd_not(temp10);\
350        temp60 = simd_or(simd_and(basis_bits.bit_5, temp59), simd_andc(temp13, basis_bits.bit_5));\
351        temp61 = simd_and(temp58, temp60);\
352        temp62 = simd_or(lex.Digit, temp61);\
353        temp63 = simd_and(temp16, temp2);\
354        temp64 = simd_andc(temp63, basis_bits.bit_4);\
355        temp65 = simd_and(temp64, temp60);\
356        lex.Hex = simd_or(temp62, temp65);\
357        lex.error = simd_andc(x00_x1F, lex.WS);\
358                unibyte = simd_not(basis_bits.bit_0);\
359        u8.suffix = simd_const_1(0);\
360        u8.error = simd_const_1(0);\
361        u8.FFFE_FFFF = simd_const_1(0);\
362        u8anyscope = simd_const_1(0);\
363        if ((bitblock_has_bit(basis_bits.bit_0) || CarryTest(group_carry, 0, 11))) {\
364          prefix = simd_and(basis_bits.bit_0, basis_bits.bit_1);\
365          prefix2 = simd_andc(prefix, basis_bits.bit_2);\
366          prefix3 = simd_and(prefix, temp2);\
367          prefix4 = simd_and(prefix, temp7);\
368          u8.suffix = simd_andc(basis_bits.bit_0, basis_bits.bit_1);\
369          temp66 = simd_andc(prefix, temp49);\
370          temp67 = simd_or(temp21, basis_bits.bit_6);\
371          temp68 = simd_andc(temp66, temp67);\
372          temp69 = simd_and(basis_bits.bit_5, temp13);\
373          temp70 = simd_or(basis_bits.bit_4, temp69);\
374          temp71 = simd_and(prefix4, temp70);\
375          badprefix = simd_or(temp68, temp71);\
376          u8.error = badprefix;\
377          scope22 = advance(idx, prefix2, carry, group_carry, 0);\
378          u8anyscope = scope22;\
379          if (bitblock_has_bit(simd_or(prefix3, prefix4))) {\
380            xE0 = simd_andc(prefix3, temp36);\
381            xED = simd_and(prefix3, temp20);\
382            xF0 = simd_andc(prefix4, temp36);\
383            temp72 = simd_andc(temp4, temp13);\
384            xF4 = simd_and(prefix4, temp72);\
385            xA0_xBF = simd_and(u8.suffix, basis_bits.bit_2);\
386            x80_x9F = simd_andc(u8.suffix, basis_bits.bit_2);\
387            x90_xBF = simd_and(u8.suffix, temp49);\
388            x80_x8F = simd_andc(u8.suffix, temp49);\
389            xEF = simd_and(prefix3, temp23);\
390            temp73 = simd_and(u8.suffix, temp7);\
391            xBF = simd_and(temp73, temp23);\
392            xBE = simd_and(temp73, temp15);\
393            xE0_scope = advance(idx, xE0, carry, group_carry, 1);\
394            xED_scope = advance(idx, xED, carry, group_carry, 2);\
395            xF0_scope = advance(idx, xF0, carry, group_carry, 3);\
396            xF4_scope = advance(idx, xF4, carry, group_carry, 4);\
397            xEF_scope = advance(idx, xEF, carry, group_carry, 5);\
398            scope32 = advance(idx, prefix3, carry, group_carry, 6);\
399            scope33 = advance(idx, scope32, carry, group_carry, 7);\
400            scope42 = advance(idx, prefix4, carry, group_carry, 8);\
401            scope43 = advance(idx, scope42, carry, group_carry, 9);\
402            scope44 = advance(idx, scope43, carry, group_carry, 10);\
403            u8lastscope = simd_or(simd_or(scope22, scope33), scope44);\
404            u8anyscope = simd_or(simd_or(simd_or(u8lastscope, scope32), scope42), scope43);\
405            u8error1 = simd_and(xE0_scope, x80_x9F);\
406            u8error2 = simd_and(xED_scope, xA0_xBF);\
407            u8error3 = simd_and(xF0_scope, x80_x8F);\
408            u8error4 = simd_and(xF4_scope, x90_xBF);\
409            u8.error = simd_or(u8.error, simd_or(simd_or(simd_or(u8error1, u8error2), u8error3), u8error4));\
410            EF_BF_pending = advance(idx, simd_and(xEF_scope, xBF), carry, group_carry, 11);\
411            u8.FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));\
412          }\
413        }\
414        u8mismatch = simd_xor(u8anyscope, u8.suffix);\
415        u8.error = simd_or(u8.error, u8mismatch);\
416  } while (0)
417
418 #define add_scope_streams(idx, lex, scope1, carry, group_carry)\
419 do {\
420                BitBlock v, w, v1, w1;\
421\
422        v = simd_or(lex.LAngle, lex.Hyphen);\
423        w = simd_or(lex.Hyphen, lex.QMark);\
424        v1 = advance(idx, v, carry, group_carry, 0);\
425        w1 = advance(idx, w, carry, group_carry, 1);\
426        scope1.LAngle = simd_andc(v1, w1);\
427        scope1.Hyphen = simd_and(v1, w1);\
428        scope1.QMark = simd_andc(w1, v1);\
429  } while (0)
430
431 #define parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, group_carry)\
432 do {\
433                BitBlock CtCDPI_starts, Ct_errors, PI_start, CtCD_start, CtCDPI_start;\
434                BitBlock DoubleHyphen, PI_end, CtCDPI_Cursor, PI_Cursor, CD_Ct_Cursor;\
435                BitBlock CD_Cursor, Ct_Cursor, PI_name_end;\
436\
437        ctCDPI_Callouts.CD_end = simd_const_1(0);\
438        ctCDPI_Callouts.Ct_starts = simd_const_1(0);\
439        ctCDPI_Callouts.Ct_ends = simd_const_1(0);\
440        ctCDPI_Callouts.CD_starts = simd_const_1(0);\
441        ctCDPI_Callouts.CD_ends = simd_const_1(0);\
442        ctCDPI_Callouts.PI_starts = simd_const_1(0);\
443        ctCDPI_Callouts.PI_name_starts = simd_const_1(0);\
444        ctCDPI_Callouts.PI_name_ends = simd_const_1(0);\
445        ctCDPI_Callouts.PI_ends = simd_const_1(0);\
446        ctCDPI_Callouts.CtCDPI_mask = simd_const_1(0);\
447        ctCDPI_Callouts.error = simd_const_1(0);\
448        CtCDPI_starts = simd_const_1(0);\
449        Ct_errors = simd_const_1(0);\
450        if ((bitblock_has_bit(lex.RBracket) || CarryTest(group_carry, 0, 2))) {\
451          scope1.RBracket = advance(idx, lex.RBracket, carry, group_carry, 0);\
452          ctCDPI_Callouts.CD_end = simd_and(advance(idx, simd_and(scope1.RBracket, lex.RBracket), carry, group_carry, 1), lex.RAngle);\
453        }\
454        PI_start = simd_and(scope1.LAngle, lex.QMark);\
455        CtCD_start = simd_and(scope1.LAngle, lex.Exclam);\
456        CtCDPI_start = simd_or(PI_start, CtCD_start);\
457        DoubleHyphen = simd_and(scope1.Hyphen, lex.Hyphen);\
458        PI_end = simd_and(scope1.QMark, lex.RAngle);\
459        CtCDPI_Cursor = scantofirst(idx, CtCDPI_start, carry, bubble, group_carry, 2);\
460        if ((bitblock_has_bit(CtCDPI_Cursor) || CarryTest(group_carry, 3, 14))) {\
461          CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\
462          PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\
463          CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, group_carry, 3);\
464          CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\
465          Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\
466          ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\
467          ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\
468          ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\
469          Ct_Cursor = advance(idx, Ct_Cursor, carry, group_carry, 4);\
470          Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\
471          Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, group_carry, 5), carry, group_carry, 6);\
472          PI_Cursor = advance(idx, PI_Cursor, carry, group_carry, 7);\
473          ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\
474          PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, group_carry, 8);\
475          ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\
476          PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, group_carry, 9);\
477          CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, group_carry, 10);\
478          Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, group_carry, 11), carry, group_carry, 12);\
479          ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\
480          ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\
481          ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\
482          CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\
483          CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, group_carry, 13);\
484          ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, group_carry, 14), CtCDPI_starts, carry, bubble, group_carry, 15);\
485          ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\
486          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, group_carry, 16), PI_end));\
487          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\
488          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\
489          while (bitblock_has_bit(CtCDPI_Cursor)) {\
490            BitBlock subcarryQ[14];\
491            CarryInit(subcarryQ, 14);\
492            CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\
493            PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\
494            CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, subcarryQ, 0);\
495            CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\
496            Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\
497            ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\
498            ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\
499            ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\
500            Ct_Cursor = advance(idx, Ct_Cursor, carry, subcarryQ, 1);\
501            Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\
502            Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, subcarryQ, 2), carry, subcarryQ, 3);\
503            PI_Cursor = advance(idx, PI_Cursor, carry, subcarryQ, 4);\
504            ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\
505            PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, subcarryQ, 5);\
506            ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\
507            PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, subcarryQ, 6);\
508            CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, subcarryQ, 7);\
509            Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, subcarryQ, 8), carry, subcarryQ, 9);\
510            ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\
511            ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\
512            ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\
513            CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\
514            CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, subcarryQ, 10);\
515            ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, subcarryQ, 11), CtCDPI_starts, carry, bubble, subcarryQ, 12);\
516            ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\
517            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, subcarryQ, 13), PI_end));\
518            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\
519            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\
520            CarryCombine(group_carry, subcarryQ, 3, 14);\
521          }\
522        }\
523  } while (0)
524
525/*************Kernel Function****************/
526__kernel
527void xmlwf(__global BitBlock *s, 
528                   __global BitBlock *err_pos,
529                        __local BitBlock *carry,
530                        __local BitBlock *bubble) {
531
532   int i = get_local_id(0);
533   struct Basis_bits basis_bits;
534   struct Lex lex;
535   struct U8 u8;
536   struct Scope1 scope1;
537   struct CtCDPI_Callouts ctCDPI_Callouts;
538
539   BitBlock classify_bytes_Validate_utf8_carryQ[12];
540   CarryInit(classify_bytes_Validate_utf8_carryQ, 12);
541   BitBlock add_scope_streams_carry[2];
542   CarryInit(add_scope_streams_carry, 2);
543   BitBlock parse_CtCDPI_carryQ[17];
544   CarryInit(parse_CtCDPI_carryQ, 17);
545   
546   BitBlock group_carryQ[MAX_CARRY];
547   CarryInit(group_carryQ, 2);
548
549   for(int idx=i; idx<ELEMENTS;idx=idx+WORK_GROUP_SIZE){
550                s2p_bytepack(s[idx*8], s[idx*8+1], s[idx*8+2], s[idx*8+3], s[idx*8+4], s[idx*8+5], s[idx*8+6], s[idx*8+7], 
551                basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, 
552                basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
553               
554                classify_bytes_Validate_utf8(i, basis_bits, lex, u8, carry, classify_bytes_Validate_utf8_carryQ);
555                add_scope_streams(i, lex, scope1, carry, add_scope_streams_carry);
556                parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, parse_CtCDPI_carryQ);
557                err_pos[idx] = ctCDPI_Callouts.error;
558        }
559
560}
Note: See TracBrowser for help on using the repository browser.