Changeset 1669 for GPU/xmlwf.cl
 Timestamp:
 Nov 7, 2011, 4:45:11 PM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

GPU/xmlwf.cl
r1668 r1669 3 3 #define WORK_GROUP_SIZE 64 4 4 #define ELEMENTS 1024*128 5 #define MAX_CARRY 2 5 #define MAX_CARRY 10 6 #define EOF_mask 0xFFFFFFFFFFFFFFFF 6 7 7 8 /******************Library Functions 64bit********************/ … … 75 76 76 77 77 #define adc(idx, a, b, c, carry, bubble, group_carry) \ 78 do {\ 79 \ 80 BitBlock carry_mask;\ 81 BitBlock bubble_mask;\ 82 \ 83 BitBlock partial_sum = a+b;\ 84 BitBlock gen = a&b;\ 85 BitBlock prop = a^b;\ 86 carry[idx] = ((gen  (prop & ~partial_sum))&CARRY_BIT_MASK)>>(WORK_GROUP_SIZE1idx);\ 87 bubble[idx] = (partial_sum + 1)? 0:(((long long)1)<<idx);\ 88 \ 89 barrier(CLK_LOCAL_MEM_FENCE);\ 90 for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){\ 91 carry[idx] = carry[idx]carry[idx^offset];\ 92 bubble[idx] = bubble[idx]bubble[idx^offset];\ 93 barrier(CLK_LOCAL_MEM_FENCE);\ 94 }\ 95 \ 96 carry_mask = (carry[0]<<1)group_carry;\ 97 bubble_mask = bubble[0];\ 98 \ 99 BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;\ 100 BitBlock inc = s  (scarry_mask);\ 101 c = partial_sum + ((inc>>idx)&0x1);\ 102 group_carry = (carry[0](bubble_mask & inc))>>63;\ 103 \ 104 }while(0) 105 106 #define sbb(idx, a, b, c, carry, bubble, group_carry) \ 107 do {\ 108 \ 109 BitBlock carry_mask;\ 110 BitBlock bubble_mask;\ 111 \ 112 BitBlock partial_diff = ab;\ 113 BitBlock gen = b & ~a;\ 114 BitBlock prop = ~(a^b);\ 115 carry[idx] = ((gen  (prop & partial_diff))&CARRY_BIT_MASK)>>(63idx);\ 116 bubble[idx] = (partial_diff)? 0:(((long long)1)<<idx);\ 117 \ 118 barrier(CLK_LOCAL_MEM_FENCE);\ 119 for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){\ 120 carry[idx] = carry[idx]carry[idx^offset];\ 121 bubble[idx] = bubble[idx]bubble[idx^offset];\ 122 barrier(CLK_LOCAL_MEM_FENCE);\ 123 }\ 124 \ 125 carry_mask = (carry[0]<<1)group_carry;\ 126 bubble_mask = bubble[0];\ 127 \ 128 BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;\ 129 BitBlock dec = s  (scarry_mask);\ 130 c = partial_diff  ((dec>>idx)&0x1);\ 131 group_carry = (carry[0](bubble_mask & dec))>>63;\ 132 \ 133 }while(0) 134 135 #define advance(idx, a, c, carry, group_carry) \ 136 do {\ 137 carry[0] = group_carry;\ 138 c = a<<1;\ 139 carry[idx+1] = (a & CARRY_BIT_MASK)>>63;\ 140 barrier(CLK_LOCAL_MEM_FENCE);\ 141 group_carry = carry[WORK_GROUP_SIZE];\ 142 c = c  carry[idx];\ 143 \ 144 }while(0) 145 78 inline BitBlock adc(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){ 79 BitBlock carry_mask; 80 BitBlock bubble_mask; 81 82 BitBlock partial_sum = a+b; 83 BitBlock gen = a&b; 84 BitBlock prop = a^b; 85 carry[idx] = ((gen  (prop & ~partial_sum))&CARRY_BIT_MASK)>>(WORK_GROUP_SIZE1idx); 86 bubble[idx] = (partial_sum + 1)? 0:(((long long)1)<<idx); 87 88 barrier(CLK_LOCAL_MEM_FENCE); 89 for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){ 90 carry[idx] = carry[idx]carry[idx^offset]; 91 bubble[idx] = bubble[idx]bubble[idx^offset]; 92 barrier(CLK_LOCAL_MEM_FENCE); 93 } 94 95 carry_mask = (carry[0]<<1)group_carry[carryno]; 96 bubble_mask = bubble[0]; 97 98 BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask; 99 BitBlock inc = s  (scarry_mask); 100 BitBlock rslt = partial_sum + ((inc>>idx)&0x1); 101 group_carry[carryno] = (carry[0](bubble_mask & inc))>>63; 102 return rslt; 103 } 104 105 inline BitBlock sbb(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){ 106 BitBlock carry_mask; 107 BitBlock bubble_mask; 108 109 BitBlock partial_diff = ab; 110 BitBlock gen = b & ~a; 111 BitBlock prop = ~(a^b); 112 carry[idx] = ((gen  (prop & partial_diff))&CARRY_BIT_MASK)>>(63idx); 113 bubble[idx] = (partial_diff)? 0:(((long long)1)<<idx); 114 115 barrier(CLK_LOCAL_MEM_FENCE); 116 for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){ 117 carry[idx] = carry[idx]carry[idx^offset]; 118 bubble[idx] = bubble[idx]bubble[idx^offset]; 119 barrier(CLK_LOCAL_MEM_FENCE); 120 } 121 122 carry_mask = (carry[0]<<1)group_carry[carryno]; 123 bubble_mask = bubble[0]; 124 125 BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask; 126 BitBlock dec = s  (scarry_mask); 127 BitBlock rslt = partial_diff  ((dec>>idx)&0x1); 128 group_carry[carryno] = (carry[0](bubble_mask & dec))>>63; 129 return rslt; 130 } 131 132 inline BitBlock advance(int idx, BitBlock a, __local BitBlock *carry, BitBlock *group_carry, const int carryno){ 133 carry[0] = group_carry[carryno]; 134 BitBlock c = a<<1; 135 carry[idx+1] = (a & CARRY_BIT_MASK)>>63; 136 barrier(CLK_LOCAL_MEM_FENCE); 137 group_carry[carryno] = carry[WORK_GROUP_SIZE]; 138 c = c  carry[idx]; 139 return c; 140 } 141 142 static inline BitBlock scanthru(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) { 143 return simd_andc(adc(idx, markers, charclass, carry, bubble, group_carry, carryno), charclass); 144 } 145 146 static inline BitBlock scanto(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) { 147 return simd_and(adc(idx, markers, simd_not(charclass), carry, bubble, group_carry, carryno), charclass); 148 } 149 150 static inline BitBlock scantofirst(int idx, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) { 151 BitBlock marker; 152 group_carry[carryno] = simd_xor(group_carry[carryno], 1); 153 marker = adc(idx, 0, simd_not(charclass), carry, bubble, group_carry, carryno); 154 group_carry[carryno] = simd_xor(group_carry[carryno], 1); 155 return simd_and(marker, charclass); 156 } 157 158 inline void CarryInit(BitBlock * carry, int count){ 159 for (int j=0; j < count; j++) 160 carry[j] = 0; 161 } 162 163 static inline int CarryTest(BitBlock * cq, const int carryno, const int carry_count) { 164 BitBlock c1 = cq[carryno]; 165 for (int i = carryno + 1; i < carryno + carry_count; i++) { 166 c1 = c1  cq[i]; 167 } 168 return bitblock_has_bit(c1); 169 } 170 171 static inline void CarryCombine(BitBlock * cq, BitBlock * local_cq, const int carryno, const int carry_count) { 172 for (int i = 0; i < carry_count; i++) { 173 cq[carryno+i] = simd_or(cq[carryno+i], local_cq[i]); 174 } 175 } 146 176 /**************Parabix Structs***************/ 147 177 struct Basis_bits { … … 187 217 }; 188 218 219 struct U8 { 220 BitBlock suffix; 221 BitBlock FFFE_FFFF; 222 BitBlock error; 223 }; 224 225 struct Scope1 { 226 BitBlock RefStart; 227 BitBlock LAngle; 228 BitBlock Hyphen; 229 BitBlock QMark; 230 BitBlock RBracket; 231 }; 232 233 struct CtCDPI_Callouts { 234 BitBlock CD_end; 235 BitBlock Ct_starts; 236 BitBlock Ct_ends; 237 BitBlock CD_starts; 238 BitBlock CD_ends; 239 BitBlock PI_starts; 240 BitBlock PI_name_starts; 241 BitBlock PI_name_ends; 242 BitBlock PI_ends; 243 BitBlock CtCDPI_mask; 244 BitBlock error; 245 }; 246 189 247 /**************Parabix Functions***************/ 190 #define classify_bytes (idx, basis_bits, lex,carry)\248 #define classify_bytes_Validate_utf8(idx, basis_bits, lex, u8, carry, group_carry)\ 191 249 do {\ 192 250 BitBlock temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;\ … … 198 256 BitBlock temp50, temp51, temp52, temp53, temp54, temp55, temp56, temp57;\ 199 257 BitBlock temp58, temp59, temp60, temp61, temp62, temp63, temp64, temp65;\ 258 BitBlock u8anyscope, temp66, temp67, temp68, temp69, temp70, temp71;\ 259 BitBlock unibyte, prefix, prefix2, prefix3, prefix4, badprefix;\ 260 BitBlock xE0, xED, xF0, xF4, xA0_xBF, x80_x9F, x90_xBF, x80_x8F, xEF, xBF, xBE;\ 261 BitBlock scope22, scope32, scope33, scope42, scope43, scope44, xE0_scope, xED_scope, xF0_scope, xF4_scope, xEF_scope;\ 262 BitBlock temp72, temp73, u8lastscope, u8error1, u8error2;\ 263 BitBlock u8error3, u8error4, EF_BF_pending, u8mismatch;\ 200 264 \ 201 265 temp1 = simd_or(basis_bits.bit_0, basis_bits.bit_1);\ … … 292 356 lex.Hex = simd_or(temp62, temp65);\ 293 357 lex.error = simd_andc(x00_x1F, lex.WS);\ 358 unibyte = simd_not(basis_bits.bit_0);\ 359 u8.suffix = simd_const_1(0);\ 360 u8.error = simd_const_1(0);\ 361 u8.FFFE_FFFF = simd_const_1(0);\ 362 u8anyscope = simd_const_1(0);\ 363 if ((bitblock_has_bit(basis_bits.bit_0)  CarryTest(group_carry, 0, 11))) {\ 364 prefix = simd_and(basis_bits.bit_0, basis_bits.bit_1);\ 365 prefix2 = simd_andc(prefix, basis_bits.bit_2);\ 366 prefix3 = simd_and(prefix, temp2);\ 367 prefix4 = simd_and(prefix, temp7);\ 368 u8.suffix = simd_andc(basis_bits.bit_0, basis_bits.bit_1);\ 369 temp66 = simd_andc(prefix, temp49);\ 370 temp67 = simd_or(temp21, basis_bits.bit_6);\ 371 temp68 = simd_andc(temp66, temp67);\ 372 temp69 = simd_and(basis_bits.bit_5, temp13);\ 373 temp70 = simd_or(basis_bits.bit_4, temp69);\ 374 temp71 = simd_and(prefix4, temp70);\ 375 badprefix = simd_or(temp68, temp71);\ 376 u8.error = badprefix;\ 377 scope22 = advance(idx, prefix2, carry, group_carry, 0);\ 378 u8anyscope = scope22;\ 379 if (bitblock_has_bit(simd_or(prefix3, prefix4))) {\ 380 xE0 = simd_andc(prefix3, temp36);\ 381 xED = simd_and(prefix3, temp20);\ 382 xF0 = simd_andc(prefix4, temp36);\ 383 temp72 = simd_andc(temp4, temp13);\ 384 xF4 = simd_and(prefix4, temp72);\ 385 xA0_xBF = simd_and(u8.suffix, basis_bits.bit_2);\ 386 x80_x9F = simd_andc(u8.suffix, basis_bits.bit_2);\ 387 x90_xBF = simd_and(u8.suffix, temp49);\ 388 x80_x8F = simd_andc(u8.suffix, temp49);\ 389 xEF = simd_and(prefix3, temp23);\ 390 temp73 = simd_and(u8.suffix, temp7);\ 391 xBF = simd_and(temp73, temp23);\ 392 xBE = simd_and(temp73, temp15);\ 393 xE0_scope = advance(idx, xE0, carry, group_carry, 1);\ 394 xED_scope = advance(idx, xED, carry, group_carry, 2);\ 395 xF0_scope = advance(idx, xF0, carry, group_carry, 3);\ 396 xF4_scope = advance(idx, xF4, carry, group_carry, 4);\ 397 xEF_scope = advance(idx, xEF, carry, group_carry, 5);\ 398 scope32 = advance(idx, prefix3, carry, group_carry, 6);\ 399 scope33 = advance(idx, scope32, carry, group_carry, 7);\ 400 scope42 = advance(idx, prefix4, carry, group_carry, 8);\ 401 scope43 = advance(idx, scope42, carry, group_carry, 9);\ 402 scope44 = advance(idx, scope43, carry, group_carry, 10);\ 403 u8lastscope = simd_or(simd_or(scope22, scope33), scope44);\ 404 u8anyscope = simd_or(simd_or(simd_or(u8lastscope, scope32), scope42), scope43);\ 405 u8error1 = simd_and(xE0_scope, x80_x9F);\ 406 u8error2 = simd_and(xED_scope, xA0_xBF);\ 407 u8error3 = simd_and(xF0_scope, x80_x8F);\ 408 u8error4 = simd_and(xF4_scope, x90_xBF);\ 409 u8.error = simd_or(u8.error, simd_or(simd_or(simd_or(u8error1, u8error2), u8error3), u8error4));\ 410 EF_BF_pending = advance(idx, simd_and(xEF_scope, xBF), carry, group_carry, 11);\ 411 u8.FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));\ 412 }\ 413 }\ 414 u8mismatch = simd_xor(u8anyscope, u8.suffix);\ 415 u8.error = simd_or(u8.error, u8mismatch);\ 416 } while (0) 417 418 #define add_scope_streams(idx, lex, scope1, carry, group_carry)\ 419 do {\ 420 BitBlock v, w, v1, w1;\ 421 \ 422 v = simd_or(lex.LAngle, lex.Hyphen);\ 423 w = simd_or(lex.Hyphen, lex.QMark);\ 424 v1 = advance(idx, v, carry, group_carry, 0);\ 425 w1 = advance(idx, w, carry, group_carry, 1);\ 426 scope1.LAngle = simd_andc(v1, w1);\ 427 scope1.Hyphen = simd_and(v1, w1);\ 428 scope1.QMark = simd_andc(w1, v1);\ 429 } while (0) 430 431 #define parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, group_carry)\ 432 do {\ 433 BitBlock CtCDPI_starts, Ct_errors, PI_start, CtCD_start, CtCDPI_start;\ 434 BitBlock DoubleHyphen, PI_end, CtCDPI_Cursor, PI_Cursor, CD_Ct_Cursor;\ 435 BitBlock CD_Cursor, Ct_Cursor, PI_name_end;\ 436 \ 437 ctCDPI_Callouts.CD_end = simd_const_1(0);\ 438 ctCDPI_Callouts.Ct_starts = simd_const_1(0);\ 439 ctCDPI_Callouts.Ct_ends = simd_const_1(0);\ 440 ctCDPI_Callouts.CD_starts = simd_const_1(0);\ 441 ctCDPI_Callouts.CD_ends = simd_const_1(0);\ 442 ctCDPI_Callouts.PI_starts = simd_const_1(0);\ 443 ctCDPI_Callouts.PI_name_starts = simd_const_1(0);\ 444 ctCDPI_Callouts.PI_name_ends = simd_const_1(0);\ 445 ctCDPI_Callouts.PI_ends = simd_const_1(0);\ 446 ctCDPI_Callouts.CtCDPI_mask = simd_const_1(0);\ 447 ctCDPI_Callouts.error = simd_const_1(0);\ 448 CtCDPI_starts = simd_const_1(0);\ 449 Ct_errors = simd_const_1(0);\ 450 if ((bitblock_has_bit(lex.RBracket)  CarryTest(group_carry, 0, 2))) {\ 451 scope1.RBracket = advance(idx, lex.RBracket, carry, group_carry, 0);\ 452 ctCDPI_Callouts.CD_end = simd_and(advance(idx, simd_and(scope1.RBracket, lex.RBracket), carry, group_carry, 1), lex.RAngle);\ 453 }\ 454 PI_start = simd_and(scope1.LAngle, lex.QMark);\ 455 CtCD_start = simd_and(scope1.LAngle, lex.Exclam);\ 456 CtCDPI_start = simd_or(PI_start, CtCD_start);\ 457 DoubleHyphen = simd_and(scope1.Hyphen, lex.Hyphen);\ 458 PI_end = simd_and(scope1.QMark, lex.RAngle);\ 459 CtCDPI_Cursor = scantofirst(idx, CtCDPI_start, carry, bubble, group_carry, 2);\ 460 if ((bitblock_has_bit(CtCDPI_Cursor)  CarryTest(group_carry, 3, 14))) {\ 461 CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\ 462 PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\ 463 CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, group_carry, 3);\ 464 CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\ 465 Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\ 466 ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\ 467 ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\ 468 ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\ 469 Ct_Cursor = advance(idx, Ct_Cursor, carry, group_carry, 4);\ 470 Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\ 471 Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, group_carry, 5), carry, group_carry, 6);\ 472 PI_Cursor = advance(idx, PI_Cursor, carry, group_carry, 7);\ 473 ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\ 474 PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, group_carry, 8);\ 475 ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\ 476 PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, group_carry, 9);\ 477 CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, group_carry, 10);\ 478 Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, group_carry, 11), carry, group_carry, 12);\ 479 ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\ 480 ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\ 481 ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\ 482 CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\ 483 CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, group_carry, 13);\ 484 ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, group_carry, 14), CtCDPI_starts, carry, bubble, group_carry, 15);\ 485 ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\ 486 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, group_carry, 16), PI_end));\ 487 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\ 488 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\ 489 while (bitblock_has_bit(CtCDPI_Cursor)) {\ 490 BitBlock subcarryQ[14];\ 491 CarryInit(subcarryQ, 14);\ 492 CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\ 493 PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\ 494 CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, subcarryQ, 0);\ 495 CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\ 496 Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\ 497 ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\ 498 ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\ 499 ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\ 500 Ct_Cursor = advance(idx, Ct_Cursor, carry, subcarryQ, 1);\ 501 Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\ 502 Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, subcarryQ, 2), carry, subcarryQ, 3);\ 503 PI_Cursor = advance(idx, PI_Cursor, carry, subcarryQ, 4);\ 504 ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\ 505 PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, subcarryQ, 5);\ 506 ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\ 507 PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, subcarryQ, 6);\ 508 CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, subcarryQ, 7);\ 509 Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, subcarryQ, 8), carry, subcarryQ, 9);\ 510 ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\ 511 ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\ 512 ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\ 513 CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\ 514 CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, subcarryQ, 10);\ 515 ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, subcarryQ, 11), CtCDPI_starts, carry, bubble, subcarryQ, 12);\ 516 ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\ 517 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, subcarryQ, 13), PI_end));\ 518 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\ 519 ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\ 520 CarryCombine(group_carry, subcarryQ, 3, 14);\ 521 }\ 522 }\ 294 523 } while (0) 295 524 … … 302 531 303 532 int i = get_local_id(0); 304 BitBlock group_carryQ[MAX_CARRY];305 533 struct Basis_bits basis_bits; 306 534 struct Lex lex; … … 309 537 struct CtCDPI_Callouts ctCDPI_Callouts; 310 538 311 for(int i=0; i<MAX_CARRY; i++) 312 group_carryQ[i] = 0; 539 BitBlock classify_bytes_Validate_utf8_carryQ[12]; 540 CarryInit(classify_bytes_Validate_utf8_carryQ, 12); 541 BitBlock add_scope_streams_carry[2]; 542 CarryInit(add_scope_streams_carry, 2); 543 BitBlock parse_CtCDPI_carryQ[17]; 544 CarryInit(parse_CtCDPI_carryQ, 17); 545 546 BitBlock group_carryQ[MAX_CARRY]; 547 CarryInit(group_carryQ, 2); 313 548 314 549 for(int idx=i; idx<ELEMENTS;idx=idx+WORK_GROUP_SIZE){ … … 317 552 basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7); 318 553 319 classify_bytes(i, basis_bits, lex, carry); 320 BitBlock LA_scope,StartTag_NameStart,StartTag_End; 321 advance(i,lex.LAngle, LA_scope, carry, group_carryQ[0]); 322 StartTag_NameStart = simd_andc(LA_scope,lex.Slash); 323 adc(i,StartTag_NameStart,lex.NameScan,StartTag_End,carry,bubble, group_carryQ[1]); 324 StartTag_End = simd_andc(StartTag_End, lex.NameScan); 325 err_pos[idx] = simd_andc(StartTag_End,lex.RAngle); 326 554 classify_bytes_Validate_utf8(i, basis_bits, lex, u8, carry, classify_bytes_Validate_utf8_carryQ); 555 add_scope_streams(i, lex, scope1, carry, add_scope_streams_carry); 556 parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, parse_CtCDPI_carryQ); 557 err_pos[idx] = ctCDPI_Callouts.error; 327 558 } 328 559
Note: See TracChangeset
for help on using the changeset viewer.