Changeset 1669


Ignore:
Timestamp:
Nov 7, 2011, 4:45:11 PM (7 years ago)
Author:
lindanl
Message:

Parabix on GPU : Add u8, scope and ctcdpi. Add scan functions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • GPU/xmlwf.cl

    r1668 r1669  
    33#define WORK_GROUP_SIZE 64
    44#define ELEMENTS 1024*128
    5 #define MAX_CARRY 2
     5#define MAX_CARRY 10
     6#define EOF_mask 0xFFFFFFFFFFFFFFFF
    67
    78/******************Library Functions 64-bit********************/
     
    7576
    7677
    77 #define adc(idx, a, b, c, carry, bubble, group_carry) \
    78 do {\
    79 \
    80    BitBlock carry_mask;\
    81    BitBlock bubble_mask;\
    82 \
    83    BitBlock partial_sum = a+b;\
    84    BitBlock gen = a&b;\
    85    BitBlock prop = a^b;\
    86    carry[idx] = ((gen | (prop & ~partial_sum))&CARRY_BIT_MASK)>>(WORK_GROUP_SIZE-1-idx);\
    87    bubble[idx] = (partial_sum + 1)? 0:(((long long)1)<<idx);\
    88  \
    89    barrier(CLK_LOCAL_MEM_FENCE);\
    90    for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){\
    91                 carry[idx] = carry[idx]|carry[idx^offset];\
    92                 bubble[idx] = bubble[idx]|bubble[idx^offset];\
    93                 barrier(CLK_LOCAL_MEM_FENCE);\
    94    }\
    95 \
    96    carry_mask = (carry[0]<<1)|group_carry;\
    97    bubble_mask = bubble[0];\
    98 \
    99    BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;\
    100    BitBlock inc = s | (s-carry_mask);\
    101    c = partial_sum + ((inc>>idx)&0x1);\
    102    group_carry = (carry[0]|(bubble_mask & inc))>>63;\
    103 \
    104 }while(0)
    105 
    106 #define sbb(idx, a, b, c, carry, bubble, group_carry) \
    107 do {\
    108 \
    109    BitBlock carry_mask;\
    110    BitBlock bubble_mask;\
    111 \
    112    BitBlock partial_diff = a-b;\
    113    BitBlock gen = b & ~a;\
    114    BitBlock prop = ~(a^b);\
    115    carry[idx] = ((gen | (prop & partial_diff))&CARRY_BIT_MASK)>>(63-idx);\
    116    bubble[idx] = (partial_diff)? 0:(((long long)1)<<idx);\
    117  \
    118    barrier(CLK_LOCAL_MEM_FENCE);\
    119    for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){\
    120                 carry[idx] = carry[idx]|carry[idx^offset];\
    121                 bubble[idx] = bubble[idx]|bubble[idx^offset];\
    122                 barrier(CLK_LOCAL_MEM_FENCE);\
    123    }\
    124 \
    125    carry_mask = (carry[0]<<1)|group_carry;\
    126    bubble_mask = bubble[0];\
    127 \
    128    BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;\
    129    BitBlock dec = s | (s-carry_mask);\
    130    c = partial_diff - ((dec>>idx)&0x1);\
    131    group_carry = (carry[0]|(bubble_mask & dec))>>63;\
    132 \
    133 }while(0)
    134 
    135 #define advance(idx, a, c, carry, group_carry) \
    136 do {\
    137   carry[0] = group_carry;\
    138   c = a<<1;\
    139   carry[idx+1] = (a & CARRY_BIT_MASK)>>63;\
    140   barrier(CLK_LOCAL_MEM_FENCE);\
    141   group_carry = carry[WORK_GROUP_SIZE];\
    142   c = c | carry[idx];\
    143 \
    144 }while(0)
    145 
     78inline BitBlock adc(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){
     79        BitBlock carry_mask;
     80        BitBlock bubble_mask;
     81
     82        BitBlock partial_sum = a+b;
     83        BitBlock gen = a&b;
     84        BitBlock prop = a^b;
     85        carry[idx] = ((gen | (prop & ~partial_sum))&CARRY_BIT_MASK)>>(WORK_GROUP_SIZE-1-idx);
     86        bubble[idx] = (partial_sum + 1)? 0:(((long long)1)<<idx);
     87       
     88        barrier(CLK_LOCAL_MEM_FENCE);
     89        for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){
     90                carry[idx] = carry[idx]|carry[idx^offset];
     91                bubble[idx] = bubble[idx]|bubble[idx^offset];
     92                barrier(CLK_LOCAL_MEM_FENCE);
     93        }
     94       
     95        carry_mask = (carry[0]<<1)|group_carry[carryno];
     96        bubble_mask = bubble[0];
     97       
     98        BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;
     99        BitBlock inc = s | (s-carry_mask);
     100        BitBlock rslt = partial_sum + ((inc>>idx)&0x1);
     101        group_carry[carryno] = (carry[0]|(bubble_mask & inc))>>63;
     102        return rslt;
     103}
     104
     105inline BitBlock sbb(int idx, BitBlock a, BitBlock b, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno){
     106        BitBlock carry_mask;
     107        BitBlock bubble_mask;
     108
     109        BitBlock partial_diff = a-b;
     110        BitBlock gen = b & ~a;
     111        BitBlock prop = ~(a^b);
     112        carry[idx] = ((gen | (prop & partial_diff))&CARRY_BIT_MASK)>>(63-idx);
     113        bubble[idx] = (partial_diff)? 0:(((long long)1)<<idx);
     114       
     115        barrier(CLK_LOCAL_MEM_FENCE);
     116        for(int offset=WORK_GROUP_SIZE/2; offset>0; offset=offset>>1){
     117                carry[idx] = carry[idx]|carry[idx^offset];
     118                bubble[idx] = bubble[idx]|bubble[idx^offset];
     119                barrier(CLK_LOCAL_MEM_FENCE);
     120        }
     121       
     122        carry_mask = (carry[0]<<1)|group_carry[carryno];
     123        bubble_mask = bubble[0];
     124       
     125        BitBlock s = (carry_mask + bubble_mask) & ~bubble_mask;
     126        BitBlock dec = s | (s-carry_mask);
     127        BitBlock rslt = partial_diff - ((dec>>idx)&0x1);
     128        group_carry[carryno] = (carry[0]|(bubble_mask & dec))>>63;
     129        return rslt;
     130}
     131
     132inline BitBlock advance(int idx, BitBlock a, __local BitBlock *carry, BitBlock *group_carry, const int carryno){
     133        carry[0] = group_carry[carryno];
     134        BitBlock c = a<<1;
     135        carry[idx+1] = (a & CARRY_BIT_MASK)>>63;
     136        barrier(CLK_LOCAL_MEM_FENCE);
     137        group_carry[carryno] = carry[WORK_GROUP_SIZE];
     138        c = c | carry[idx];
     139        return c;
     140}
     141
     142static inline BitBlock scanthru(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
     143        return simd_andc(adc(idx, markers, charclass, carry, bubble, group_carry, carryno), charclass);
     144}
     145
     146static inline BitBlock scanto(int idx, BitBlock markers, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
     147        return simd_and(adc(idx, markers, simd_not(charclass), carry, bubble, group_carry, carryno), charclass);
     148}
     149
     150static inline BitBlock scantofirst(int idx, BitBlock charclass, __local BitBlock *carry, __local BitBlock *bubble, BitBlock *group_carry, const int carryno) {
     151        BitBlock marker;
     152        group_carry[carryno] = simd_xor(group_carry[carryno], 1);
     153    marker = adc(idx, 0, simd_not(charclass), carry, bubble, group_carry, carryno);
     154    group_carry[carryno] = simd_xor(group_carry[carryno], 1);
     155    return simd_and(marker, charclass);
     156}
     157
     158inline void CarryInit(BitBlock * carry, int count){
     159    for (int j=0; j < count; j++)
     160         carry[j] = 0;
     161}
     162
     163static inline int CarryTest(BitBlock * cq, const int carryno, const int carry_count) {
     164  BitBlock c1 = cq[carryno];
     165  for (int i = carryno + 1; i < carryno + carry_count; i++) {
     166    c1 = c1 | cq[i];
     167  }
     168  return bitblock_has_bit(c1);
     169}
     170
     171static inline void CarryCombine(BitBlock * cq, BitBlock * local_cq, const int carryno, const int carry_count) {
     172  for (int i = 0; i < carry_count; i++) {
     173    cq[carryno+i] = simd_or(cq[carryno+i], local_cq[i]);
     174  }
     175}
    146176/**************Parabix Structs***************/
    147177  struct Basis_bits {
     
    187217};
    188218
     219  struct U8 {
     220  BitBlock suffix;
     221  BitBlock FFFE_FFFF;
     222  BitBlock error;
     223};
     224
     225  struct Scope1 {
     226  BitBlock RefStart;
     227  BitBlock LAngle;
     228  BitBlock Hyphen;
     229  BitBlock QMark;
     230  BitBlock RBracket;
     231};
     232
     233  struct CtCDPI_Callouts {
     234  BitBlock CD_end;
     235  BitBlock Ct_starts;
     236  BitBlock Ct_ends;
     237  BitBlock CD_starts;
     238  BitBlock CD_ends;
     239  BitBlock PI_starts;
     240  BitBlock PI_name_starts;
     241  BitBlock PI_name_ends;
     242  BitBlock PI_ends;
     243  BitBlock CtCDPI_mask;
     244  BitBlock error;
     245};
     246
    189247/**************Parabix Functions***************/
    190 #define classify_bytes(idx, basis_bits, lex, carry)\
     248#define classify_bytes_Validate_utf8(idx, basis_bits, lex, u8, carry, group_carry)\
    191249 do {\
    192250                BitBlock temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;\
     
    198256                BitBlock temp50, temp51, temp52, temp53, temp54, temp55, temp56, temp57;\
    199257                BitBlock temp58, temp59, temp60, temp61, temp62, temp63, temp64, temp65;\
     258                BitBlock u8anyscope, temp66, temp67, temp68, temp69, temp70, temp71;\
     259                BitBlock unibyte, prefix, prefix2, prefix3, prefix4, badprefix;\
     260                BitBlock xE0, xED, xF0, xF4, xA0_xBF, x80_x9F, x90_xBF, x80_x8F, xEF, xBF, xBE;\
     261                BitBlock scope22, scope32, scope33, scope42, scope43, scope44, xE0_scope, xED_scope, xF0_scope, xF4_scope, xEF_scope;\
     262                BitBlock temp72, temp73, u8lastscope, u8error1, u8error2;\
     263                BitBlock u8error3, u8error4, EF_BF_pending, u8mismatch;\
    200264\
    201265        temp1 = simd_or(basis_bits.bit_0, basis_bits.bit_1);\
     
    292356        lex.Hex = simd_or(temp62, temp65);\
    293357        lex.error = simd_andc(x00_x1F, lex.WS);\
     358                unibyte = simd_not(basis_bits.bit_0);\
     359        u8.suffix = simd_const_1(0);\
     360        u8.error = simd_const_1(0);\
     361        u8.FFFE_FFFF = simd_const_1(0);\
     362        u8anyscope = simd_const_1(0);\
     363        if ((bitblock_has_bit(basis_bits.bit_0) || CarryTest(group_carry, 0, 11))) {\
     364          prefix = simd_and(basis_bits.bit_0, basis_bits.bit_1);\
     365          prefix2 = simd_andc(prefix, basis_bits.bit_2);\
     366          prefix3 = simd_and(prefix, temp2);\
     367          prefix4 = simd_and(prefix, temp7);\
     368          u8.suffix = simd_andc(basis_bits.bit_0, basis_bits.bit_1);\
     369          temp66 = simd_andc(prefix, temp49);\
     370          temp67 = simd_or(temp21, basis_bits.bit_6);\
     371          temp68 = simd_andc(temp66, temp67);\
     372          temp69 = simd_and(basis_bits.bit_5, temp13);\
     373          temp70 = simd_or(basis_bits.bit_4, temp69);\
     374          temp71 = simd_and(prefix4, temp70);\
     375          badprefix = simd_or(temp68, temp71);\
     376          u8.error = badprefix;\
     377          scope22 = advance(idx, prefix2, carry, group_carry, 0);\
     378          u8anyscope = scope22;\
     379          if (bitblock_has_bit(simd_or(prefix3, prefix4))) {\
     380            xE0 = simd_andc(prefix3, temp36);\
     381            xED = simd_and(prefix3, temp20);\
     382            xF0 = simd_andc(prefix4, temp36);\
     383            temp72 = simd_andc(temp4, temp13);\
     384            xF4 = simd_and(prefix4, temp72);\
     385            xA0_xBF = simd_and(u8.suffix, basis_bits.bit_2);\
     386            x80_x9F = simd_andc(u8.suffix, basis_bits.bit_2);\
     387            x90_xBF = simd_and(u8.suffix, temp49);\
     388            x80_x8F = simd_andc(u8.suffix, temp49);\
     389            xEF = simd_and(prefix3, temp23);\
     390            temp73 = simd_and(u8.suffix, temp7);\
     391            xBF = simd_and(temp73, temp23);\
     392            xBE = simd_and(temp73, temp15);\
     393            xE0_scope = advance(idx, xE0, carry, group_carry, 1);\
     394            xED_scope = advance(idx, xED, carry, group_carry, 2);\
     395            xF0_scope = advance(idx, xF0, carry, group_carry, 3);\
     396            xF4_scope = advance(idx, xF4, carry, group_carry, 4);\
     397            xEF_scope = advance(idx, xEF, carry, group_carry, 5);\
     398            scope32 = advance(idx, prefix3, carry, group_carry, 6);\
     399            scope33 = advance(idx, scope32, carry, group_carry, 7);\
     400            scope42 = advance(idx, prefix4, carry, group_carry, 8);\
     401            scope43 = advance(idx, scope42, carry, group_carry, 9);\
     402            scope44 = advance(idx, scope43, carry, group_carry, 10);\
     403            u8lastscope = simd_or(simd_or(scope22, scope33), scope44);\
     404            u8anyscope = simd_or(simd_or(simd_or(u8lastscope, scope32), scope42), scope43);\
     405            u8error1 = simd_and(xE0_scope, x80_x9F);\
     406            u8error2 = simd_and(xED_scope, xA0_xBF);\
     407            u8error3 = simd_and(xF0_scope, x80_x8F);\
     408            u8error4 = simd_and(xF4_scope, x90_xBF);\
     409            u8.error = simd_or(u8.error, simd_or(simd_or(simd_or(u8error1, u8error2), u8error3), u8error4));\
     410            EF_BF_pending = advance(idx, simd_and(xEF_scope, xBF), carry, group_carry, 11);\
     411            u8.FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));\
     412          }\
     413        }\
     414        u8mismatch = simd_xor(u8anyscope, u8.suffix);\
     415        u8.error = simd_or(u8.error, u8mismatch);\
     416  } while (0)
     417
     418 #define add_scope_streams(idx, lex, scope1, carry, group_carry)\
     419 do {\
     420                BitBlock v, w, v1, w1;\
     421\
     422        v = simd_or(lex.LAngle, lex.Hyphen);\
     423        w = simd_or(lex.Hyphen, lex.QMark);\
     424        v1 = advance(idx, v, carry, group_carry, 0);\
     425        w1 = advance(idx, w, carry, group_carry, 1);\
     426        scope1.LAngle = simd_andc(v1, w1);\
     427        scope1.Hyphen = simd_and(v1, w1);\
     428        scope1.QMark = simd_andc(w1, v1);\
     429  } while (0)
     430
     431 #define parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, group_carry)\
     432 do {\
     433                BitBlock CtCDPI_starts, Ct_errors, PI_start, CtCD_start, CtCDPI_start;\
     434                BitBlock DoubleHyphen, PI_end, CtCDPI_Cursor, PI_Cursor, CD_Ct_Cursor;\
     435                BitBlock CD_Cursor, Ct_Cursor, PI_name_end;\
     436\
     437        ctCDPI_Callouts.CD_end = simd_const_1(0);\
     438        ctCDPI_Callouts.Ct_starts = simd_const_1(0);\
     439        ctCDPI_Callouts.Ct_ends = simd_const_1(0);\
     440        ctCDPI_Callouts.CD_starts = simd_const_1(0);\
     441        ctCDPI_Callouts.CD_ends = simd_const_1(0);\
     442        ctCDPI_Callouts.PI_starts = simd_const_1(0);\
     443        ctCDPI_Callouts.PI_name_starts = simd_const_1(0);\
     444        ctCDPI_Callouts.PI_name_ends = simd_const_1(0);\
     445        ctCDPI_Callouts.PI_ends = simd_const_1(0);\
     446        ctCDPI_Callouts.CtCDPI_mask = simd_const_1(0);\
     447        ctCDPI_Callouts.error = simd_const_1(0);\
     448        CtCDPI_starts = simd_const_1(0);\
     449        Ct_errors = simd_const_1(0);\
     450        if ((bitblock_has_bit(lex.RBracket) || CarryTest(group_carry, 0, 2))) {\
     451          scope1.RBracket = advance(idx, lex.RBracket, carry, group_carry, 0);\
     452          ctCDPI_Callouts.CD_end = simd_and(advance(idx, simd_and(scope1.RBracket, lex.RBracket), carry, group_carry, 1), lex.RAngle);\
     453        }\
     454        PI_start = simd_and(scope1.LAngle, lex.QMark);\
     455        CtCD_start = simd_and(scope1.LAngle, lex.Exclam);\
     456        CtCDPI_start = simd_or(PI_start, CtCD_start);\
     457        DoubleHyphen = simd_and(scope1.Hyphen, lex.Hyphen);\
     458        PI_end = simd_and(scope1.QMark, lex.RAngle);\
     459        CtCDPI_Cursor = scantofirst(idx, CtCDPI_start, carry, bubble, group_carry, 2);\
     460        if ((bitblock_has_bit(CtCDPI_Cursor) || CarryTest(group_carry, 3, 14))) {\
     461          CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\
     462          PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\
     463          CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, group_carry, 3);\
     464          CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\
     465          Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\
     466          ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\
     467          ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\
     468          ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\
     469          Ct_Cursor = advance(idx, Ct_Cursor, carry, group_carry, 4);\
     470          Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\
     471          Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, group_carry, 5), carry, group_carry, 6);\
     472          PI_Cursor = advance(idx, PI_Cursor, carry, group_carry, 7);\
     473          ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\
     474          PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, group_carry, 8);\
     475          ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\
     476          PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, group_carry, 9);\
     477          CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, group_carry, 10);\
     478          Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, group_carry, 11), carry, group_carry, 12);\
     479          ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\
     480          ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\
     481          ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\
     482          CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\
     483          CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, group_carry, 13);\
     484          ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, group_carry, 14), CtCDPI_starts, carry, bubble, group_carry, 15);\
     485          ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\
     486          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, group_carry, 16), PI_end));\
     487          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\
     488          ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\
     489          while (bitblock_has_bit(CtCDPI_Cursor)) {\
     490            BitBlock subcarryQ[14];\
     491            CarryInit(subcarryQ, 14);\
     492            CtCDPI_starts = simd_or(CtCDPI_starts, CtCDPI_Cursor);\
     493            PI_Cursor = simd_and(CtCDPI_Cursor, PI_start);\
     494            CD_Ct_Cursor = advance(idx, simd_andc(CtCDPI_Cursor, PI_Cursor), carry, subcarryQ, 0);\
     495            CD_Cursor = simd_and(CD_Ct_Cursor, lex.LBracket);\
     496            Ct_Cursor = simd_and(CD_Ct_Cursor, lex.Hyphen);\
     497            ctCDPI_Callouts.PI_starts = simd_or(ctCDPI_Callouts.PI_starts, PI_Cursor);\
     498            ctCDPI_Callouts.CD_starts = simd_or(ctCDPI_Callouts.CD_starts, CD_Cursor);\
     499            ctCDPI_Callouts.Ct_starts = simd_or(ctCDPI_Callouts.Ct_starts, Ct_Cursor);\
     500            Ct_Cursor = advance(idx, Ct_Cursor, carry, subcarryQ, 1);\
     501            Ct_errors = simd_or(Ct_errors, simd_andc(Ct_Cursor, lex.Hyphen));\
     502            Ct_Cursor = advance(idx, advance(idx, Ct_Cursor, carry, subcarryQ, 2), carry, subcarryQ, 3);\
     503            PI_Cursor = advance(idx, PI_Cursor, carry, subcarryQ, 4);\
     504            ctCDPI_Callouts.PI_name_starts = simd_or(ctCDPI_Callouts.PI_name_starts, PI_Cursor);\
     505            PI_name_end = scanthru(idx, PI_Cursor, lex.NameScan, carry, bubble, subcarryQ, 5);\
     506            ctCDPI_Callouts.PI_name_ends = simd_or(ctCDPI_Callouts.PI_name_ends, PI_name_end);\
     507            PI_Cursor = scanto(idx, PI_name_end, PI_end, carry, bubble, subcarryQ, 6);\
     508            CD_Cursor = scanto(idx, CD_Cursor, ctCDPI_Callouts.CD_end, carry, bubble, subcarryQ, 7);\
     509            Ct_Cursor = advance(idx, scanto(idx, Ct_Cursor, DoubleHyphen, carry, bubble, subcarryQ, 8), carry, subcarryQ, 9);\
     510            ctCDPI_Callouts.PI_ends = simd_or(ctCDPI_Callouts.PI_ends, PI_Cursor);\
     511            ctCDPI_Callouts.CD_ends = simd_or(ctCDPI_Callouts.CD_ends, CD_Cursor);\
     512            ctCDPI_Callouts.Ct_ends = simd_or(ctCDPI_Callouts.Ct_ends, Ct_Cursor);\
     513            CtCDPI_Cursor = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);\
     514            CtCDPI_Cursor = scanto(idx, CtCDPI_Cursor, CtCDPI_start, carry, bubble, subcarryQ, 10);\
     515            ctCDPI_Callouts.CtCDPI_mask = sbb(idx, advance(idx, simd_or(simd_or(ctCDPI_Callouts.CD_ends, ctCDPI_Callouts.Ct_ends), ctCDPI_Callouts.PI_ends), carry, subcarryQ, 11), CtCDPI_starts, carry, bubble, subcarryQ, 12);\
     516            ctCDPI_Callouts.error = simd_or(Ct_errors, simd_andc(ctCDPI_Callouts.Ct_ends, lex.RAngle));\
     517            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(advance(idx, simd_andc(ctCDPI_Callouts.PI_name_ends, lex.WS), carry, subcarryQ, 13), PI_end));\
     518            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_and(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends));\
     519            ctCDPI_Callouts.error = simd_or(ctCDPI_Callouts.error, simd_andc(ctCDPI_Callouts.CtCDPI_mask, EOF_mask));\
     520            CarryCombine(group_carry, subcarryQ, 3, 14);\
     521          }\
     522        }\
    294523  } while (0)
    295524
     
    302531
    303532   int i = get_local_id(0);
    304    BitBlock group_carryQ[MAX_CARRY];
    305533   struct Basis_bits basis_bits;
    306534   struct Lex lex;
     
    309537   struct CtCDPI_Callouts ctCDPI_Callouts;
    310538
    311    for(int i=0; i<MAX_CARRY; i++)
    312                 group_carryQ[i] = 0;
     539   BitBlock classify_bytes_Validate_utf8_carryQ[12];
     540   CarryInit(classify_bytes_Validate_utf8_carryQ, 12);
     541   BitBlock add_scope_streams_carry[2];
     542   CarryInit(add_scope_streams_carry, 2);
     543   BitBlock parse_CtCDPI_carryQ[17];
     544   CarryInit(parse_CtCDPI_carryQ, 17);
     545   
     546   BitBlock group_carryQ[MAX_CARRY];
     547   CarryInit(group_carryQ, 2);
    313548
    314549   for(int idx=i; idx<ELEMENTS;idx=idx+WORK_GROUP_SIZE){
     
    317552                basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    318553               
    319                 classify_bytes(i, basis_bits, lex, carry);
    320                 BitBlock LA_scope,StartTag_NameStart,StartTag_End;
    321                 advance(i,lex.LAngle, LA_scope, carry, group_carryQ[0]);
    322                 StartTag_NameStart = simd_andc(LA_scope,lex.Slash);
    323                 adc(i,StartTag_NameStart,lex.NameScan,StartTag_End,carry,bubble, group_carryQ[1]);
    324                 StartTag_End = simd_andc(StartTag_End, lex.NameScan);
    325                 err_pos[idx] = simd_andc(StartTag_End,lex.RAngle);
    326 
     554                classify_bytes_Validate_utf8(i, basis_bits, lex, u8, carry, classify_bytes_Validate_utf8_carryQ);
     555                add_scope_streams(i, lex, scope1, carry, add_scope_streams_carry);
     556                parse_CtCDPI(idx, ctCDPI_Callouts, lex, scope1, carry, bubble, parse_CtCDPI_carryQ);
     557                err_pos[idx] = ctCDPI_Callouts.error;
    327558        }
    328559
Note: See TracChangeset for help on using the changeset viewer.