source: proto/xmlschema/parabix2_validation_mulvec.py @ 2219

Last change on this file since 2219 was 2219, checked in by shiyangy, 7 years ago

project checkin

File size: 25.0 KB
Line 
# -*- coding: utf-8 -*-
#
# parabix2_compilable.py
#
# Parallel XML Parsing with Bitstream Addition
#
# - Complete prototype for all bitstream computations in Parabix2
# - Optimized for compilation
# - Separate compilation

# Robert D. Cameron
# July 29, 2010
#

#import bitutil
16
# UTF-8 classification streams.  Every field is a bitstream that defaults to
# 0 (empty); the fields are filled in by Classify_bytes_Validate_utf8.
class u8():
    # Byte classes derived directly from the two leading basis bits.
    unibyte = 0      # ~bit_0
    prefix = 0       # bit_0 & bit_1
    prefix2 = 0      # prefix with bit_2 clear
    prefix3 = 0      # prefix with bit_2 set and bit_3 clear
    prefix4 = 0      # prefix with bit_2 and bit_3 set
    suffix = 0       # bit_0 &~ bit_1
    badprefix = 0    # prefix bytes rejected outright by the validator

    # Specific prefix bytes.  NOTE(review): the visible classifier keeps these
    # five in local variables and never stores them into this struct.
    xE0 = 0
    xED = 0
    xF0 = 0
    xF4 = 0

    # Suffix sub-ranges, used to constrain the byte that follows the
    # corresponding special prefix.
    xA0_xBF = 0
    x80_x9F = 0
    x90_xBF = 0
    x80_x8F = 0

    # Bytes involved in the FFFE/FFFF check (xEF is also only a local in the
    # visible classifier).
    xEF = 0
    xBF = 0
    xBE = 0

    # scopeNM: obtained by applying pablo.Advance (M - 1) times to the
    # N-byte prefix stream.
    scope22 = 0
    scope32 = 0
    scope33 = 0
    scope42 = 0
    scope43 = 0
    scope44 = 0

    # pablo.Advance of the matching special-prefix stream.
    xE0_scope = 0
    xED_scope = 0
    xF0_scope = 0
    xF4_scope = 0
    xEF_scope = 0
47
# Lexical item streams: one bitstream per character (or character class) of
# interest to the XML parser.  All default to 0 and are computed from the
# basis bits by Classify_bytes_Validate_utf8.
class Lex():
    # Whitespace characters.
    CR = 0
    LF = 0
    HT = 0
    SP = 0
    CRLF = 0    # NOTE(review): never assigned by the visible classifier
    # Markup delimiters and punctuation.
    RefStart = 0      # '&'
    Semicolon = 0
    Colon = 0
    LAngle = 0
    RAngle = 0
    LBracket = 0
    RBracket = 0
    Exclam = 0
    QMark = 0
    Hyphen = 0
    Equals = 0
    SQuote = 0
    DQuote = 0
    Slash = 0
    Hash = 0
    x = 0             # the letter 'x' (hexadecimal character references)
    # Character classes.
    ASCII_name_start = 0
    ASCII_name_char = 0
    NameScan = 0      # ASCII_name_char | bit_0
    Digit = 0
    Hex = 0
    WS = 0            # CR | LF | HT | SP
76
# "Scope" streams: each is the pablo.Advance of the like-named Lex stream
# (RefStart is the Advance of the unmasked '&' stream, see Parse_refs).
# All default to 0.
class Scope1():
    RefStart = 0    # set in Parse_refs
    LAngle = 0      # set in Add_scope_streams
    Hyphen = 0      # set in Add_scope_streams
    QMark = 0       # set in Add_scope_streams
    RBracket = 0    # set in Parse_CtCDPI when any ']' is present
83
# Callout streams for comments (Ct), CDATA sections (CD) and processing
# instructions (PI).  All default to 0 and are produced by Parse_CtCDPI.
class CtCDPI_Callouts():
    CD_end = 0            # '>' positions closing a "]]>" sequence
    Ct_starts = 0
    Ct_ends = 0
    CD_starts = 0
    CD_ends = 0
    PI_starts = 0
    PI_name_starts = 0
    PI_name_ends = 0
    PI_ends = 0
    CtCDPI_mask = 0       # span covered by comment / CDATA / PI markup
95
# Start/end marker streams for general, decimal and hexadecimal references.
# All default to 0 and are produced by Parse_refs.
class Ref_Callouts():
    GenRef_starts = 0
    GenRef_ends = 0
    DecRef_starts = 0
    DecRef_ends = 0
    HexRef_starts = 0
    HexRef_ends = 0
103
# Holder for the hash bitstream written by Compute_Hash_Value_Bitstream.
class Hash_data():
    Hash_value = 0
106
# Callout streams for start tags, empty-element tags, end tags and their
# attributes.  All default to 0.  Most are produced by Parse_tags; the
# ElemName_ends_<n> length groups come from Form_Length_Group_Bitstreams.
class Tag_Callouts():
    ElemName_starts = 0
    ElemName_ends = 0
    # ElemName_ends partitioned by element-name length.
    ElemName_ends_1 = 0
    ElemName_ends_2 = 0
    ElemName_ends_3 = 0
    ElemName_ends_4 = 0
    ElemName_ends_5 = 0
    ElemName_ends_6 = 0
    ElemName_ends_7 = 0
    ElemName_ends_8 = 0
    ElemName_ends_9 = 0
    ElemName_ends_10 = 0
    ElemName_ends_11 = 0
    ElemName_ends_12 = 0
    ElemName_ends_13 = 0
    ElemName_ends_14 = 0
    ElemName_ends_15 = 0
    ElemName_ends_16 = 0
    ElemName_ends_17_and_longer = 0
    AttName_starts = 0
    AttName_ends = 0
    AttVal_starts = 0
    AttVal_ends = 0
    AttVal_spans = 0        # AttVal_ends - AttVal_starts
    EmptyTag_marks = 0
    EndTag_marks = 0

    # ElemName_starts plus the position following each ':' in a name.
    ElemName_starts_no_ns = 0

    # EmptyTag_marks | EndTag_marks
    Tag_closing = 0

    ElemContent_starts = 0
    ElemContent_ends = 0

    # Copy of the start-tag end ('>') stream computed in Parse_tags.
    stends = 0
143
# The eight basis bitstreams: bit_k carries bit k of every input byte.
# The classifier equations in this file treat bit_0 as the high-order bit
# (for example u8.unibyte is ~bit_0).
class Basis_bits():
    bit_0 = 0
    bit_1 = 0
    bit_2 = 0
    bit_3 = 0
    bit_4 = 0
    bit_5 = 0
    bit_6 = 0
    bit_7 = 0
153
# Streams handed on for checking after the bitstream pass.  All default to 0.
class Check_streams():
    misc_mask = 0                # set in Parse_CtCDPI
    non_ascii_name_starts = 0    # set in Validate_xml_names
    non_ascii_names = 0          # set in Validate_xml_names
    tag_marks = 0                # set in Do_check_streams
    name_follows = 0             # set in Do_check_streams
    att_refs = 0                 # set in Do_check_streams
161
# Holder for the namespace error stream.
# NOTE(review): Validate_xml_names receives an instance of this struct but,
# in the visible code, never writes namespace_error into it.
class Xml_names():
    namespace_error = 0
164       
# One stream per CityGML element kind used by the validation prototype.
# All default to 0.  Only `null` and `Appearance` are assigned by the
# visible code (Validation_Vec_2).
class Citygml():
    null = 0
    appearanceMember = 0
    Appearance = 0
    surfaceDataMember = 0
    ParameterizedTexture = 0
    imageURI = 0
    textureType = 0
    wrapMode = 0
    borderColor = 0
    target = 0
    TexCoordList = 0
    textureCoordinates = 0
178       
def Validation_Vec_2(citygml, basis_bits):
    # Classify bytes into the `null` and `Appearance` streams of `citygml`
    # and report every position that directly follows an `Appearance` byte
    # but is not itself a `null` byte.
    #
    #   citygml    - struct receiving .null and .Appearance
    #   basis_bits - the eight basis bitstreams (read only)
    #
    # The original equations OR-ed temp7 into the result two extra times
    # (temp14 and the final citygml.null); x | x == x, so those steps are
    # folded away here without changing any stream.

    # temp7: bit_2 and bit_3 set, all six other bits clear.
    temp1 = (basis_bits.bit_0 | basis_bits.bit_1)
    temp2 = (basis_bits.bit_2 & basis_bits.bit_3)
    temp3 = (temp2 &~ temp1)
    temp4 = (basis_bits.bit_4 | basis_bits.bit_5)
    temp5 = (basis_bits.bit_6 | basis_bits.bit_7)
    temp6 = (temp4 | temp5)
    temp7 = (temp3 &~ temp6)

    # temp12: bits 1, 2, 3 and 4 set, bits 0, 5, 6 and 7 clear.
    temp8 = (basis_bits.bit_1 &~ basis_bits.bit_0)
    temp9 = (temp8 & temp2)
    temp10 = (basis_bits.bit_4 &~ basis_bits.bit_5)
    temp11 = (temp10 &~ temp5)
    temp12 = (temp9 & temp11)

    null_class = (temp7 | temp12)
    citygml.null = null_class

    # temp17: bits 2, 3, 6 and 7 set, bits 0, 1, 4 and 5 clear.
    temp15 = (basis_bits.bit_6 & basis_bits.bit_7)
    temp16 = (temp15 &~ temp4)
    temp17 = (temp3 & temp16)
    citygml.Appearance = (null_class | temp17)

    # Positions reached by pablo.Advance from an Appearance byte must be
    # null bytes; anything else is flagged.
    appearance_scope = pablo.Advance(citygml.Appearance)
    vec_2_error1 = appearance_scope &~ null_class
    if vec_2_error1:
        error_tracker.NoteError("Vec_2_error found", (vec_2_error1))
203
204
def Classify_bytes_Validate_utf8(basis_bits, lex, u8):
    # Compute the lexical item streams and validate UTF-8 in one pass.
    #
    #   basis_bits - the eight basis bitstreams (read only)
    #   lex        - receives every Lex stream except CRLF
    #   u8         - receives the UTF-8 class and scope streams
    #
    # Two kinds of error are reported through error_tracker.NoteError:
    # control characters other than whitespace, and malformed UTF-8.
    #
    # The tempN equations are shared between many outputs and are
    # order-dependent; they are kept exactly as in the original (only the
    # C-style trailing semicolons were dropped).
    temp1 = (basis_bits.bit_0 | basis_bits.bit_1)
    temp2 = (basis_bits.bit_2 &~ basis_bits.bit_3)
    temp3 = (temp2 &~ temp1)
    temp4 = (basis_bits.bit_5 &~ basis_bits.bit_4)
    temp5 = (basis_bits.bit_6 &~ basis_bits.bit_7)
    temp6 = (temp4 & temp5)
    lex.RefStart = (temp3 & temp6)
    temp7 = (basis_bits.bit_2 & basis_bits.bit_3)
    temp8 = (temp7 &~ temp1)
    temp9 = (basis_bits.bit_4 &~ basis_bits.bit_5)
    temp10 = (basis_bits.bit_6 & basis_bits.bit_7)
    temp11 = (temp9 & temp10)
    lex.Semicolon = (temp8 & temp11)
    temp12 = (basis_bits.bit_4 & basis_bits.bit_5)
    temp13 = (basis_bits.bit_6 | basis_bits.bit_7)
    temp14 = (temp12 &~ temp13)
    lex.LAngle = (temp8 & temp14)
    temp15 = (temp12 & temp5)
    lex.RAngle = (temp8 & temp15)
    temp16 = (basis_bits.bit_1 &~ basis_bits.bit_0)
    temp17 = (basis_bits.bit_3 &~ basis_bits.bit_2)
    temp18 = (temp16 & temp17)
    lex.LBracket = (temp18 & temp11)
    temp19 = (basis_bits.bit_7 &~ basis_bits.bit_6)
    temp20 = (temp12 & temp19)
    lex.RBracket = (temp18 & temp20)
    temp21 = (basis_bits.bit_4 | basis_bits.bit_5)
    temp22 = (temp19 &~ temp21)
    lex.Exclam = (temp3 & temp22)
    temp23 = (temp12 & temp10)
    lex.QMark = (temp8 & temp23)
    lex.Hyphen = (temp3 & temp20)
    lex.Equals = (temp8 & temp20)
    temp24 = (temp4 & temp10)
    lex.SQuote = (temp3 & temp24)
    temp25 = (temp5 &~ temp21)
    lex.DQuote = (temp3 & temp25)
    lex.Slash = (temp3 & temp23)
    temp26 = (temp10 &~ temp21)
    lex.Hash = (temp3 & temp26)
    temp27 = (temp16 & temp7)
    temp28 = (temp9 &~ temp13)
    lex.x = (temp27 & temp28)
    temp29 = (temp9 & temp5)
    lex.Colon = (temp8 & temp29)
    temp30 = (temp18 & temp23)
    temp31 = (temp30 | lex.Colon)
    temp32 = (temp16 &~ basis_bits.bit_2)
    temp33 = (basis_bits.bit_5 | temp10)
    temp34 = (basis_bits.bit_4 & temp33)
    temp35 = (~temp34)
    temp36 = (temp21 | temp13)
    # Bitwise select: where bit_3 is set take temp35, elsewhere temp36.
    temp37 = ((basis_bits.bit_3 & temp35)|(~(basis_bits.bit_3) & temp36))
    temp38 = (temp32 & temp37)
    temp39 = (temp31 | temp38)
    temp40 = (temp16 & basis_bits.bit_2)
    temp41 = (temp40 & temp37)
    lex.ASCII_name_start = (temp39 | temp41)
    temp42 = (temp30 | lex.Hyphen)
    temp43 = (temp3 & temp15)
    temp44 = (temp42 | temp43)
    temp45 = (temp8 &~ temp34)
    temp46 = (temp44 | temp45)
    temp47 = (temp46 | temp38)
    lex.ASCII_name_char = (temp47 | temp41)
    # Every byte with bit_0 set is accepted by NameScan; non-ASCII names are
    # vetted later via the check_streams.non_ascii_* streams.
    lex.NameScan = (lex.ASCII_name_char | basis_bits.bit_0)
    temp48 = (temp1 | basis_bits.bit_2)
    x00_x1F = (~temp48)
    temp49 = (basis_bits.bit_2 | basis_bits.bit_3)
    temp50 = (temp1 | temp49)
    lex.CR = (temp20 &~ temp50)
    lex.LF = (temp29 &~ temp50)
    temp51 = (temp9 & temp19)
    lex.HT = (temp51 &~ temp50)
    lex.SP = (temp3 &~ temp36)
    temp52 = (temp20 | temp29)
    temp53 = (temp52 | temp51)
    temp54 = (temp53 &~ temp50)
    lex.WS = (temp54 | lex.SP)
    temp55 = (basis_bits.bit_5 | basis_bits.bit_6)
    temp56 = (basis_bits.bit_4 & temp55)
    lex.Digit = (temp8 &~ temp56)
    temp57 = (temp16 &~ temp49)
    temp58 = (temp57 &~ basis_bits.bit_4)
    temp59 = (~temp10)
    # Bitwise select on bit_5 between temp59 and temp13.
    temp60 = ((basis_bits.bit_5 & temp59)|(~(basis_bits.bit_5) & temp13))
    temp61 = (temp58 & temp60)
    temp62 = (lex.Digit | temp61)
    temp63 = (temp16 & temp2)
    temp64 = (temp63 &~ basis_bits.bit_4)
    temp65 = (temp64 & temp60)
    lex.Hex = (temp62 | temp65)
    # Bytes with bits 0-2 clear that are not whitespace are illegal; the
    # test is limited to positions selected by EOF_mask.
    lex_error = x00_x1F &~ lex.WS
    if lex_error & EOF_mask:
        error_tracker.NoteError("Error: illegal character", lex_error)

    ### Validate_utf8(basis_bits, u8):
    u8.unibyte = (~basis_bits.bit_0)
    u8.suffix = 0
    u8_error = 0
    u8_FFFE_FFFF = 0
    u8anyscope = 0 #local
    # Everything below is skipped when no byte has bit_0 set.
    if basis_bits.bit_0:
        u8.prefix = (basis_bits.bit_0 & basis_bits.bit_1)
        u8.prefix2 = (u8.prefix &~ basis_bits.bit_2)
        u8.prefix3 = (u8.prefix & temp2)
        u8.prefix4 = (u8.prefix & temp7)
        u8.suffix = (basis_bits.bit_0 &~ basis_bits.bit_1)
        temp66 = (u8.prefix &~ temp49)
        temp67 = (temp21 | basis_bits.bit_6)
        temp68 = (temp66 &~ temp67)
        temp69 = (basis_bits.bit_5 & temp13)
        temp70 = (basis_bits.bit_4 | temp69)
        temp71 = (u8.prefix4 & temp70)
        u8.badprefix = (temp68 | temp71)
        u8_error = u8.badprefix
        u8.scope22 = pablo.Advance(u8.prefix2)
        u8anyscope = u8.scope22
        # Three- and four-byte sequences are handled only when present.
        if u8.prefix3 | u8.prefix4:
            # NOTE(review): xE0/xED/xF0/xF4/xEF are locals here; the
            # like-named u8 fields are not written.
            xE0 = (u8.prefix3 &~ temp36)
            xED = (u8.prefix3 & temp20)
            xF0 = (u8.prefix4 &~ temp36)
            temp72 = (temp4 &~ temp13)
            xF4 = (u8.prefix4 & temp72)
            u8.xA0_xBF = (u8.suffix & basis_bits.bit_2)
            u8.x80_x9F = (u8.suffix &~ basis_bits.bit_2)
            u8.x90_xBF = (u8.suffix & temp49)
            u8.x80_x8F = (u8.suffix &~ temp49)
            xEF = (u8.prefix3 & temp23)
            temp73 = (u8.suffix & temp7)
            u8.xBF = (temp73 & temp23)
            u8.xBE = (temp73 & temp15)
            u8.xE0_scope = pablo.Advance(xE0)
            u8.xED_scope = pablo.Advance(xED)
            u8.xF0_scope = pablo.Advance(xF0)
            u8.xF4_scope = pablo.Advance(xF4)
            u8.xEF_scope = pablo.Advance(xEF)
            u8.scope32 = pablo.Advance(u8.prefix3)
            u8.scope33 = pablo.Advance(u8.scope32)
            u8.scope42 = pablo.Advance(u8.prefix4)
            u8.scope43 = pablo.Advance(u8.scope42)
            u8.scope44 = pablo.Advance(u8.scope43)

            u8lastscope = u8.scope22 | u8.scope33 | u8.scope44
            u8anyscope = u8lastscope | u8.scope32 | u8.scope42 | u8.scope43

            # The byte after each special prefix must avoid the listed
            # suffix sub-range.
            u8error1 = u8.xE0_scope & u8.x80_x9F
            u8error2 = u8.xED_scope & u8.xA0_xBF
            u8error3 = u8.xF0_scope & u8.x80_x8F
            u8error4 = u8.xF4_scope & u8.x90_xBF

            u8_error |= u8error1 | u8error2 | u8error3 | u8error4

            EF_BF_pending = pablo.Advance(u8.xEF_scope & u8.xBF)

            u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF))
        # Suffix bytes must occur exactly at the scope positions.
        u8mismatch = u8anyscope ^ u8.suffix
        u8_error |= u8mismatch | u8_FFFE_FFFF
        if u8_error:
            error_tracker.NoteError("UTF-8 error found", (u8_error))
367
368
def Add_scope_streams(lex, scope1):
    # Fill scope1.LAngle, scope1.Hyphen and scope1.QMark from the '<', '-'
    # and '?' streams in `lex`, and reset scope1.RefStart to 0.
    #
    # Two pablo.Advance calls over OR-ed streams are used where three
    # separate Advance calls (one per character stream) would otherwise be
    # written: '<' occurs only in the first union, '?' only in the second,
    # and '-' in both, so the three scopes can be separated again with
    # bitwise logic.
    langle_or_hyphen = lex.LAngle | lex.Hyphen
    hyphen_or_qmark = lex.Hyphen | lex.QMark
    after_first = pablo.Advance(langle_or_hyphen)
    after_second = pablo.Advance(hyphen_or_qmark)
    scope1.LAngle = after_first &~ after_second
    scope1.Hyphen = after_first & after_second
    scope1.QMark = after_second &~ after_first
    scope1.RefStart = 0 # default
381
def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams):
    # Parse all comments (Ct), CDATA sections (CD) and processing
    # instructions (PI).
    #
    #   ctCDPI_Callouts - receives the start/end callouts and CtCDPI_mask
    #   lex             - lexical item streams (read only)
    #   scope1          - scope streams; scope1.RBracket is written here
    #   check_streams   - receives misc_mask
    #
    # Syntax errors are reported through error_tracker.NoteError.
    ctCDPI_Callouts.CD_end = 0
    ctCDPI_Callouts.Ct_starts = 0
    ctCDPI_Callouts.Ct_ends = 0
    ctCDPI_Callouts.CD_starts = 0
    ctCDPI_Callouts.CD_ends = 0
    ctCDPI_Callouts.PI_starts = 0
    ctCDPI_Callouts.PI_name_starts = 0
    ctCDPI_Callouts.PI_name_ends = 0
    ctCDPI_Callouts.PI_ends = 0
    ctCDPI_Callouts.CtCDPI_mask = 0
    ctCDPI_error = 0
    CtCDPI_starts = 0
    Ct_errors = 0

    # "]]>" can only exist if at least one ']' is present.
    if lex.RBracket:
        scope1.RBracket = pablo.Advance(lex.RBracket)
        ctCDPI_Callouts.CD_end = pablo.Advance(scope1.RBracket & lex.RBracket) & lex.RAngle
    PI_start = scope1.LAngle & lex.QMark
    CtCD_start = scope1.LAngle & lex.Exclam
    CtCDPI_start = PI_start | CtCD_start

    DoubleHyphen = scope1.Hyphen & lex.Hyphen
    PI_end = scope1.QMark & lex.RAngle

    #
    # Initiate the scan
    CtCDPI_Cursor = pablo.ScanToFirst(CtCDPI_start)
    while CtCDPI_Cursor:
        CtCDPI_starts |= CtCDPI_Cursor
        PI_Cursor = CtCDPI_Cursor & PI_start
        CD_Ct_Cursor = pablo.Advance(CtCDPI_Cursor & ~PI_Cursor)
        CD_Cursor = CD_Ct_Cursor & lex.LBracket
        Ct_Cursor = CD_Ct_Cursor & lex.Hyphen
        ctCDPI_Callouts.PI_starts |= PI_Cursor
        ctCDPI_Callouts.CD_starts |= CD_Cursor
        ctCDPI_Callouts.Ct_starts |= Ct_Cursor
        Ct_Cursor = pablo.Advance(Ct_Cursor)
        Ct_errors |= Ct_Cursor & ~ lex.Hyphen
        # Advance twice past <!--, so that we don't treat <!---
        # as being a terminated comment.
        Ct_Cursor = pablo.Advance(pablo.Advance(Ct_Cursor))
        PI_Cursor = pablo.Advance(PI_Cursor)
        ctCDPI_Callouts.PI_name_starts |= PI_Cursor
        PI_name_end = pablo.ScanThru(PI_Cursor, lex.NameScan)
        ctCDPI_Callouts.PI_name_ends |= PI_name_end
        PI_Cursor = pablo.ScanTo(PI_name_end, PI_end)
        CD_Cursor = pablo.ScanTo(CD_Cursor, ctCDPI_Callouts.CD_end)
        Ct_Cursor = pablo.Advance(pablo.ScanTo(Ct_Cursor, DoubleHyphen))
        ctCDPI_Callouts.PI_ends |= PI_Cursor
        ctCDPI_Callouts.CD_ends |= CD_Cursor
        ctCDPI_Callouts.Ct_ends |= Ct_Cursor
        # Move on to the next comment/CDATA/PI opener after the construct
        # that was just closed.
        CtCDPI_Cursor = PI_Cursor | CD_Cursor | Ct_Cursor
        CtCDPI_Cursor = pablo.ScanTo(CtCDPI_Cursor, CtCDPI_start)

        # The mask and the error stream are rebuilt from the accumulated
        # callouts on every pass; each pass overwrites the previous value.
        ctCDPI_Callouts.CtCDPI_mask = pablo.Advance(ctCDPI_Callouts.CD_ends | ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - CtCDPI_starts
        #ctCDPI_Callouts.error = Ct_ends & ~lex.RAngle | Ct_starts & ~ lex.Hyphen
        ctCDPI_error = Ct_errors | ctCDPI_Callouts.Ct_ends & ~lex.RAngle
        ctCDPI_error |= pablo.Advance(ctCDPI_Callouts.PI_name_ends & ~ lex.WS) & ~ PI_end
        ctCDPI_error |= ctCDPI_Callouts.PI_name_starts & ctCDPI_Callouts.PI_name_ends
        # If any of the Comment, CDATA or PI markups are unterminated, it is an error.
        ctCDPI_error |= ctCDPI_Callouts.CtCDPI_mask &~ EOF_mask

    if ctCDPI_error:
        error_tracker.NoteError("Error in comment, CDATA or processing instruction syntax", ctCDPI_error)

    # misc_mask: whitespace, '<', comment and PI spans, and the
    # comment/CDATA/PI start positions, restricted by EOF_mask.
    check_streams.misc_mask = (lex.WS | lex.LAngle | (pablo.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & EOF_mask
450
def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts):
    # Parse start tags, empty-element tags and end tags in parallel.
    #
    #   lex             - lexical item streams (read only)
    #   scope1          - scope streams (scope1.LAngle is read)
    #   ctCDPI_Callouts - supplies CtCDPI_mask so that '<' inside comments,
    #                     CDATA sections and PIs is ignored
    #   tag_Callouts    - receives all element/attribute callout streams
    #
    # Syntax errors are reported through error_tracker.NoteError.
    #
    # Changes from the original: the unconditional ScanThru that overwrote
    # the result of the optimised if/else at the end of the attribute loop
    # has been removed (both forms give the same AfterWS, so the if/else was
    # dead), and the unused local `advanced` has been dropped.

    # Delimiters for scans.
    DQuoteDelim = lex.DQuote | lex.LAngle
    SQuoteDelim = lex.SQuote | lex.LAngle
    AttListDelim = lex.Slash | lex.RAngle

    # Start the parallel parsing by inspecting the character
    # after the opening "<" of a tag.
    LAngleFollow = scope1.LAngle &~ ctCDPI_Callouts.CtCDPI_mask
    tag_Callouts.ElemName_starts = LAngleFollow & ~lex.Slash
    tag_Callouts.EndTag_marks = LAngleFollow & lex.Slash

    # Positions after a ':' in a name, plus the plain element name starts.
    ColonInElemName = lex.Colon & lex.NameScan
    tag_Callouts.ElemName_starts_no_ns = pablo.Advance(ColonInElemName) | tag_Callouts.ElemName_starts

    # Start Tag/Empty Element Tag Parsing

    # Advance all cursors by scanning through the tag name.
    tag_Callouts.ElemName_ends = pablo.ScanThru(tag_Callouts.ElemName_starts, lex.NameScan)
    # Must have at least one name character for a legal start tag.
    # Mark any occurrences of null names as errors.
    ParseError = tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends

    # Initialize the accumulators for attribute name and value positions.
    tag_Callouts.AttName_starts = 0
    tag_Callouts.AttName_ends = 0
    EqToCheck = 0
    tag_Callouts.AttVal_starts = 0
    AttValEnds = 0
    tag_Callouts.AttVal_ends = 0

    # After the element name, there may or may not be an attlist.
    AfterWS = pablo.ScanThru(tag_Callouts.ElemName_ends, lex.WS)
    AttListEnd = AfterWS & AttListDelim
    AttNameStart = AfterWS & ~AttListDelim
    # At least one WS character is required between ElemNames and AttNames.
    ParseError |= tag_Callouts.ElemName_ends & AttNameStart

    #
    # The following loop iterates through attributes within a start tag.
    # Because all start tags are processed in parallel, the number of
    # iterations is the maximum number of attributes found in any one
    # start tag, plus one.
    while AttNameStart:
        tag_Callouts.AttName_starts |= AttNameStart
        AttNameFollow = pablo.ScanThru(AttNameStart, lex.NameScan)
        tag_Callouts.AttName_ends |= AttNameFollow
        # Scan through WS to the expected '=' delimiter.
        # EqExpected = pablo.ScanThru(AttNameFollow, lex.WS)
        # But use if test to optimize.
        if AttNameFollow & lex.WS:
            EqExpected = pablo.ScanThru(AttNameFollow, lex.WS)
        else:
            EqExpected = AttNameFollow
        EqToCheck |= EqExpected
        AttValPos = pablo.ScanThru(EqExpected, EqExpected | lex.WS)
        tag_Callouts.AttVal_starts |= AttValPos
        DQuoteAttVal = AttValPos & lex.DQuote
        SQuoteAttVal = AttValPos & lex.SQuote
        # Scan to the closing quote; '<' also stops the scan and is then
        # caught by the AttValEnds check below.
        DQuoteAttEnd = pablo.ScanTo(DQuoteAttVal, DQuoteDelim &~ DQuoteAttVal)
        SQuoteAttEnd = pablo.ScanTo(SQuoteAttVal, SQuoteDelim &~ SQuoteAttVal)
        AttValEnd = DQuoteAttEnd | SQuoteAttEnd
        AttValEnds |= AttValEnd
        AttValFollow = pablo.Advance(AttValEnd)
        tag_Callouts.AttVal_ends |= AttValFollow
        # AfterWS = pablo.ScanThru(AttValFollow, lex.WS), computed only when
        # some cursor actually sits on whitespace.
        if AttValFollow & lex.WS:
            AfterWS = pablo.ScanThru(AttValFollow, lex.WS)
        else:
            AfterWS = AttValFollow
        AttListEnd |= AfterWS & AttListDelim
        AttNameStart = AfterWS & ~AttListDelim

    # No more attribute values to process when AttNameStart == 0.
    STagEnds = AttListEnd & lex.RAngle
    # Mark any "/" characters found as the ends of empty element tags.
    tag_Callouts.EmptyTag_marks = pablo.Advance(AttListEnd & lex.Slash)

    tag_Callouts.Tag_closing = tag_Callouts.EmptyTag_marks | tag_Callouts.EndTag_marks

    # Check for errors.
    ParseError |= tag_Callouts.AttVal_ends & tag_Callouts.AttName_starts # No intervening WS.
    ParseError |= tag_Callouts.AttName_starts & tag_Callouts.AttName_ends # Null AttName
    ParseError |= EqToCheck & ~lex.Equals # = not found where expected.
    ParseError |= tag_Callouts.AttVal_starts & ~ (lex.DQuote | lex.SQuote)
    ParseError |= AttValEnds & ~ (lex.DQuote | lex.SQuote)
    ParseError |= tag_Callouts.EmptyTag_marks & ~lex.RAngle

    # Element content
    tag_Callouts.stends = STagEnds
    none_emptyelem_con_start = pablo.ScanThru(STagEnds,(lex.WS))
    tag_Callouts.ElemContent_starts = pablo.Advance(none_emptyelem_con_start) &~ lex.LAngle &~ lex.CR &~ lex.LF

    #tag_Callouts.ElemContent_starts = pablo.ScanTo(none_emptyelem_con_start, lex.NameScan)
    # Content runs from its start to the next '<'.
    scanfrom = tag_Callouts.ElemContent_starts
    scan_to = lex.LAngle
    tag_Callouts.ElemContent_ends = pablo.ScanTo(scanfrom, scan_to)

    # End Tag Parsing

    EndTagEnds = pablo.ScanThru(tag_Callouts.EndTag_marks, tag_Callouts.EndTag_marks | lex.NameScan)
    if EndTagEnds & lex.WS:
        EndTagEnds = pablo.ScanThru(EndTagEnds, lex.WS)
    ParseError |= EndTagEnds & ~lex.RAngle
    if ParseError:
        error_tracker.NoteError("Tag parsing error found", (ParseError))

    # Attribute value spans
    tag_Callouts.AttVal_spans = tag_Callouts.AttVal_ends - tag_Callouts.AttVal_starts
564
def Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts):
    # Parse general, decimal and hexadecimal references.
    #
    #   lex             - lexical item streams (read only)
    #   scope1          - scope1.RefStart is written when a reference exists
    #   ctCDPI_Callouts - supplies CtCDPI_mask; '&' under the mask is ignored
    #   ref_Callouts    - receives the six start/end streams (all reset to 0
    #                     first, so they are 0 when no reference is present)
    #
    # Malformed references are reported through error_tracker.NoteError.
    ref_Callouts.GenRef_starts = 0
    ref_Callouts.GenRef_ends = 0
    ref_Callouts.DecRef_starts = 0
    ref_Callouts.DecRef_ends = 0
    ref_Callouts.HexRef_starts = 0
    ref_Callouts.HexRef_ends = 0
    ref_error = 0

    Ref1 = lex.RefStart &~ ctCDPI_Callouts.CtCDPI_mask
    # All remaining "&" must be reference start characters; parse them.
    if Ref1:
        scope1.RefStart = pablo.Advance(Ref1)
        # "&#" introduces a numeric reference; anything else is a general
        # (named) reference.
        NumRef2 = scope1.RefStart & lex.Hash
        ref_Callouts.GenRef_starts = scope1.RefStart &~ lex.Hash
        # "&#x" introduces a hexadecimal reference.
        NumRef3 = pablo.Advance(NumRef2)
        HexRef3 = NumRef3 & lex.x
        ref_Callouts.DecRef_starts = NumRef3 &~ lex.x
        ref_Callouts.HexRef_starts = pablo.Advance(HexRef3)
        ref_Callouts.GenRef_ends = pablo.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan)
        ref_Callouts.DecRef_ends = pablo.ScanThru(ref_Callouts.DecRef_starts, lex.Digit)
        ref_Callouts.HexRef_ends = pablo.ScanThru(ref_Callouts.HexRef_starts, lex.Hex)
        # Error checks
        # At least one digit required for DecRef, one hex digit for HexRef.
        ref_error1 = ref_Callouts.DecRef_starts &~ lex.Digit
        ref_error2 = ref_Callouts.HexRef_starts &~ lex.Hex
        # Semicolon terminator required (also covers unterminated at EOF).
        ref_ends = ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends | ref_Callouts.HexRef_ends
        ref_error3 = ref_ends &~ lex.Semicolon
        ref_error = ref_error1 | ref_error2 | ref_error3
        if ref_error:
            error_tracker.NoteError("Reference error found", (ref_error))
597
598
599
def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
    # Check namespace (colon) usage in all names and publish the streams
    # needed to validate non-ASCII names afterwards.
    #
    #   ctCDPI_Callouts, ref_Callouts, tag_Callouts - name start/end callouts
    #   lex, u8       - character class streams (read only)
    #   xml_names     - not used by this function.
    #                   NOTE(review): Xml_names.namespace_error is never
    #                   written here; confirm whether a consumer expects it.
    #   check_streams - receives non_ascii_name_starts and non_ascii_names
    #
    # Namespace errors are reported through error_tracker.NoteError.
    #
    # Change from the original: the stream of first name positions was
    # computed twice (as name_start and as name_cursor, each with its own
    # pablo.Advance); it is now computed once and reused.

    # Name spans, obtained as (ends - starts) for each kind of name.
    PI_names = ctCDPI_Callouts.PI_name_ends - ctCDPI_Callouts.PI_name_starts
    GenRefs = ref_Callouts.GenRef_ends - ref_Callouts.GenRef_starts
    ElemNames = tag_Callouts.ElemName_ends - tag_Callouts.ElemName_starts
    AttNames = tag_Callouts.AttName_ends - tag_Callouts.AttName_starts
    # Element and attribute names may be qualified (prefix:local); PI
    # targets and general reference names may not contain a colon at all.
    qname_stream =  ElemNames | AttNames
    ncname_stream = PI_names | GenRefs
    name_stream = qname_stream | ncname_stream
    # First position of every name.
    name_start = name_stream &~ pablo.Advance(name_stream)
    # A name may not begin with ':'.
    void_prefix_err = name_start & lex.Colon
    # First ':' inside each name, if any.
    namespace_sep = pablo.ScanThru(name_start, lex.NameScan &~ lex.Colon) & lex.Colon
    # The local part after ':' must start with a name character ...
    local_part_start = pablo.Advance(namespace_sep)
    local_part_err = local_part_start &~ lex.NameScan
    # ... and must not contain a second ':'.
    colon2_err = pablo.ScanThru(local_part_start, lex.NameScan &~ lex.Colon) & lex.Colon
    ncname_err = ncname_stream & lex.Colon
    namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    if namespace_error:
        error_tracker.NoteError("error found", namespace_error)

    check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
623
def Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams):
    # Report "]]>" occurring in plain text and publish the tag-related
    # check streams.
    #
    #   ctCDPI_Callouts - supplies CD_end and CtCDPI_mask
    #   tag_Callouts    - supplies the tag/attribute callouts
    #   scope1          - supplies RefStart
    #   check_streams   - receives tag_marks, name_follows and att_refs
    #   lex, u8, ref_Callouts, xml_names - not used by this function

    # Ensure that no occurrence of ]]> occurs outside of markup.
    CD_end_error = ctCDPI_Callouts.CD_end & ~(ctCDPI_Callouts.CtCDPI_mask | tag_Callouts.AttVal_spans)

    # Consolidate and check for errors
    if CD_end_error:
        error_tracker.NoteError("Error: ]]> in text", CD_end_error)

    check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts | tag_Callouts.EndTag_marks | tag_Callouts.AttName_starts
    check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    # Reference scope positions that fall inside attribute values.
    check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart
636
637
def Form_Length_Group_Bitstreams(tag_Callouts):
    # Partition tag_Callouts.ElemName_ends by element-name length.
    #
    # A cursor stream starts at ElemName_starts and is moved with
    # pablo.Advance once per step.  After n steps, every name end that
    # coincides with the cursor and has not been claimed by a shorter group
    # goes into ElemName_ends_<n>.  Ends left over after 16 steps go into
    # ElemName_ends_17_and_longer.
    #
    # The steps are deliberately written out one by one, with no loop.
    # (The original also bound an unused local, remaining_starts; it has
    # been removed.)
    remaining_ends = tag_Callouts.ElemName_ends
    cursor = tag_Callouts.ElemName_starts

    # Group symbols of length 1
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_1 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_1

    # Group symbols of length 2
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_2 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_2

    # Group symbols of length 3
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_3 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_3

    # Group symbols of length 4
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_4 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_4

    # Group symbols of length 5
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_5 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_5

    # Group symbols of length 6
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_6 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_6

    # Group symbols of length 7
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_7 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_7

    # Group symbols of length 8
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_8 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_8

    # Group symbols of length 9
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_9 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_9

    # Group symbols of length 10
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_10 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_10

    # Group symbols of length 11
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_11 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_11

    # Group symbols of length 12
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_12 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_12

    # Group symbols of length 13
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_13 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_13

    # Group symbols of length 14
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_14 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_14

    # Group symbols of length 15
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_15 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_15

    # Group symbols of length 16
    cursor = pablo.Advance(cursor)
    tag_Callouts.ElemName_ends_16 = cursor & remaining_ends
    remaining_ends = remaining_ends & ~tag_Callouts.ElemName_ends_16

    # Group symbols of length 17 and longer
    tag_Callouts.ElemName_ends_17_and_longer = remaining_ends
726
727
def Compute_Hash_Value_Bitstream(hash_data, basis_bits):
    # Store the XOR of basis bits 2, 4 and 6 in hash_data.Hash_value.
    #
    #   hash_data  - struct receiving .Hash_value
    #   basis_bits - the eight basis bitstreams (only bit_2, bit_4 and
    #                bit_6 are read)
    hash_data.Hash_value = basis_bits.bit_2 ^ basis_bits.bit_4 ^ basis_bits.bit_6
731   
732
733
def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams, hash_data):
    # Run the complete bitstream pipeline.  Each stage reads streams written
    # by the stages before it, so the call order must be preserved.
    #
    # xml_char and masks are accepted but not used by any visible stage.

    # Classify bytes for UTF-8 processing, whitespace and control
    # processing and XML lexical analysis.
    # Classify_bytes(basis_bits, lex)

    # Validate UTF-8 multibyte sequences and determine the UTF-8 scope streams
    # Validate_utf8(basis_bits, u8)

    # Both of the steps above are performed by this single combined pass.
    Classify_bytes_Validate_utf8(basis_bits, lex, u8)

    Add_scope_streams(lex, scope1)

    # Parse all comments, CDATA sections and processing instructions.
    Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, check_streams)

    # All remaining '<' must be tag start characters; parse tags.
    Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts)

    # All remaining '&' must be reference start characters; parse them.
    Parse_refs(lex, scope1, ctCDPI_Callouts, ref_Callouts)

    # Validate XML namespaces and generate bit streams to post validate non-ascii range XML names
    Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams)

    Do_check_streams(ctCDPI_Callouts, tag_Callouts, lex, u8, scope1, ref_Callouts, xml_names, check_streams)

    # These methods are needed to do Parallel Bitstream Based Length Sorting
    Form_Length_Group_Bitstreams(tag_Callouts)

    Compute_Hash_Value_Bitstream(hash_data, basis_bits)
766       
767               
Note: See TracBrowser for help on using the repository browser.